From 59743fc48f760d791df0141e9c40714cb0987eb2 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Sat, 15 Apr 2023 14:05:19 -0500 Subject: [PATCH 01/59] initial airr-js package --- docker/Dockerfile | 14 + lang/js/.eslintrc.json | 16 + lang/js/NEWS.rst | 4 + lang/js/README.rst | 176 + lang/js/airr-schema-openapi3.yaml | 4660 ++++++++++++++++++++ lang/js/airr.js | 521 +++ lang/js/jest.config.js | 195 + lang/js/package.json | 46 + lang/js/tests/airr.test.js | 46 + lang/js/tests/data/bad_genotype_set.json | 44 + lang/js/tests/data/bad_germline_set.json | 351 ++ lang/js/tests/data/bad_rearrangement.tsv | 10 + lang/js/tests/data/bad_repertoire.yaml | 148 + lang/js/tests/data/extra_rearrangement.tsv | 2 + lang/js/tests/data/good_combined_airr.json | 838 ++++ lang/js/tests/data/good_combined_airr.yaml | 771 ++++ lang/js/tests/data/good_genotype_set.json | 38 + lang/js/tests/data/good_germline_set.json | 354 ++ lang/js/tests/data/good_rearrangement.tsv | 10 + lang/js/tests/data/good_repertoire.yaml | 379 ++ 20 files changed, 8623 insertions(+) create mode 100644 lang/js/.eslintrc.json create mode 100644 lang/js/NEWS.rst create mode 100644 lang/js/README.rst create mode 100644 lang/js/airr-schema-openapi3.yaml create mode 100644 lang/js/airr.js create mode 100644 lang/js/jest.config.js create mode 100644 lang/js/package.json create mode 100644 lang/js/tests/airr.test.js create mode 100644 lang/js/tests/data/bad_genotype_set.json create mode 100644 lang/js/tests/data/bad_germline_set.json create mode 100644 lang/js/tests/data/bad_rearrangement.tsv create mode 100644 lang/js/tests/data/bad_repertoire.yaml create mode 100644 lang/js/tests/data/extra_rearrangement.tsv create mode 100644 lang/js/tests/data/good_combined_airr.json create mode 100644 lang/js/tests/data/good_combined_airr.yaml create mode 100644 lang/js/tests/data/good_genotype_set.json create mode 100644 lang/js/tests/data/good_germline_set.json create mode 100644 lang/js/tests/data/good_rearrangement.tsv create 
mode 100644 lang/js/tests/data/good_repertoire.yaml diff --git a/docker/Dockerfile b/docker/Dockerfile index 5f436aee8..a4ff2043a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -33,6 +33,15 @@ RUN pip3 install \ # Install R devtools RUN R -e 'install.packages(c("devtools","knitr","rmarkdown","testthat","readr"),dependencies=T)' +# node +ENV NODE_VER v14.21.3 +RUN wget https://nodejs.org/dist/$NODE_VER/node-$NODE_VER-linux-x64.tar.xz +RUN tar xf node-$NODE_VER-linux-x64.tar.xz +RUN cp -rf /node-$NODE_VER-linux-x64/bin/* /usr/bin +RUN cp -rf /node-$NODE_VER-linux-x64/lib/* /usr/lib +RUN cp -rf /node-$NODE_VER-linux-x64/include/* /usr/include +RUN cp -rf /node-$NODE_VER-linux-x64/share/* /usr/share + # Copy source RUN mkdir /airr-standards COPY . /airr-standards @@ -40,5 +49,10 @@ COPY . /airr-standards # Install python package RUN cd /airr-standards/lang/python && python3 setup.py install +# nodejs package +RUN cd /airr-standards/lang/js && npm install +RUN cd /airr-standards/lang/js && npm run eslint +RUN cd /airr-standards/lang/js && npm run test + # Generate the documentation RUN cd /airr-standards && sphinx-build -a -E -b html docs docs/_build/html diff --git a/lang/js/.eslintrc.json b/lang/js/.eslintrc.json new file mode 100644 index 000000000..df7a8b369 --- /dev/null +++ b/lang/js/.eslintrc.json @@ -0,0 +1,16 @@ +{ + "env": { + "es2021": true, + "node": true + }, + "extends": "eslint:recommended", + "parserOptions": { + "ecmaVersion": 12, + "sourceType": "module" + }, + "rules": { + "no-unused-vars": 0, + "no-redeclare": 0, + "no-prototype-builtins": 1 + } +} diff --git a/lang/js/NEWS.rst b/lang/js/NEWS.rst new file mode 100644 index 000000000..9a2328558 --- /dev/null +++ b/lang/js/NEWS.rst @@ -0,0 +1,4 @@ +Version 1.4.2: DATE +-------------------------------------------------------------------------------- + +Initial release. 
diff --git a/lang/js/README.rst b/lang/js/README.rst new file mode 100644 index 000000000..8c73d9bdd --- /dev/null +++ b/lang/js/README.rst @@ -0,0 +1,176 @@ +Installation +------------------------------------------------------------------------------ + +Install in the usual manner from npm:: + + > npm install airr-js + +Or from the `downloaded <https://github.com/airr-community/airr-standards/tags>`__ +source code directory:: + + > npm install file:lang/js + + +Quick Start +------------------------------------------------------------------------------ + +Reading AIRR Data Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``airr`` package contains functions to read and write AIRR Data +Model files. The file format is either YAML or JSON, and the package provides a +light wrapper over the standard parsers. The file needs a ``json``, ``yaml``, or ``yml`` +file extension so that the proper parser is utilized. All of the AIRR objects +are loaded into memory at once and no streaming interface is provided:: + + import airr + + # Load the AIRR data + data = airr.read_airr('input.airr.json') + # loop through the repertoires + for rep in data['Repertoire']: + print(rep) + +Why are the AIRR objects, such as Repertoire, GermlineSet, etc., in a list versus in a +dictionary keyed by their identifier (e.g., ``repertoire_id``)? There are two primary reasons for +this. First, the identifier might not have been assigned yet. Some systems might allow MiAIRR +metadata to be entered but the identifier is assigned to that data later by another process. Without +the identifier, the data could not be stored in a dictionary. Secondly, the list allows the data to +have a default ordering. If you know that the data has a unique identifier then you can quickly +create a dictionary object using a comprehension. 
For example, with repertoires:: + + rep_dict = { obj['repertoire_id'] : obj for obj in data['Repertoire'] } + +Another example with germline sets:: + + germline_dict = { obj['germline_set_id'] : obj for obj in data['GermlineSet'] } + +Writing AIRR Data Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Writing an AIRR Data File is also a light wrapper over standard YAML or JSON +parsers. Multiple AIRR objects, such as Repertoire, GermlineSet, etc., can be +written together into the same file. In this example, we use the ``airr`` library ``template`` +method to create some blank Repertoire objects, and write them to a file. +As with the read function, the complete list of repertoires is written at once; +there is no streaming interface:: + + import airr + + # Create some blank repertoire objects in a list + data = { 'Repertoire': [] } + for i in range(5): + data['Repertoire'].append(airr.schema.RepertoireSchema.template()) + + # Write the AIRR Data + airr.write_airr('output.airr.json', data) + +Reading AIRR Rearrangement TSV files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``airr`` package contains functions to read and write AIRR Rearrangement +TSV files as either iterables or pandas data frames. 
The usage is straightforward, +as the file format is a typical tab-delimited file, but the package +performs some additional validation and type conversion beyond using a +standard CSV reader:: + + import airr + + # Create an iterable that returns a dictionary for each row + reader = airr.read_rearrangement('input.tsv') + for row in reader: print(row) + + # Load the entire file into a pandas data frame + df = airr.load_rearrangement('input.tsv') + +Writing AIRR Rearrangement TSV files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to the read operations, write functions are provided for either creating +a writer class to perform row-wise output or writing the entire contents of +a pandas data frame to a file. Again, usage is straightforward with the ``airr`` +output functions simply performing some type conversion and field ordering +operations:: + + import airr + + # Create a writer class for iterative row output + writer = airr.create_rearrangement('output.tsv') + for row in reader: writer.write(row) + + # Write an entire pandas data frame to a file + airr.dump_rearrangement(df, 'file.tsv') + +By default, ``create_rearrangement`` will only write the ``required`` fields +in the output file. Additional fields can be included in the output file by +providing the ``fields`` parameter with an array of additional field names:: + + # Specify additional fields in the output + fields = ['new_calc', 'another_field'] + writer = airr.create_rearrangement('output.tsv', fields=fields) + +A common operation is to read an AIRR rearrangement file, and then +write an AIRR rearrangement file with additional fields in it while +keeping all of the existing fields from the original file. 
The +``derive_rearrangement`` function provides this capability:: + + import airr + + # Read rearrangement data and write new file with additional fields + reader = airr.read_rearrangement('input.tsv') + fields = ['new_calc'] + writer = airr.derive_rearrangement('output.tsv', 'input.tsv', fields=fields) + for row in reader: + row['new_calc'] = 'a value' + writer.write(row) + + +Validating AIRR data files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``airr`` package can validate AIRR Data Model JSON/YAML files and Rearrangement +TSV files to ensure that they contain all required fields and that the fields types +match the AIRR Schema. This can be done using the ``airr-tools`` command +line program or the validate functions in the library can be called:: + + # Validate a rearrangement TSV file + airr-tools validate rearrangement -a input.tsv + + # Validate an AIRR DataFile + airr-tools validate airr -a input.airr.json + +Combining Repertoire metadata and Rearrangement files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``airr`` package does not currently keep track of which AIRR Data Model files +are associated with which Rearrangement TSV files, though there is ongoing work to define +a standardized manifest, so users will need to handle those +associations themselves. However, in the data, AIRR identifier fields, such as ``repertoire_id``, +form the link between objects in the AIRR Data Model. +The typical usage is that a program is going to perform some +computation on the Rearrangements, and it needs access to the Repertoire metadata +as part of the computation logic. 
This example code shows the basic framework +for doing that, in this case doing gender specific computation:: + + import airr + + # Load AIRR data containing repertoires + data = airr.read_airr('input.airr.json') + + # Put repertoires in dictionary keyed by repertoire_id + rep_dict = { obj['repertoire_id'] : obj for obj in data['Repertoire'] } + + # Create an iterable for rearrangement data + reader = airr.read_rearrangement('input.tsv') + for row in reader: + # get repertoire metadata with this rearrangement + rep = rep_dict[row['repertoire_id']] + + # check the gender + if rep['subject']['sex'] == 'male': + # do male specific computation + elif rep['subject']['sex'] == 'female': + # do female specific computation + else: + # do other specific computation + diff --git a/lang/js/airr-schema-openapi3.yaml b/lang/js/airr-schema-openapi3.yaml new file mode 100644 index 000000000..21772e41f --- /dev/null +++ b/lang/js/airr-schema-openapi3.yaml @@ -0,0 +1,4660 @@ +# +# Schema definitions for AIRR standards objects +# +Info: + title: AIRR Schema + description: Schema definitions for AIRR standards objects + version: "1.4" + contact: + name: AIRR Community + url: https://github.com/airr-community + license: + name: Creative Commons Attribution 4.0 International + url: https://creativecommons.org/licenses/by/4.0/ + + +# Properties that are based upon an ontology use this +# standard schema definition +Ontology: + type: object + properties: + id: + type: string + nullable: true + description: CURIE of the concept, encoding the ontology and the local ID + label: + type: string + nullable: true + description: Label of the concept in the respective ontology + +# Map to expand CURIE prefixes to full IRIs +CURIEMap: + ABREG: + type: identifier + default: + map: ABREG + map: + ABREG: + iri_prefix: "http://antibodyregistry.org/AB_" + CHEBI: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + CL: + type: 
ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CL_" + DOI: + type: identifier + default: + map: DOI + map: + DOI: + iri_prefix: "https://doi.org/" + DOID: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/DOID_" + ENA: + type: identifier + default: + map: ENA + map: + ENA: + iri_prefix: "https://www.ebi.ac.uk/ena/browser/view/" + ENSG: + type: identifier + default: + map: ENSG + map: + ENSG: + iri_prefix: "https://www.ensembl.org/Multi/Search/Results?q=" + IEDB_RECEPTOR: + type: identifier + default: + map: IEDB + provider: IEDB + map: + IEDB: + iri_prefix: "https://www.iedb.org/receptor/" + MRO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/MRO_" + NCBITAXON: + type: taxonomy + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCBITaxon_" + BioPortal: + iri_prefix: "http://purl.bioontology.org/ontology/NCBITAXON/" + NCIT: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCIT_" + ORCID: + type: catalog + default: + map: ORCID + provider: ORCID + map: + ORCID: + iri_prefix: "https://orcid.org/" + ROR: + type: catalog + default: + map: ROR + provider: ROR + map: + ROR: + iri_prefix: "https://ror.org/" + SRA: + type: identifier + default: + map: SRA + map: + SRA: + iri_prefix: "https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=" + UBERON: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UBERON_" + UNIPROT: + type: identifier + default: + map: UNIPROT + map: + UniProt: + iri_prefix: "http://purl.uniprot.org/uniprot/" + UO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UO_" + +InformationProvider: + provider: + ENA: + request: + url: 
"{iri}" + response: text/html + IEDB: + request: + url: "https://query-api.iedb.org/tcr_search?receptor_group_id=eq.{local_id}" + response: application/json + OLS: + request: + url: "https://www.ebi.ac.uk/ols/api/ontologies/{ontology_id}/terms?iri={iri}" + response: application/json + Ontobee: + request: + url: "http://www.ontobee.org/ontology/rdf/{ontology_id}?iri={iri}" + response: application/rdf+xml + ORCID: + request: + url: "https://pub.orcid.org/v2.1/{local_id}" + header: + Accept: application/json + response: application/json + ROR: + request: + url: "https://api.ror.org/organizations/{iri}" + response: application/json + SRA: + request: + url: "{iri}" + response: text/html + parameter: + CHEBI: + Ontobee: + ontology_id: CHEBI + OLS: + ontology_id: chebi + CL: + Ontobee: + ontology_id: CL + OLS: + ontology_id: cl + DOID: + Ontobee: + ontology_id: DOID + OLS: + ontology_id: doid + MRO: + Ontobee: + ontology_id: MRO + OLS: + ontology_id: mro + NCBITAXON: + Ontobee: + ontology_id: NCBITaxon + OLS: + ontology_id: ncbitaxon + BioPortal: + ontology_id: NCBITAXON + NCIT: + Ontobee: + ontology_id: NCIT + OLS: + ontology_id: ncit + UBERON: + Ontobee: + ontology_id: UBERON + OLS: + ontology_id: uberon + UO: + Ontobee: + ontology_id: UO + OLS: + ontology_id: uo + +# AIRR specification extensions +# +# The schema definitions for AIRR standards objects is extended to +# provide a number of AIRR specific attributes. This schema definition +# specifies the structure, property names and data types. These +# attributes are attached to an AIRR field with the x-airr property. + +Attributes: + type: object + properties: + miairr: + type: string + description: MiAIRR requirement level. + enum: + - essential + - important + - defined + default: useful + identifier: + type: boolean + description: > + True if the field is an identifier required to link metadata and/or individual + sequence records across objects in the complete AIRR Data Model and ADC API. 
+ default: false + adc-query-support: + type: boolean + description: > + True if an ADC API implementation must support queries on the field. + If false, query support for the field in ADC API implementations is optional. + default: false + deprecated: + type: boolean + description: True if the field has been deprecated from the schema. + default: false + deprecated-description: + type: string + description: Information regarding the deprecation of the field. + deprecated-replaced-by: + type: array + items: + type: string + description: The deprecated field is replaced by this list of fields. + set: + type: integer + description: MiAIRR set + subset: + type: string + description: MiAIRR subset + name: + type: string + description: MiAIRR name + format: + type: string + description: Field format. If null then assume the full range of the field data type + enum: + - ontology + - controlled vocabulary + - physical quantity + - CURIE + ontology: + type: object + description: Ontology definition for field + properties: + draft: + type: boolean + description: Indicates if ontology definition is a draft + top_node: + type: object + description: > + Concept to use as top node for ontology. Note that this must have the same CURIE namespace + as the actually annotated concept. + properties: + id: + type: string + description: CURIE for the top node term + label: + type: string + description: Ontology name for the top node term + +# AIRR Data File +# +# A JSON data file that holds Repertoire metadata, data processing +# analysis objects, or any object in the AIRR Data Model. +# +# It is presumed that the objects gathered together in an AIRR Data File are related +# or relevant to each other, e.g. part of the same study; thus, the ID fields can be +# internally resolved unless the ID contains an external PID. This implies that AIRR +# Data Files cannot be merged simply by concatenating arrays; any merge program +# would need to manage duplicate or conflicting ID values. 
+# +# While the properties in an AIRR Data File are not required, if one is provided then +# the value should not be null. + +DataFile: + type: object + properties: + Info: + nullable: false + $ref: '#/InfoObject' + Repertoire: + type: array + nullable: false + description: List of repertoires + items: + $ref: '#/Repertoire' + RepertoireGroup: + type: array + nullable: false + description: List of repertoire collections + items: + $ref: '#/RepertoireGroup' + Rearrangement: + type: array + nullable: false + description: List of rearrangement records + items: + $ref: '#/Rearrangement' + Cell: + type: array + nullable: false + description: List of cells + items: + $ref: '#/Cell' + Clone: + type: array + nullable: false + description: List of clones + items: + $ref: '#/Clone' + GermlineSet: + type: array + nullable: false + description: List of germline sets + items: + $ref: '#/GermlineSet' + GenotypeSet: + type: array + nullable: false + description: List of genotype sets + items: + $ref: '#/GenotypeSet' + +# AIRR Info object, should be similar to openapi +# should we point to an openapi schema? +InfoObject: + type: object + description: Provides information about data and API responses. + required: + - title + - version + properties: + title: + type: string + nullable: false + version: + type: string + nullable: false + description: + type: string + nullable: true + contact: + type: object + nullable: true + properties: + name: + type: string + nullable: true + url: + type: string + nullable: true + email: + type: string + nullable: true + license: + type: object + nullable: true + required: + - name + properties: + name: + type: string + nullable: false + url: + type: string + nullable: true + +# A time point +TimePoint: + description: Time point at which an observation or other action was performed. 
+ type: object + properties: + label: + type: string + nullable: true + description: Informative label for the time point + example: Pre-operative sampling of cancer tissue + x-airr: + adc-query-support: true + value: + type: number + nullable: true + description: Value of the time point + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: true + description: Unit of the time point + title: Unit of immunization schedule + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# +# General objects +# + +# An individual +Acknowledgement: + description: Individual whose contribution to this work should be acknowledged + type: object + required: + - acknowledgement_id + - name + - institution_name + properties: + acknowledgement_id: + type: string + nullable: false + description: unique identifier of this Acknowledgement within the file + name: + type: string + nullable: true + description: Full name of individual + institution_name: + type: string + nullable: true + description: Individual's department and institution name + orcid_id: + type: string + nullable: true + description: Individual's ORCID identifier + +# +# Germline gene schema +# + +# Rearranged and genomic germline sequences +RearrangedSequence: + description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele + type: object + required: + - sequence_id + - sequence + - derivation + - observation_type + - repository_name + - repository_id + - deposited_version + - seq_start + - seq_end + properties: + sequence_id: + type: string + nullable: false + description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + sequence: + type: string + 
nullable: false + description: nucleotide sequence + derivation: + type: string + nullable: false + enum: + - DNA + - RNA + description: The class of nucleic acid that was used as primary starting material + observation_type: + type: string + nullable: false + description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire + enum: + - direct sequencing + - inference from repertoire + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: false + description: Name of the repository in which the sequence has been deposited + repository_ref: + type: string + nullable: false + description: Queryable id or accession number of the sequence published by the repository + deposited_version: + type: string + nullable: false + description: Version number of the sequence within the repository + sequence_start: + type: integer + nullable: false + description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited + sequence_end: + type: integer + nullable: false + description: End co-ordinate of the sequence detailed in this record, within the sequence deposited + +UnrearrangedSequence: + description: Details of an unrearranged sequence contributing support for a gene or allele + type: object + required: + - sequence_id + - sequence + - repository_name + - assembly_id + - gff_seqid + - gff_start + - gff_end + - strand + properties: + sequence_id: + type: string + nullable: false + description: unique identifier of this UnrearrangedSequence within the file + sequence: + type: string + nullable: false + description: Sequence of interest described in this record (typically this will include gene and promoter region) + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: false + description: Name of the repository in which the 
assembly or contig is deposited + repository_ref: + type: string + nullable: false + description: Queryable id or accession number of the sequence published by the repository + patch_no: + type: string + nullable: true + description: Genome assembly patch number in which this gene was determined + gff_seqid: + type: string + nullable: true + description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + gff_start: + type: integer + nullable: true + description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensembl GFF version 3 + gff_end: + type: integer + nullable: true + description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensembl GFF version 3 + strand: + type: string + nullable: true + enum: + - + + - "-" + description: sense (+ or -) + +# V gene delineation +SequenceDelineationV: + description: Delineation of a V-gene in a particular system + type: object + required: + - sequence_delineation_id + - delineation_scheme + - fwr1_start + - fwr1_end + - cdr1_start + - cdr1_end + - fwr2_start + - fwr2_end + - cdr2_start + - cdr2_end + - fwr3_start + - fwr3_end + - cdr3_start + properties: + sequence_delineation_id: + type: string + nullable: false + description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + delineation_scheme: + type: string + nullable: false + description: Name of the delineation scheme + example: Chothia + fwr1_start: + type: integer + nullable: false + description: FWR1 start co-ordinate in Gene Description 'alignment' field + fwr1_end: + type: integer + nullable: false + description: FWR1 end co-ordinate in Gene Description 'alignment' field + cdr1_start: + type: integer + nullable: false + description: CDR1 start co-ordinate in Gene Description 'alignment' field + 
cdr1_end: + type: integer + nullable: false + description: CDR1 end co-ordinate in Gene Description 'alignment' field + fwr2_start: + type: integer + nullable: false + description: FWR2 start co-ordinate in Gene Description 'alignment' field + fwr2_end: + type: integer + nullable: false + description: FWR2 end co-ordinate in Gene Description 'alignment' field + cdr2_start: + type: integer + nullable: false + description: CDR2 start co-ordinate in Gene Description 'alignment' field + cdr2_end: + type: integer + nullable: false + description: CDR2 end co-ordinate in Gene Description 'alignment' field + fwr3_start: + type: integer + nullable: false + description: FWR3 start co-ordinate in Gene Description 'alignment' field + fwr3_end: + type: integer + nullable: false + description: FWR3 end co-ordinate in Gene Description 'alignment' field + cdr3_start: + type: integer + nullable: false + description: CDR3 start co-ordinate in Gene Description 'alignment' field + alignment: + type: array + nullable: true + items: + type: string + description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + +# Description of a putative or confirmed Ig receptor gene/allele +AlleleDescription: + description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations + type: object + required: + - allele_description_id + - maintainer + - lab_address + - release_version + - release_date + - release_description + - sequence + - coding_sequence + - locus + - sequence_type + - functional + - inference_type + - species + properties: + allele_description_id: + type: string + nullable: false + description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + allele_description_ref: + type: string + nullable: false + description: 
Unique reference to the allele description, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:IGHV1-69*01.001 + maintainer: + type: string + nullable: false + description: Maintainer of this sequence record + acknowledgements: + type: array + nullable: true + description: List of individuals whose contribution to the gene description should be acknowledged + items: + $ref: '#/Acknowledgement' + lab_address: + type: string + nullable: false + description: Institution and full address of corresponding author + release_version: + type: integer + nullable: false + description: Version number of this record, updated whenever a revised version is published or released + release_date: + type: string + format: date-time + nullable: false + description: Date of this release + title: Release Date + example: "2021-02-02" + release_description: + type: string + nullable: false + description: Brief descriptive notes of the reason for this release and the changes embodied + label: + type: string + nullable: true + description: The accepted name for this gene or allele + example: IGHV1-69*01 + sequence: + type: string + nullable: false + description: nt sequence of the gene. 
This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + coding_sequence: + type: string + nullable: false + description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + aliases: + type: array + nullable: true + items: + type: string + description: Alternative names for this sequence + locus: + type: string + nullable: false + enum: + - IGH + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + chromosome: + type: integer + nullable: true + description: chromosome on which the gene is located + sequence_type: + type: string + nullable: false + enum: + - V + - D + - J + - C + description: Sequence type (V, D, J, C) + functional: + type: boolean + nullable: false + description: True if the gene is functional, false if it is a pseudogene + inference_type: + type: string + nullable: false + enum: + - Genomic and rearranged + - Genomic only + - Rearranged only + description: Type of inference(s) from which this gene sequence was inferred + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + species_subgroup: + type: string + nullable: true + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + status: + type: string + nullable: true + enum: + - active + - draft + - retired + - withdrawn + description: Status of record, assumed active if the field is not present + subgroup_designation: + type: string + nullable: true + description: Identifier of the gene subgroup or clade, as (and if) defined + gene_designation: + type: string + nullable: true + description: Gene number or other 
identifier, as (and if) defined + allele_designation: + type: string + nullable: true + description: Allele number or other identifier, as (and if) defined + j_codon_frame: + type: integer + nullable: true + enum: + - 1 + - 2 + - 3 + description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + gene_start: + type: integer + nullable: true + description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + gene_end: + type: integer + nullable: true + description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + utr_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + utr_5_prime_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + leader_1_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + leader_1_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + leader_2_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + leader_2_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + v_rs_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + v_rs_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + d_rs_3_prime_start: + type: integer + nullable: true + description: Start co-ordinate (in 
the sequence field) of 3 prime D recombination site (D-genes only) + d_rs_3_prime_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + d_rs_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + d_rs_5_prime_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + j_cdr3_end: + type: integer + nullable: true + description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucleotide of the conserved PHE or TRP (IMGT codon position 118) + j_rs_start: + type: integer + nullable: true + description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + j_rs_end: + type: integer + nullable: true + description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + j_donor_splice: + type: integer + nullable: true + description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + v_gene_delineations: + type: array + nullable: true + items: + $ref: '#/SequenceDelineationV' + unrearranged_support: + type: array + nullable: true + items: + $ref: '#/UnrearrangedSequence' + rearranged_support: + type: array + nullable: true + items: + $ref: '#/RearrangedSequence' + paralogs: + type: array + nullable: true + items: + type: string + description: Gene symbols of any paralogs + curation: + type: string + nullable: true + description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description.
+ curational_tags: + type: array + nullable: true + items: + type: string + enum: + - likely_truncated + - likely_full_length + description: Controlled-vocabulary tags applied to this description + +# Collection of gene descriptions into a germline set +GermlineSet: + description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. + type: object + required: + - germline_set_id + - author + - lab_name + - lab_address + - release_version + - release_description + - release_date + - germline_set_name + - germline_set_ref + - species + - locus + - allele_descriptions + properties: + germline_set_id: + type: string + nullable: false + description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + author: + type: string + nullable: false + description: Corresponding author + lab_name: + type: string + nullable: false + description: Department of corresponding author + lab_address: + type: string + nullable: false + description: Institutional address of corresponding author + acknowledgements: + type: array + nullable: true + description: List of individuals whose contribution to the germline set should be acknowledged + items: + $ref: '#/Acknowledgement' + release_version: + type: number + nullable: false + description: Version number of this record, allocated automatically + release_description: + type: string + nullable: false + description: Brief descriptive notes of the reason for this release and the changes embodied + release_date: + type: string + format: date-time + nullable: false + description: Date of this release + title: Release Date + example: "2021-02-02" + germline_set_name: + type: string + nullable: false + description: descriptive name of this germline set + germline_set_ref: + type: string + nullable: false + 
description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + pub_ids: + type: string + nullable: true + description: Publications describing the germline set + example: "PMID:85642,PMID:12345" + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + species_subgroup: + type: string + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + nullable: true + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + locus: + type: string + nullable: false + enum: + - IGH + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + allele_descriptions: + type: array + nullable: false + items: + $ref: '#/AlleleDescription' + description: list of allele_descriptions in the germline set + curation: + type: string + nullable: true + description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + +# +# Genotype schema +# + +# GenotypeSet lists the Genotypes (describing different loci) inferred for this subject + +GenotypeSet: + type: object + required: + - receptor_genotype_set_id + properties: + receptor_genotype_set_id: + type: string + nullable: false + description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + genotype_class_list: + description: List of Genotypes included in this Receptor Genotype Set. + type: array + nullable: true + items: + $ref: '#/Genotype' + + +# Genotype of adaptive immune receptors +# This enumerates the alleles and gene deletions inferred in a single subject. 
+# Included alleles may either be listed by reference to a GermlineSet, or +# listed as 'undocumented', in which case the inferred sequence is provided + +Genotype: + type: object + required: + - receptor_genotype_id + - locus + properties: + receptor_genotype_id: + type: string + nullable: false + description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + locus: + type: string + nullable: false + enum: + - IGH + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + example: IGH + x-airr: + adc-query-support: true + format: controlled vocabulary + documented_alleles: + type: array + nullable: true + description: Array of alleles inferred to be present which are documented in GermlineSets + items: + type: object + properties: + label: + type: string + nullable: false + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + nullable: false + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + x-airr: + adc-query-support: true + undocumented_alleles: + type: array + nullable: true + description: Array of alleles inferred to be present and not documented in an identified GermlineSet + items: + type: object + properties: + allele_name: + type: string + nullable: false + description: Allele name as allocated by the inference pipeline + sequence: + type: string + nullable: false + description: nt sequence of the allele, as provided by the inference pipeline + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + x-airr: + adc-query-support: true + deleted_genes: + type: array + nullable: true + description: Array of genes identified as being deleted in this genotype + items: + type: object + properties: + label: + type: string + nullable: false + description: The accepted name for this gene, taken from the GermlineSet + germline_set_ref: + type: string + nullable: false + description: GermlineSet from which it was taken (issuer/name/version) + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + x-airr: + adc-query-support: true + inference_process: + type: string + nullable: true + enum: + - genomic_sequencing + - repertoire_sequencing + description: Information on how the genotype was acquired. Controlled vocabulary. + title: Genotype acquisition process + example: repertoire_sequencing + x-airr: + adc-query-support: true + format: controlled vocabulary + + +# List of MHCGenotypes describing a subject's genotype +MHCGenotypeSet: + type: object + required: + - mhc_genotype_set_id + - mhc_genotype_list + properties: + mhc_genotype_set_id: + type: string + nullable: false + description: A unique identifier for this MHCGenotypeSet + mhc_genotype_list: + description: List of MHCGenotypes included in this set + type: array + nullable: false + items: + $ref: '#/MHCGenotype' + + +# Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci +MHCGenotype: + type: object + required: + - mhc_genotype_id + - mhc_class + - mhc_alleles + properties: + mhc_genotype_id: + type: string + nullable: false + description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study + mhc_class: + type: string + nullable: false + description: Class of MHC alleles described by the MHCGenotype + enum: + - MHC-I + - MHC-II + - MHC-nonclassical 
+ example: MHC-I + x-airr: + adc-query-support: true + format: controlled vocabulary + mhc_alleles: + type: array + nullable: false + description: List of MHC alleles of the indicated mhc_class identified in an individual + items: + type: object + properties: + allele_designation: + type: string + nullable: false + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + gene: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + reference_set_ref: + type: string + nullable: false + description: Repository and list from which it was taken (issuer/name/version) + x-airr: + adc-query-support: true + mhc_genotyping_method: + type: string + nullable: true + description: > + Information on how the genotype was determined. The content of this field should come from a list of + recommended terms provided in the AIRR Schema documentation. + title: MHC genotyping method + example: pcr_low_resolution + x-airr: + adc-query-support: true + +# +# Repertoire metadata schema +# + +# The overall study with a globally unique study_id +Study: + type: object + required: + - study_id + - study_title + - study_type + - inclusion_exclusion_criteria + - grants + - collected_by + - lab_name + - lab_address + - submitted_by + - pub_ids + - keywords_study + properties: + study_id: + type: string + nullable: true + description: > + Unique ID assigned by study registry such as one of the International Nucleotide Sequence Database + Collaboration (INSDC) repositories. 
+ title: Study ID + example: PRJNA001 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study ID + study_title: + type: string + nullable: true + description: Descriptive study title + title: Study title + example: Effects of sun light exposure of the Treg repertoire + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study title + study_type: + $ref: '#/Ontology' + nullable: true + description: Type of study design + title: Study type + example: + id: NCIT:C15197 + label: Case-Control Study + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study type + format: ontology + ontology: + draft: false + top_node: + id: NCIT:C63536 + label: Study + study_description: + type: string + nullable: true + description: Generic study description + title: Study description + example: Longer description + x-airr: + name: Study description + adc-query-support: true + inclusion_exclusion_criteria: + type: string + nullable: true + description: List of criteria for inclusion/exclusion for the study + title: Study inclusion/exclusion criteria + example: "Include: Clinical P. falciparum infection; Exclude: Seropositive for HIV" + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study inclusion/exclusion criteria + grants: + type: string + nullable: true + description: Funding agencies and grant numbers + title: Grant funding agency + example: NIH, award number R01GM987654 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Grant funding agency + study_contact: + type: string + nullable: true + description: > + Full contact information of the contact persons for this study. This should include an e-mail address + and a persistent identifier such as an ORCID ID. + title: Contact information (study) + example: Dr. P.
Stibbons, p.stibbons@unseenu.edu, https://orcid.org/0000-0002-1825-0097 + x-airr: + adc-query-support: true + name: Contact information (study) + collected_by: + type: string + nullable: true + description: > + Full contact information of the data collector, i.e. the person who is legally responsible for data + collection and release. This should include an e-mail address and a persistent identifier such as an + ORCID ID. + title: Contact information (data collection) + example: Dr. P. Stibbons, p.stibbons@unseenu.edu, https://orcid.org/0000-0002-1825-0097 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Contact information (data collection) + lab_name: + type: string + nullable: true + description: Department of data collector + title: Lab name + example: Department for Planar Immunology + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Lab name + lab_address: + type: string + nullable: true + description: Institution and institutional address of data collector + title: Lab address + example: School of Medicine, Unseen University, Ankh-Morpork, Disk World + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Lab address + submitted_by: + type: string + nullable: true + description: > + Full contact information of the data depositor, i.e., the person submitting the data to a repository. + This should include an e-mail address and a persistent identifier such as an ORCID ID. This is + supposed to be a short-lived and technical role until the submission is released. + title: Contact information (data deposition) + example: Adrian Turnipseed, a.turnipseed@unseenu.edu, https://orcid.org/0000-0002-1825-0097 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Contact information (data deposition) + pub_ids: + type: string + nullable: true + description: > + Publications describing the rationale and/or outcome of the study.
Wherever possible, a persistent + identifier should be used such as a DOI or a Pubmed ID + title: Relevant publications + example: "PMID:85642" + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Relevant publications + keywords_study: + type: array + items: + type: string + enum: + - contains_ig + - contains_tr + - contains_paired_chain + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + - contains_schema_receptor + nullable: true + description: Keywords describing properties of one or more data sets in a study + title: Keywords for study + example: + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Keywords for study + format: controlled vocabulary + adc_publish_date: + type: string + format: date-time + nullable: true + description: > + Date the study was first published in the AIRR Data Commons. + title: ADC Publish Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Publish Date + adc_update_date: + type: string + format: date-time + nullable: true + description: > + Date the study data was updated in the AIRR Data Commons. + title: ADC Update Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Update Date + +# 1-to-n relationship between a study and its subjects +# subject_id is unique within a study +Subject: + type: object + required: + - subject_id + - synthetic + - species + - sex + - age_min + - age_max + - age_unit + - age_event + - ancestry_population + - ethnicity + - race + - strain_name + - linked_subjects + - link_type + properties: + subject_id: + type: string + nullable: true + description: > + Subject ID assigned by submitter, unique within study. If possible, a persistent subject ID linked to + an INSDC or similar repository study should be used.
+ title: Subject ID + example: SUB856413 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Subject ID + synthetic: + type: boolean + nullable: false + description: TRUE for libraries in which the diversity has been synthetically generated (e.g. phage display) + title: Synthetic library + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Synthetic library + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Organism + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + organism: + $ref: '#/Ontology' + nullable: true + description: Binomial designation of subject's species + x-airr: + deprecated: true + deprecated-description: Field was renamed to species for clarity. + deprecated-replaced-by: + - species + sex: + type: string + enum: + - male + - female + - pooled + - hermaphrodite + - intersex + - "not collected" + - "not applicable" + nullable: true + description: Biological sex of subject + title: Sex + example: female + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Sex + format: controlled vocabulary + age_min: + type: number + nullable: true + description: Specific age or lower boundary of age range. + title: Age minimum + example: 60 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age minimum + age_max: + type: number + nullable: true + description: > + Upper boundary of age range or equal to age_min for specific age. + This field should only be null if age_min is null. 
+ title: Age maximum + example: 80 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age maximum + age_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of age range + title: Age unit + example: + id: UO:0000036 + label: year + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + age_event: + type: string + nullable: true + description: > + Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other + implementations submitters need to be aware that there is currently no mechanism to encode to potential + delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. + title: Age event + example: enrollment + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age event + age: + type: string + nullable: true + x-airr: + deprecated: true + deprecated-description: Split into two fields to specify as an age range. 
+ deprecated-replaced-by: + - age_min + - age_max + - age_unit + ancestry_population: + type: string + nullable: true + description: Broad geographic origin of ancestry (continent) + title: Ancestry population + example: list of continents, mixed or unknown + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ancestry population + ethnicity: + type: string + nullable: true + description: Ethnic group of subject (defined as cultural/language-based membership) + title: Ethnicity + example: English, Kurds, Manchu, Yakuts (and other fields from Wikipedia) + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ethnicity + race: + type: string + nullable: true + description: Racial group of subject (as defined by NIH) + title: Race + example: White, American Indian or Alaska Native, Black, Asian, Native Hawaiian or Other Pacific Islander, Other + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Race + strain_name: + type: string + nullable: true + description: Non-human designation of the strain or breed of animal used + title: Strain name + example: C57BL/6J + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Strain name + linked_subjects: + type: string + nullable: true + description: Subject ID to which `Relation type` refers + title: Relation to other subjects + example: SUB1355648 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation to other subjects + link_type: + type: string + nullable: true + description: Relation between subject and `linked_subjects`, can be genetic or environmental (e.g. exposure) + title: Relation type + example: father, daughter, household + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation type + diagnosis: + type: array + nullable: false + description: Diagnosis information for subject + items: +
$ref: '#/Diagnosis' + x-airr: + adc-query-support: true + genotype: + type: object + nullable: true + description: Genotype for this subject, if known + properties: + receptor_genotype_set: + nullable: true + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + nullable: true + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. + +# 1-to-n relationship between a subject and its diagnoses +Diagnosis: + type: object + required: + - study_group_description + - disease_diagnosis + - disease_length + - disease_stage + - prior_therapies + - immunogen + - intervention + - medical_history + properties: + study_group_description: + type: string + nullable: true + description: Designation of study arm to which the subject is assigned to + title: Study group description + example: control + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Study group description + disease_diagnosis: + $ref: '#/Ontology' + nullable: true + description: Diagnosis of subject + title: Diagnosis + example: + id: DOID:9538 + label: multiple myeloma + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis + format: ontology + ontology: + draft: false + top_node: + id: DOID:4 + label: disease + disease_length: + type: string + nullable: true + description: Time duration between initial diagnosis and current intervention + title: Length of disease + example: 23 months + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Length of disease + format: physical quantity + disease_stage: + type: string + nullable: true + description: Stage of disease at current intervention + title: Disease stage + example: Stage II + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Disease stage + prior_therapies: 
+ type: string + nullable: true + description: List of all relevant previous therapies applied to subject for treatment of `Diagnosis` + title: Prior therapies for primary disease under study + example: melphalan/prednisone + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Prior therapies for primary disease under study + immunogen: + type: string + nullable: true + description: Antigen, vaccine or drug applied to subject at this intervention + title: Immunogen/agent + example: bortezomib + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Immunogen/agent + intervention: + type: string + nullable: true + description: Description of intervention + title: Intervention definition + example: systemic chemotherapy, 6 cycles, 1.25 mg/m2 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Intervention definition + medical_history: + type: string + nullable: true + description: Medical history of subject that is relevant to assess the course of disease and/or treatment + title: Other relevant medical history + example: MGUS, first diagnosed 5 years prior + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Other relevant medical history + +# 1-to-n relationship between a subject and its samples +# sample_id is unique within a study +Sample: + type: object + required: + - sample_id + - sample_type + - tissue + - anatomic_site + - disease_state_sample + - collection_time_point_relative + - collection_time_point_relative_unit + - collection_time_point_reference + - biomaterial_provider + properties: + sample_id: + type: string + nullable: true + description: > + Sample ID assigned by submitter, unique within study. If possible, a persistent sample ID linked to + INSDC or similar repository study should be used. 
+ title: Biological sample ID + example: SUP52415 + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biological sample ID + sample_type: + type: string + nullable: true + description: The way the sample was obtained, e.g. fine-needle aspirate, organ harvest, peripheral venous puncture + title: Sample type + example: Biopsy + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample type + tissue: + $ref: '#/Ontology' + nullable: true + description: The actual tissue sampled, e.g. lymph node, liver, peripheral blood + title: Tissue + example: + id: UBERON:0002371 + label: bone marrow + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Tissue + format: ontology + ontology: + draft: false + top_node: + id: UBERON:0010000 + label: multicellular anatomical structure + anatomic_site: + type: string + nullable: true + description: The anatomic location of the tissue, e.g. Inguinal, femur + title: Anatomic site + example: Iliac crest + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Anatomic site + disease_state_sample: + type: string + nullable: true + description: Histopathologic evaluation of the sample + title: Disease state of sample + example: Tumor infiltration + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Disease state of sample + collection_time_point_relative: + type: number + nullable: true + description: Time point at which sample was taken, relative to `Collection time event` + title: Sample collection time + example: "14" + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample collection time + collection_time_point_relative_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of Sample collection time + title: Sample collection time unit + example: + id: UO:0000033 + label: day + x-airr: + miairr: important + 
adc-query-support: true + set: 2 + subset: sample + name: Sample collection time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + collection_time_point_reference: + type: string + nullable: true + description: Event in the study schedule to which `Sample collection time` relates to + title: Collection time event + example: Primary vaccination + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Collection time event + biomaterial_provider: + type: string + nullable: true + description: Name and address of the entity providing the sample + title: Biomaterial provider + example: Tissues-R-Us, Tampa, FL, USA + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biomaterial provider + +# 1-to-n relationship between a sample and processing of its cells +CellProcessing: + type: object + required: + - tissue_processing + - cell_subset + - cell_phenotype + - single_cell + - cell_number + - cells_per_reaction + - cell_storage + - cell_quality + - cell_isolation + - cell_processing_protocol + properties: + tissue_processing: + type: string + nullable: true + description: Enzymatic digestion and/or physical methods used to isolate cells from sample + title: Tissue processing + example: Collagenase A/Dnase I digested, followed by Percoll gradient + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Tissue processing + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population + title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: + type: string + nullable: true + description: List of cellular markers 
and their expression levels used to isolate the cell population + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset phenotype + cell_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the analyzed cells originate. Typically, this value + should be identical to `species`, in which case it SHOULD NOT be set explicitly. However, there are + valid experimental setups in which the two might differ, e.g., chimeric animal models. If set, this + key will overwrite the `species` information for all lower layers of the schema. + title: Cell species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + single_cell: + type: boolean + nullable: true + description: TRUE if single cells were isolated into separate compartments + title: Single-cell sort + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Single-cell sort + cell_number: + type: integer + nullable: true + description: Total number of cells that went into the experiment + title: Number of cells in experiment + example: 1000000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells in experiment + cells_per_reaction: + type: integer + nullable: true + description: Number of cells for each biological replicate + title: Number of cells per sequencing reaction + example: 50000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells per sequencing reaction + cell_storage: + type: boolean + nullable: true + description: TRUE if cells were cryo-preserved between 
isolation and further processing + title: Cell storage + example: TRUE + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell storage + cell_quality: + type: string + nullable: true + description: Relative amount of viable cells after preparation and (if applicable) thawing + title: Cell quality + example: 90% viability as determined by 7-AAD + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell quality + cell_isolation: + type: string + nullable: true + description: Description of the procedure used for marker-based isolation or enrichment of cells + title: Cell isolation / enrichment procedure + example: > + Cells were stained with fluorochrome labeled antibodies and then sorted on a FlowMerlin (CE) cytometer. + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell isolation / enrichment procedure + cell_processing_protocol: + type: string + nullable: true + description: > + Description of the methods applied to the sample including cell preparation/isolation/enrichment and + nucleic acid extraction. This should closely mirror the Materials and methods section in the manuscript. + title: Processing protocol + example: Stimulated with anti-CD3/anti-CD28 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Processing protocol + +# object for PCR primer targets +PCRTarget: + type: object + required: + - pcr_target_locus + - forward_pcr_primer_target_location + - reverse_pcr_primer_target_location + properties: + pcr_target_locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + nullable: true + description: > + Designation of the target locus. Note that this field uses a controlled vocabulary that is meant to + provide a generic classification of the locus, not necessarily the correct designation according to + a specific nomenclature. 
+ title: Target locus for PCR + example: IGK + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Target locus for PCR + format: controlled vocabulary + forward_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most distal nucleotide templated by the forward primer or primer mix + title: Forward PCR primer target location + example: IGHV, +23 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Forward PCR primer target location + reverse_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most proximal nucleotide templated by the reverse primer or primer mix + title: Reverse PCR primer target location + example: IGHG, +57 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Reverse PCR primer target location + +# generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid +# but may be 1-to-n for technical replicates. 
+NucleicAcidProcessing: + type: object + required: + - template_class + - template_quality + - template_amount + - template_amount_unit + - library_generation_method + - library_generation_protocol + - library_generation_kit_version + - complete_sequences + - physical_linkage + properties: + template_class: + type: string + enum: + - DNA + - RNA + nullable: false + description: > + The class of nucleic acid that was used as primary starting material for the following procedures + title: Target substrate + example: RNA + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate + format: controlled vocabulary + template_quality: + type: string + nullable: true + description: Description and results of the quality control performed on the template material + title: Target substrate quality + example: RIN 9.2 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate quality + template_amount: + type: number + nullable: true + description: Amount of template that went into the process + title: Template amount + example: 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount + template_amount_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of template amount + title: Template amount time unit + example: + id: UO:0000024 + label: nanogram + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + library_generation_method: + type: string + enum: + - "PCR" + - "RT(RHP)+PCR" + - "RT(oligo-dT)+PCR" + - "RT(oligo-dT)+TS+PCR" + - "RT(oligo-dT)+TS(UMI)+PCR" + - "RT(specific)+PCR" + - "RT(specific)+TS+PCR" + - "RT(specific)+TS(UMI)+PCR" + - "RT(specific+UMI)+PCR" + - "RT(specific+UMI)+TS+PCR" 
+ - "RT(specific)+TS" + - "other" + nullable: false + description: Generic type of library generation + title: Library generation method + example: RT(oligo-dT)+TS(UMI)+PCR + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation method + format: controlled vocabulary + library_generation_protocol: + type: string + nullable: true + description: Description of processes applied to substrate to obtain a library that is ready for sequencing + title: Library generation protocol + example: cDNA was generated using + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation protocol + library_generation_kit_version: + type: string + nullable: true + description: When using a library generation protocol from a commercial provider, provide the protocol version number + title: Protocol IDs + example: v2.1 (2016-09-15) + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Protocol IDs + pcr_target: + type: array + nullable: false + description: > + If a PCR step was performed that specifically targets the IG/TR loci, the target and primer locations + need to be provided here. This field holds an array of PCRTarget objects, so that multiplex PCR setups + amplifying multiple loci at the same time can be annotated using one record per locus. PCR setups not + targeting any specific locus must not annotate this field but select the appropriate + library_generation_method instead. + items: + $ref: '#/PCRTarget' + x-airr: + adc-query-support: true + complete_sequences: + type: string + enum: + - partial + - complete + - "complete+untemplated" + - mixed + nullable: false + description: > + To be considered `complete`, the procedure used for library construction MUST generate sequences that + 1) include the first V gene codon that encodes the mature polypeptide chain (i.e. 
after the + leader sequence) and 2) include the last complete codon of the J gene (i.e. 1 bp 5' of the J->C + splice site) and 3) provide sequence information for all positions between 1) and 2). To be considered + `complete & untemplated`, the sections of the sequences defined in points 1) to 3) of the previous + sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation. + `mixed` should only be used if the procedure used for library construction will likely produce multiple + categories of sequences in the given experiment. It SHOULD NOT be used as a replacement of a NULL value. + title: Complete sequences + example: partial + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Complete sequences + format: controlled vocabulary + physical_linkage: + type: string + enum: + - none + - "hetero_head-head" + - "hetero_tail-head" + - "hetero_prelinked" + nullable: false + description: > + In case an experimental setup is used that physically links nucleic acids derived from distinct + `Rearrangements` before library preparation, this field describes the mode of that linkage. All + `hetero_*` terms indicate that in case of paired-read sequencing, the two reads should be expected + to map to distinct IG/TR loci. `*_head-head` refers to techniques that link the 5' ends of transcripts + in a single-cell context. `*_tail-head` refers to techniques that link the 3' end of one transcript to + the 5' end of another one in a single-cell context. This term does not provide any information whether + a continuous reading-frame between the two is generated. `*_prelinked` refers to constructs in which + the linkage was already present on the DNA level (e.g. scFv). 
+ title: Physical linkage of different rearrangements + example: hetero_head-head + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Physical linkage of different rearrangements + format: controlled vocabulary + +# 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) +SequencingRun: + type: object + required: + - sequencing_run_id + - total_reads_passing_qc_filter + - sequencing_platform + - sequencing_facility + - sequencing_run_date + - sequencing_kit + properties: + sequencing_run_id: + type: string + nullable: true + description: ID of sequencing run assigned by the sequencing facility + title: Batch number + example: 160101_M01234 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Batch number + total_reads_passing_qc_filter: + type: integer + nullable: true + description: Number of usable reads for analysis + title: Total reads passing QC filter + example: 10365118 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Total reads passing QC filter + sequencing_platform: + type: string + nullable: true + description: Designation of sequencing instrument used + title: Sequencing platform + example: Alumina LoSeq 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing platform + sequencing_facility: + type: string + nullable: true + description: Name and address of sequencing facility + title: Sequencing facility + example: Seqs-R-Us, Vancouver, BC, Canada + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing facility + sequencing_run_date: + type: string + nullable: true + description: Date of sequencing run + title: Date of sequencing run + format: date + example: 2016-12-16 + x-airr: + miairr: important + adc-query-support: true 
+ set: 3 + subset: process (sequencing) + name: Date of sequencing run + sequencing_kit: + type: string + nullable: true + description: Name, manufacturer, order and lot numbers of sequencing kit + title: Sequencing kit + example: "FullSeq 600, Alumina, #M123456C0, 789G1HK" + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing kit + sequencing_files: + $ref: '#/SequencingData' + nullable: false + description: Set of sequencing files produced by the sequencing run + x-airr: + adc-query-support: true + +# Resultant raw sequencing files from a SequencingRun +SequencingData: + type: object + required: + - sequencing_data_id + - file_type + - filename + - read_direction + - read_length + - paired_filename + - paired_read_direction + - paired_read_length + properties: + sequencing_data_id: + type: string + nullable: true + description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. + title: Raw sequencing data persistent identifier + example: "SRA:SRR11610494" + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + format: CURIE + file_type: + type: string + nullable: true + description: File format for the raw reads or sequences + title: Raw sequencing data file type + enum: + - fasta + - fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file type + format: controlled vocabulary + filename: + type: string + nullable: true + description: File name for the raw reads or sequences. The first file in paired-read sequencing. 
+ title: Raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R1_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file name + read_direction: + type: string + nullable: true + description: Read direction for the raw reads or sequences. The first file in paired-read sequencing. + title: Read direction + example: forward + enum: + - forward + - reverse + - mixed + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Read direction + format: controlled vocabulary + read_length: + type: integer + nullable: true + description: Read length in bases for the first file in paired-read sequencing + title: Forward read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Forward read length + paired_filename: + type: string + nullable: true + description: File name for the second file in paired-read sequencing + title: Paired raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R2_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired raw sequencing data file name + paired_read_direction: + type: string + nullable: true + description: Read direction for the second file in paired-read sequencing + title: Paired read direction + example: reverse + enum: + - forward + - reverse + - mixed + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read direction + format: controlled vocabulary + paired_read_length: + type: integer + nullable: true + description: Read length in bases for the second file in paired-read sequencing + title: Paired read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read length + index_filename: + type: string + nullable: true + description: File name 
for the index file + title: Sequencing index file name + example: MS10R-NMonson-C7JR9_S1_R3_001.fastq + x-airr: + adc-query-support: true + index_length: + type: integer + nullable: true + description: Read length in bases for the index file + title: Index read length + example: 8 + x-airr: + adc-query-support: true + +# 1-to-n relationship between a repertoire and data processing +# +# Set of annotated rearrangement sequences produced by +# data processing upon the raw sequence data for a repertoire. +DataProcessing: + type: object + required: + - software_versions + - paired_reads_assembly + - quality_thresholds + - primer_match_cutoffs + - collapsing_method + - data_processing_protocols + - germline_database + properties: + data_processing_id: + type: string + nullable: true + description: Identifier for the data processing object. + title: Data processing ID + x-airr: + name: Data processing ID + adc-query-support: true + identifier: true + primary_annotation: + type: boolean + default: false + nullable: false + description: > + If true, indicates this is the primary or default data processing for + the repertoire and its rearrangements. If false, indicates this is a secondary + or additional data processing. 
+ title: Primary annotation + x-airr: + adc-query-support: true + identifier: true + software_versions: + type: string + nullable: true + description: Version number and / or date, include company pipelines + title: Software tools and version numbers + example: IgBLAST 1.6 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Software tools and version numbers + paired_reads_assembly: + type: string + nullable: true + description: How paired end reads were assembled into a single receptor sequence + title: Paired read assembly + example: PandaSeq (minimal overlap 50, threshold 0.8) + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Paired read assembly + quality_thresholds: + type: string + nullable: true + description: How sequences were removed from (4) based on base quality scores + title: Quality thresholds + example: Average Phred score >=20 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Quality thresholds + primer_match_cutoffs: + type: string + nullable: true + description: How primers were identified in the sequences, were they removed/masked/etc? + title: Primer match cutoffs + example: Hamming distance <= 2 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Primer match cutoffs + collapsing_method: + type: string + nullable: true + description: The method used for combining multiple sequences from (4) into a single sequence in (5) + title: Collapsing method + example: MUSCLE 3.8.31 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Collapsing method + data_processing_protocols: + type: string + nullable: true + description: General description of how QC is performed + title: Data processing protocols + example: Data was processed using [...] 
+ x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Data processing protocols + data_processing_files: + type: array + items: + type: string + nullable: true + description: Array of file names for data produced by this data processing. + title: Processed data file names + example: + - 'ERR1278153_aa.txz' + - 'ERR1278153_ab.txz' + - 'ERR1278153_ac.txz' + x-airr: + adc-query-support: true + name: Processed data file names + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + title: V(D)J germline reference database + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: data (processed sequence) + name: V(D)J germline reference database + germline_set_ref: + type: string + nullable: true + description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + x-airr: + adc-query-support: true + analysis_provenance_id: + type: string + nullable: true + description: Identifier for machine-readable PROV model of analysis provenance + title: Analysis provenance ID + x-airr: + adc-query-support: true + +SampleProcessing: + allOf: + - type: object + properties: + sample_processing_id: + type: string + nullable: true + description: > + Identifier for the sample processing object. This field should be unique within the repertoire. + This field can be used to uniquely identify the combination of sample, cell processing, + nucleic acid processing and sequencing run information for the repertoire. 
+ title: Sample processing ID + x-airr: + name: Sample processing ID + adc-query-support: true + identifier: true + - $ref: '#/Sample' + - $ref: '#/CellProcessing' + - $ref: '#/NucleicAcidProcessing' + - $ref: '#/SequencingRun' + + +# The composite schema for the repertoire object +# +# This represents a sample repertoire as defined by the study +# and experimentally observed by raw sequence data. A repertoire +# can only be for one subject but may include multiple samples. +Repertoire: + type: object + required: + - study + - subject + - sample + - data_processing + properties: + repertoire_id: + type: string + nullable: true + description: > + Identifier for the repertoire object. This identifier should be globally unique so that repertoires + from multiple studies can be combined together without conflict. The repertoire_id is used to link + other AIRR data to a Repertoire. Specifically, the Rearrangements Schema includes repertoire_id for + referencing the specific Repertoire for that Rearrangement. 
+ title: Repertoire ID + x-airr: + adc-query-support: true + identifier: true + repertoire_name: + type: string + nullable: true + description: Short generic display name for the repertoire + title: Repertoire name + x-airr: + name: Repertoire name + adc-query-support: true + repertoire_description: + type: string + nullable: true + description: Generic repertoire description + title: Repertoire description + x-airr: + name: Repertoire description + adc-query-support: true + study: + $ref: '#/Study' + nullable: false + description: Study object + x-airr: + adc-query-support: true + subject: + $ref: '#/Subject' + nullable: false + description: Subject object + x-airr: + adc-query-support: true + sample: + type: array + nullable: false + description: List of Sample Processing objects + items: + $ref: '#/SampleProcessing' + x-airr: + adc-query-support: true + data_processing: + type: array + nullable: false + description: List of Data Processing objects + items: + $ref: '#/DataProcessing' + x-airr: + adc-query-support: true + +# A collection of repertoires for analysis purposes, includes optional time course +RepertoireGroup: + type: object + required: + - repertoire_group_id + - repertoires + properties: + repertoire_group_id: + type: string + nullable: true + description: Identifier for this repertoire collection + repertoire_group_name: + type: string + nullable: true + description: Short display name for this repertoire collection + repertoire_group_description: + type: string + nullable: true + description: Repertoire collection description + repertoires: + type: array + nullable: true + description: > + List of repertoires in this collection with an associated description and time point designation + items: + type: object + properties: + repertoire_id: + type: string + nullable: false + description: Identifier to the repertoire + x-airr: + adc-query-support: true + repertoire_description: + type: string + nullable: true + description: Description of this 
repertoire within the group + x-airr: + adc-query-support: true + time_point: + $ref: '#/TimePoint' + nullable: true + description: Time point designation for this repertoire within the group + x-airr: + adc-query-support: true + +Alignment: + type: object + required: + - sequence_id + - segment + - call + - score + - cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier within the file. Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. + segment: + type: string + nullable: true + description: > + The segment for this alignment. One of V, D, J or C. + rev_comp: + type: boolean + nullable: true + description: > + Alignment result is from the reverse complement of the query sequence. + call: + type: string + nullable: true + description: > + Gene assignment with allele. + score: + type: number + nullable: true + description: > + Alignment score. + identity: + type: number + nullable: true + description: > + Alignment fractional identity. + support: + type: number + nullable: true + description: > + Alignment E-value, p-value, likelihood, probability or other similar measure of + support for the gene assignment as defined by the alignment tool. + cigar: + type: string + nullable: true + description: > + Alignment CIGAR string. + sequence_start: + type: integer + nullable: true + description: > + Start position of the segment in the query sequence (1-based closed interval). + sequence_end: + type: integer + nullable: true + description: > + End position of the segment in the query sequence (1-based closed interval). + germline_start: + type: integer + nullable: true + description: > + Alignment start position in the reference sequence (1-based closed interval). 
+ germline_end: + type: integer + nullable: true + description: > + Alignment end position in the reference sequence (1-based closed interval). + rank: + type: integer + nullable: true + description: > + Alignment rank. + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + + +# The extended rearrangement object +Rearrangement: + type: object + required: + - sequence_id + - sequence + - rev_comp + - productive + - v_call + - d_call + - j_call + - sequence_alignment + - germline_alignment + - junction + - junction_aa + - v_cigar + - d_cigar + - j_cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier for the Rearrangement. Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. 
When + downloaded from an AIRR Data Commons repository, this will usually be a universally unique + record locator for linking with other objects in the AIRR Data Model. + x-airr: + adc-query-support: true + identifier: true + sequence: + type: string + nullable: true + description: > + The query nucleotide sequence. Usually, this is the unmodified input sequence, which may be + reverse complemented if necessary. In some cases, this field may contain consensus sequences or + other types of collapsed input sequences if these steps are performed prior to alignment. + quality: + type: string + nullable: true + description: > + The Sanger/Phred quality scores for assessment of sequence quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_aa: + type: string + nullable: true + description: > + Amino acid translation of the query nucleotide sequence. + rev_comp: + type: boolean + nullable: true + description: > + True if the alignment is on the opposite strand (reverse complemented) with respect to the + query sequence. If True then all output data, such as alignment coordinates and sequences, + are based on the reverse complement of 'sequence'. + productive: + type: boolean + nullable: true + description: > + True if the V(D)J sequence is predicted to be productive. + x-airr: + adc-query-support: true + vj_in_frame: + type: boolean + nullable: true + description: True if the V and J gene alignments are in-frame. + stop_codon: + type: boolean + nullable: true + description: True if the aligned sequence contains a stop codon. + complete_vdj: + type: boolean + nullable: true + description: > + True if the sequence alignment spans the entire V(D)J region. Meaning, + sequence_alignment includes both the first V gene codon that encodes the + mature polypeptide chain (i.e., after the leader sequence) and the last + complete codon of the J gene (i.e., before the J-C splice site). 
+ This does not require an absence of deletions within the internal + FWR and CDR regions of the alignment. + locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + nullable: true + description: > + Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a + generic classification of the locus, not necessarily the correct designation according to a specific + nomenclature. + title: Gene locus + example: IGH + x-airr: + adc-query-support: true + name: Gene locus + format: controlled vocabulary + v_call: + type: string + nullable: true + description: > + V gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHV4-59*01 if using IMGT/GENE-DB). + title: V gene with allele + example: IGHV4-59*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: V gene with allele + d_call: + type: string + nullable: true + description: > + First or only D gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + title: D gene with allele + example: IGHD3-10*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: D gene with allele + d2_call: + type: string + nullable: true + description: > + Second D gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHJ4*02 if using IMGT/GENE-DB). 
+ title: J gene with allele + example: IGHJ4*02 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: J gene with allele + c_call: + type: string + nullable: true + description: > + Constant region gene with allele. If referring to a known reference sequence in a database the + relevant gene/allele nomenclature should be followed (e.g., IGHG1*01 if using IMGT/GENE-DB). + title: C region + example: IGHG1*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: C region + sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence, including any indel corrections or numbering spacers, + such as IMGT-gaps. Typically, this will include only the V(D)J region, but that is not + a requirement. + quality_alignment: + type: string + nullable: true + description: > + Sanger/Phred quality scores for assessment of sequence_alignment quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the aligned query sequence. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred germline sequence spanning the same region + as the sequence_alignment field (typically the V(D)J region) and including the same set + of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the assembled germline sequence. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence, where the junction is defined as + the CDR3 plus the two flanking conserved codons. 
+ title: IMGT-JUNCTION nucleotide sequence + example: TGTGCAAGAGCGGGAGTTTACGACGGATATACTATGGACTACTGG + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION nucleotide sequence + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + title: IMGT-JUNCTION amino acid sequence + example: CARAGVYDGYTMDYW + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION amino acid sequence + np1: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the V gene and + first D gene alignment or between the V gene and J gene alignments. + np1_aa: + type: string + nullable: true + description: > + Amino acid translation of the np1 field. + np2: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between either the first D gene and J gene + alignments or the first D gene and second D gene alignments. + np2_aa: + type: string + nullable: true + description: > + Amino acid translation of the np2 field. + np3: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the second D gene + and J gene alignments. + np3_aa: + type: string + nullable: true + description: > + Amino acid translation of the np3 field. + cdr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR1 region. + cdr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr1 field. + cdr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR2 region. + cdr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr2 field. + cdr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR3 region. 
+ cdr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr3 field. + fwr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR1 region. + fwr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr1 field. + fwr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR2 region. + fwr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr2 field. + fwr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR3 region. + fwr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr3 field. + fwr4: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR4 region. + fwr4_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr4 field. + v_score: + type: number + nullable: true + description: Alignment score for the V gene. + v_identity: + type: number + nullable: true + description: Fractional identity for the V gene alignment. + v_support: + type: number + nullable: true + description: > + V gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the V gene assignment as defined by the alignment tool. + v_cigar: + type: string + nullable: true + description: CIGAR string for the V gene alignment. + d_score: + type: number + nullable: true + description: Alignment score for the first or only D gene alignment. + d_identity: + type: number + nullable: true + description: Fractional identity for the first or only D gene alignment. + d_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the first or only D gene as defined by the alignment tool. 
+ d_cigar: + type: string + nullable: true + description: CIGAR string for the first or only D gene alignment. + d2_score: + type: number + nullable: true + description: Alignment score for the second D gene alignment. + d2_identity: + type: number + nullable: true + description: Fractional identity for the second D gene alignment. + d2_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the second D gene as defined by the alignment tool. + d2_cigar: + type: string + nullable: true + description: CIGAR string for the second D gene alignment. + j_score: + type: number + nullable: true + description: Alignment score for the J gene alignment. + j_identity: + type: number + nullable: true + description: Fractional identity for the J gene alignment. + j_support: + type: number + nullable: true + description: > + J gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the J gene assignment as defined by the alignment tool. + j_cigar: + type: string + nullable: true + description: CIGAR string for the J gene alignment. + c_score: + type: number + nullable: true + description: Alignment score for the C gene alignment. + c_identity: + type: number + nullable: true + description: Fractional identity for the C gene alignment. + c_support: + type: number + nullable: true + description: > + C gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the C gene assignment as defined by the alignment tool. + c_cigar: + type: string + nullable: true + description: CIGAR string for the C gene alignment. + v_sequence_start: + type: integer + nullable: true + description: > + Start position of the V gene in the query sequence (1-based closed interval). 
+ v_sequence_end: + type: integer + nullable: true + description: > + End position of the V gene in the query sequence (1-based closed interval). + v_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the V gene reference sequence (1-based closed interval). + v_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the V gene reference sequence (1-based closed interval). + v_alignment_start: + type: integer + nullable: true + description: > + Start position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + v_alignment_end: + type: integer + nullable: true + description: > + End position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_sequence_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in the query sequence. + (1-based closed interval). + d_sequence_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in the query sequence. + (1-based closed interval). + d_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). 
+ d2_sequence_start: + type: integer + nullable: true + description: > + Start position of the second D gene in the query sequence (1-based closed interval). + d2_sequence_end: + type: integer + nullable: true + description: > + End position of the second D gene in the query sequence (1-based closed interval). + d2_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the second D gene reference sequence (1-based closed interval). + d2_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the second D gene reference sequence (1-based closed interval). + d2_alignment_start: + type: integer + nullable: true + description: > + Start position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). + d2_alignment_end: + type: integer + nullable: true + description: > + End position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). + j_sequence_start: + type: integer + nullable: true + description: > + Start position of the J gene in the query sequence (1-based closed interval). + j_sequence_end: + type: integer + nullable: true + description: > + End position of the J gene in the query sequence (1-based closed interval). + j_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the J gene reference sequence (1-based closed interval). + j_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the J gene reference sequence (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). 
+ j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_sequence_start: + type: integer + nullable: true + description: > + Start position of the C gene in the query sequence (1-based closed interval). + c_sequence_end: + type: integer + nullable: true + description: > + End position of the C gene in the query sequence (1-based closed interval). + c_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the C gene reference sequence (1-based closed interval). + c_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the C gene reference sequence (1-based closed interval). + c_alignment_start: + type: integer + nullable: true + description: > + Start position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_alignment_end: + type: integer + nullable: true + description: > + End position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + cdr1_start: + type: integer + nullable: true + description: CDR1 start position in the query sequence (1-based closed interval). + cdr1_end: + type: integer + nullable: true + description: CDR1 end position in the query sequence (1-based closed interval). + cdr2_start: + type: integer + nullable: true + description: CDR2 start position in the query sequence (1-based closed interval). + cdr2_end: + type: integer + nullable: true + description: CDR2 end position in the query sequence (1-based closed interval). + cdr3_start: + type: integer + nullable: true + description: CDR3 start position in the query sequence (1-based closed interval). + cdr3_end: + type: integer + nullable: true + description: CDR3 end position in the query sequence (1-based closed interval). 
+ fwr1_start: + type: integer + nullable: true + description: FWR1 start position in the query sequence (1-based closed interval). + fwr1_end: + type: integer + nullable: true + description: FWR1 end position in the query sequence (1-based closed interval). + fwr2_start: + type: integer + nullable: true + description: FWR2 start position in the query sequence (1-based closed interval). + fwr2_end: + type: integer + nullable: true + description: FWR2 end position in the query sequence (1-based closed interval). + fwr3_start: + type: integer + nullable: true + description: FWR3 start position in the query sequence (1-based closed interval). + fwr3_end: + type: integer + nullable: true + description: FWR3 end position in the query sequence (1-based closed interval). + fwr4_start: + type: integer + nullable: true + description: FWR4 start position in the query sequence (1-based closed interval). + fwr4_end: + type: integer + nullable: true + description: FWR4 end position in the query sequence (1-based closed interval). + v_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the V gene, including any + indel corrections or numbering spacers. + v_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_sequence_alignment field. + d_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the first or only D gene, including any + indel corrections or numbering spacers. + d_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_sequence_alignment field. + d2_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the second D gene, including any + indel corrections or numbering spacers. 
+ d2_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_sequence_alignment field. + j_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the J gene, including any + indel corrections or numbering spacers. + j_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_sequence_alignment field. + c_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the constant region, including + any indel corrections or numbering spacers. + c_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_sequence_alignment field. + v_germline_alignment: + type: string + nullable: true + description: > + Aligned V gene germline sequence spanning the same region + as the v_sequence_alignment field and including the same set + of corrections and spacers (if any). + v_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_germline_alignment field. + d_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d_sequence_alignment field and including the same set + of corrections and spacers (if any). + d_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_germline_alignment field. + d2_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d2_sequence_alignment field and including the same set + of corrections and spacers (if any). + d2_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_germline_alignment field. 
+ j_germline_alignment: + type: string + nullable: true + description: > + Aligned J gene germline sequence spanning the same region + as the j_sequence_alignment field and including the same set + of corrections and spacers (if any). + j_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_germline_alignment field. + c_germline_alignment: + type: string + nullable: true + description: > + Aligned constant region germline sequence spanning the same region + as the c_sequence_alignment field and including the same set + of corrections and spacers (if any). + c_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_germline_alignment field. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction sequence. + junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in the junction sequence. + x-airr: + adc-query-support: true + np1_length: + type: integer + nullable: true + description: > + Number of nucleotides between the V gene and first D gene alignments or + between the V gene and J gene alignments. + np2_length: + type: integer + nullable: true + description: > + Number of nucleotides between either the first D gene and J gene alignments + or the first D gene and second D gene alignments. + np3_length: + type: integer + nullable: true + description: > + Number of nucleotides between the second D gene and J gene alignments. + n1_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 5' of the first or only D gene alignment. + n2_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the first or only D gene alignment. + n3_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the second D gene alignment. 
+ p3v_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the V gene alignment. + p5d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the first or only D gene alignment. + p3d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the first or only D gene alignment. + p5d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the second D gene alignment. + p3d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the second D gene alignment. + p5j_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the J gene alignment. + v_frameshift: + type: boolean + nullable: true + description: > + True if the V gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the V gene reference sequence. + j_frameshift: + type: boolean + nullable: true + description: > + True if the J gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the J gene reference sequence. + d_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the first or only D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + d2_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the second D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + consensus_count: + type: integer + nullable: true + description: > + Number of reads contributing to the UMI consensus or contig assembly for this sequence. + For example, the sum of the number of reads for all UMIs that contribute to + the query sequence. 
+ duplicate_count: + type: integer + nullable: true + description: > + Copy number or number of duplicate observations for the query sequence. + For example, the number of identical reads observed for this sequence. + title: Read count + example: 123 + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: Read count + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. + cell_id: + type: string + nullable: true + description: > + Identifier defining the cell of origin for the query sequence. + title: Cell index + example: W06_046_091 + x-airr: + miairr: important + adc-query-support: true + identifier: true + set: 6 + subset: data (processed sequence) + name: Cell index + clone_id: + type: string + nullable: true + description: Clonal cluster assignment for the query sequence. + x-airr: + adc-query-support: true + identifier: true + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + identifier: true + sample_processing_id: + type: string + nullable: true + description: > + Identifier to the sample processing object in the repertoire metadata + for this rearrangement. If the repertoire has a single sample then + this field may be empty or missing. If the repertoire has multiple samples then + this field may be empty or missing if the sample cannot be differentiated or + the relationship is not maintained by the data processing. + x-airr: + adc-query-support: true + identifier: true + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. 
+ x-airr: + adc-query-support: true + identifier: true + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + rearrangement_set_id: + type: string + nullable: true + description: > + Identifier for grouping Rearrangement objects. + x-airr: + deprecated: true + deprecated-description: Field has been replaced by other specialized identifiers. + deprecated-replaced-by: + - repertoire_id + - sample_processing_id + - data_processing_id + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + +# A unique inferred clone object that has been constructed within a single data processing +# for a single repertoire and a subset of its sequences and/or rearrangements. +Clone: + type: object + required: + - clone_id + - germline_alignment + properties: + clone_id: + type: string + nullable: true + description: Identifier for the clone. + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this clone. 
+ x-airr: + adc-query-support: true + sequences: + type: array + items: + type: string + nullable: true + description: > + List of sequence_id strings that act as keys to the Rearrangement records for members of the clone. + v_call: + type: string + nullable: true + description: > + V gene with allele of the inferred ancestor of the clone. For example, IGHV4-59*01. + example: IGHV4-59*01 + d_call: + type: string + nullable: true + description: > + D gene with allele of the inferred ancestor of the clone. For example, IGHD3-10*01. + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele of the inferred ancestor of the clone. For example, IGHJ4*02. + example: IGHJ4*02 + junction: + type: string + nullable: true + description: > + Nucleotide sequence for the junction region of the inferred ancestor of the clone, + where the junction is defined as the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction. + junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in junction_aa. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred ancestor of the clone spanning the same region + as the sequence_alignment field of nodes (typically the V(D)J region) and including the + same set of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of germline_alignment. + v_alignment_start: + type: integer + nullable: true + description: > + Start position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). 
+ v_alignment_end: + type: integer + nullable: true + description: > + End position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + junction_start: + type: integer + nullable: true + description: Junction region start position in the alignment (1-based closed interval). + junction_end: + type: integer + nullable: true + description: Junction region end position in the alignment (1-based closed interval). + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: + type: integer + nullable: true + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. 
+ seed_id: + type: string + nullable: true + description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. + +# 1-to-n relationship for a clone to its trees. +Tree: + type: object + required: + - tree_id + - clone_id + - newick + properties: + tree_id: + type: string + nullable: true + description: Identifier for the tree. + clone_id: + type: string + nullable: true + description: Identifier for the clone. + newick: + type: string + nullable: true + description: Newick string of the tree edges. + nodes: + type: object + nullable: true + description: Dictionary of nodes in the tree, keyed by sequence_id string + additionalProperties: + $ref: '#/Node' + +# 1-to-n relationship between a tree and its nodes +Node: + type: object + required: + - sequence_id + properties: + sequence_id: + type: string + nullable: true + description: > + Identifier for this node that matches the identifier in the newick string and, where possible, + the sequence_id in the source repertoire. + sequence_alignment: + type: string + nullable: true + description: > + Nucleotide sequence of the node, aligned to the germline_alignment for this clone, including + any indel corrections or spacers. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence for the node, where the junction is defined as + the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + +# The cell object acts as point of reference for all data that can be related +# to an individual cell, either by direct observation or inference. +Cell: + type: object + required: + - cell_id + - rearrangements + - repertoire_id + - virtual_pairing + properties: + cell_id: + type: string + nullable: false + description: > + Identifier defining the cell of origin for the query sequence. 
+ title: Cell index + example: W06_046_091 + x-airr: + miairr: defined + adc-query-support: true + name: Cell index + rearrangements: + type: array + nullable: true + description: > + Array of sequence identifiers defined for the Rearrangement object + title: Cell-associated rearrangements + items: + type: string + example: [id1, id2] #empty vs NULL? + x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated rearrangements + receptors: + type: array + nullable: true + description: > + Array of receptor identifiers defined for the Receptor object + title: Cell-associated receptors + items: + type: string + example: [id1, id2] #empty vs NULL? + x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated receptors + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + title: Parental repertoire of cell + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this cell. + title: Data processing for cell + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + expression_study_method: + type: string + enum: + - "flow cytometry" + - "single-cell transcriptome" + nullable: true + description: > + keyword describing the methodology used to assess expression. The values for this field MUST come from a controlled vocabulary + x-airr: + miairr: defined + adc-api-optional: true + expression_raw_doi: + type: string + nullable: true + description: > + DOI of raw data set containing the current event + x-airr: + miairr: defined + adc-api-optional: true + expression_index: + type: string + nullable: true + description: > + Index addressing the current event within the raw data set. 
+ x-airr: + miairr: defined + adc-api-optional: true + virtual_pairing: + type: boolean + nullable: true + description: > + boolean to indicate if pairing was inferred. + title: Virtual pairing + x-airr: + miairr: defined + adc-query-support: true + name: Virtual pairing + +# The CellExpression object acts as a container to hold a single expression level measurement from +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# a single repertoire. +CellExpression: + type: object + required: + - expression_id + - repertoire_id + - data_processing_id + - cell_id + - property + - value + properties: + expression_id: + type: string + description: > + Identifier of this expression property measurement. + title: Expression property measurement identifier + nullable: false + x-airr: + miairr: defined + adc-query-support: true + name: Expression measurement identifier + cell_id: + type: string + description: > + Identifier of the cell to which this expression data is related. + title: Cell identifier + nullable: false + example: W06_046_091 + x-airr: + miairr: defined + adc-query-support: true + name: Cell identifier + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. + title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this cell. + title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + property: + $ref: '#/Ontology' + description: Name of the property observed, typically a gene or antibody identifier (and its label) from a canonical resource such as Ensembl (e.g. 
ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). + title: Property information + nullable: true + example: + id: ENSG:ENSG00000275747 + label: IGHV3-79 + x-airr: + miairr: defined + adc-query-support: true + format: ontology + name: Property information + value: + type: number + description: Level at which the property was observed in the experiment (non-normalized). + title: Property value + nullable: true + example: 3 + x-airr: + miairr: defined + adc-query-support: true + name: Property value + + +# The Receptor object holds information about a receptor and its reactivity. +# +Receptor: + type: object + required: + - receptor_id + - receptor_hash + - receptor_type + - receptor_variable_domain_1_aa + - receptor_variable_domain_1_locus + - receptor_variable_domain_2_aa + - receptor_variable_domain_2_locus + properties: + receptor_id: + type: string + nullable: false + description: ID of the current Receptor object, unique within the local repository. + title: Receptor ID + example: TCR-MM-012345 + x-airr: + adc-query-support: true + receptor_hash: + type: string + nullable: false + description: > + The SHA256 hash of the receptor amino acid sequence, calculated on the concatenated + ``receptor_variable_domain_*_aa`` sequences and represented as base16-encoded string. + title: Receptor hash ID + example: aa1c4b77a6f4927611ab39f5267415beaa0ba07a952c233d803b07e52261f026 + x-airr: + adc-query-support: true + receptor_type: + type: string + nullable: false + enum: + - Ig + - TCR + description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). + x-airr: + adc-api-optional: true + receptor_variable_domain_1_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig heavy, TCR beta or TCR delta chain. 
+ The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene. + example: > + QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS + x-airr: + adc-api-optional: true + receptor_variable_domain_1_locus: + type: string + nullable: false + enum: + - IGH + - TRB + - TRD + description: Locus from which the variable domain in receptor_variable_domain_1_aa originates + example: IGH + x-airr: + adc-api-optional: true + receptor_variable_domain_2_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig light, TCR alpha or TCR gamma chain. + The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene. + example: > + QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL + x-airr: + adc-api-optional: true + receptor_variable_domain_2_locus: + type: string + nullable: false + enum: + - IGK + - IGL + - TRA + - TRG + description: Locus from which the variable domain in receptor_variable_domain_2_aa originates + example: IGL + x-airr: + adc-api-optional: true + receptor_ref: + type: array + nullable: true + description: Array of receptor identifiers defined for the Receptor object + title: Receptor cross-references + items: + type: string + example: ["IEDB_RECEPTOR:10"] + x-airr: + adc-query-support: true + reactivity_measurements: + type: array + nullable: true + description: Records of reactivity measurement + items: + type: object + properties: + ligand_type: + type: string + nullable: false + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor
+ example: non-peptide + antigen_type: + type: string + nullable: false + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + antigen: + $ref: '#/Ontology' + nullable: false + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. + title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + nullable: true + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + nullable: true + description: Start position of the peptide within the reference protein sequence + peptide_end: + type: integer + nullable: true + description: End position of the peptide within the reference protein sequence + mhc_class: + type: string + nullable: true + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + mhc_gene_1: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + nullable: true + description: Allele designation of the MHC alpha chain + example: HLA-DRA + mhc_gene_2: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + 
id: MRO:0000057 + label: HLA-DRB1 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + nullable: true + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + reactivity_method: + type: string + nullable: false + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess expression (assay implemented in experiment) + reactivity_readout: + type: string + nullable: false + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + reactivity_value: + type: number + nullable: false + description: The absolute (processed) value of the measurement + example: 162.26 + reactivity_unit: + type: string + nullable: false + description: The unit of the measurement + example: pg/ml diff --git a/lang/js/airr.js b/lang/js/airr.js new file mode 100644 index 000000000..8a6084551 --- /dev/null +++ b/lang/js/airr.js @@ -0,0 +1,521 @@ +'use strict'; + +// +// airr.js +// AIRR Standards reference library for antibody and TCR sequencing data +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +// Node Libraries +var yaml = require('js-yaml'); +var path = require('path'); +var fs = require('fs'); +const zlib = require('zlib'); +const $RefParser = require("@apidevtools/json-schema-ref-parser"); +var csv = require('csv-parser'); +const AJV = require("ajv"); +const addFormats = require("ajv-formats") + +var airr = {}; +module.exports = airr; + +// Boolean value mappings +var true_values = ['True', 'true', 'TRUE', 'T', 't', '1', 1, true]; +var false_values = ['False', 'false', 'FALSE', 'F', 'f', '0', 0, false]; +var _to_bool_map = function(x) { + if (true_values.indexOf(x) >= 0) return true; + if 
(false_values.indexOf(x) >= 0) return false; + return null; +}; +var _from_bool_map = function(x) { + if (x == true) return 'T'; + if (x == false) return 'F'; + return ''; +}; + +class ValidationError extends Error { + constructor (message) { + super(message) + + // assign the error class name in your custom error (as a shortcut) + this.name = this.constructor.name + + // capturing the stack trace keeps the reference to your error class + Error.captureStackTrace(this, this.constructor); + } +} + +function isPromise(promise) { + return !!promise && typeof promise.then === 'function' +} + +// +// AIRR Schema +// + +// Load AIRR schema, returns a promise +airr.Schema = null; +airr.load_schema = async function() { + // Load AIRR spec + var airrFile = path.resolve(__dirname, './airr-schema-openapi3.yaml'); + var doc = yaml.safeLoad(fs.readFileSync(airrFile)); + if (!doc) Promise.reject(new Error('Could not load AIRR schema yaml file.')); + + // dereference all $ref objects + var spec = await $RefParser.dereference(doc); + airr.Schema = {"specification": spec}; + return Promise.resolve(spec); +}; + +airr.get_schema = function(definition) { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return new airr.SchemaDefinition(definition); +}; + +airr.get_info = function() { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return airr.Schema['specification']['Info']; +} + +airr.SchemaDefinition = function(definition) { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + + if (definition == 'Info') { + throw new Error('Info is an invalid schema definition name'); + } + + this.definition = airr.Schema['specification'][definition]; + if (! this.definition) + throw new Error('Schema definition ' + definition + ' cannot be found in the specifications'); + + this.info = airr.Schema['specification']['Info']; + if (! 
this.info) + throw new Error('Info object cannot be found in the specifications'); + + this.properties = this.definition['properties'] + this.required = this.definition['required'] + if (! this.required) this.required = []; + + //this.optional = [f for f in self.properties if f not in self.required] + + return this; +} + +airr.SchemaDefinition.prototype.spec = function(field) { + return this.properties[field]; +}; + +airr.SchemaDefinition.prototype.type = function(field) { + var field_spec = this.properties[field]; + if (! field_spec) return null; + var field_type = field_spec['type']; + return field_type; +}; + +airr.SchemaDefinition.prototype.is_ontology = function(field) { + var field_spec = this.properties[field]; + if (! field_spec) return false; + var field_type = field_spec['type']; + if (field_type != 'object') return false; + if ((this.properties[field]['x-airr']) && (this.properties[field]['x-airr']['format'] == 'ontology')) return true; + + return false; +}; + +airr.SchemaDefinition.prototype.to_bool = function(value, validate) { + if (value == null) return null; + + var bool_value = _to_bool_map(value); + if (validate && (bool_value == null)) + throw new Error('invalid bool ' + value); + return bool_value; +}; + +airr.SchemaDefinition.prototype.from_bool = function(value, validate) { + if (value == null) return ''; + + var str_value = _from_bool_map(value); + if (validate && (str_value == null)) + throw new Error('invalid bool ' + value); + return str_value; +}; + +airr.SchemaDefinition.prototype.to_int = function(value, validate) { + if (value == null) return null; + if (value == '') return null; + + var int_value = parseInt(value); + if (isNaN(int_value)) { + if (validate) + throw new Error('invalid int ' + value); + else + return null; + } + return int_value; +}; + +airr.SchemaDefinition.prototype.to_float = function(value, validate) { + if (value == null) return null; + if (value == '') return null; + + var float_value = parseFloat(value); + if 
(isNaN(float_value)) { + if (validate) + throw new Error('invalid float ' + value); + else + return null; + } + return float_value; +}; + +airr.SchemaDefinition.prototype.map_value = function(map) { + //console.log('map value: ', map); + //console.log(this); + var field_type = this.type(map['header']); + var field_value = map['value']; + switch (field_type) { + case 'boolean': + field_value = this.to_bool(field_value); + break; + case 'integer': + field_value = this.to_int(field_value); + break; + case 'number': + field_value = this.to_float(field_value); + break; + } + return field_value; +}; + +// +// Validation functions +// + +airr.SchemaDefinition.prototype.validate_header = function(header) { + return false; +} + +airr.SchemaDefinition.prototype.validate_row = function(row) { + return false; +} + +airr.SchemaDefinition.prototype.validate_object = function(object) { + const ajv = new AJV(); + addFormats(ajv); + ajv.addVocabulary(['x-airr', 'example']); + + const validate = ajv.compile(this.definition) + const valid = validate(object) + if (!valid) console.log(validate.errors) + + return valid; +} + +airr.SchemaDefinition.prototype.template = function() { + // Set defaults for each data type + var type_default = {'boolean': false, 'integer': 0, 'number': 0.0, 'string': '', 'array':[]}; + + var _default = function(spec) { + if (spec['default']) return spec['default']; + if (spec['nullable']) return null; + //if (spec['enum']) return spec['enum'][0]; + return type_default[spec['type']]; + }; + + var _populate = function(schema, obj) { + if (schema.allOf) { + for (const k in schema.allOf) + _populate(schema['allOf'][k], obj); + return; + } + for (const k in schema.properties) { + let spec = schema.properties[k]; + // Skip deprecated + if (spec['x-airr'] && spec['x-airr']['deprecated']) + continue + // populate with value + switch (spec['type']) { + case 'object': { + let new_obj = {}; + obj[k] = new_obj; + _populate(spec, new_obj); + break; + } + case 'array': + 
if (spec['items'] && spec['items']['type'] == 'object') { + let new_obj = {}; + obj[k] = [ _populate(spec['items'], new_obj) ]; + } else + obj[k] = _default(spec); + break; + default: + obj[k] = _default(spec); + } + } + }; + + var obj = {}; + _populate(this, obj); + return (obj); +} + +// +// Interface functions for file operations +// + +airr.read_rearrangement = async function(filename, header_callback=null, row_callback=null, validate=false, model=true, debug=false) { + var is_gz = false; + var ext = filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + var schema = new airr.SchemaDefinition('Rearrangement'); + + var mapValues = function(map) { + return schema.map_value(map); + }; + + return new Promise(function(resolve, reject) { + var readable = fs.createReadStream(filename); + if (is_gz) readable.pipe(zlib.createGunzip()); + readable.pipe(csv({separator:'\t', mapValues: mapValues})) + .on('headers', async function(headers) { + readable.pause(); + + if (validate) { + try { + schema.validate_header(headers); + } catch (err) { + reject(err); + } + } + + if (header_callback) { + if (isPromise(header_callback)) await header_callback(headers); + else header_callback(headers); + } + + readable.resume(); + }) + .on('data', async function(row) { + readable.pause(); + + if (validate) { + try { + schema.validate_row(row); + } catch (err) { + reject(err); + } + } + + if (row_callback) { + if (isPromise(row_callback)) await row_callback(row); + else row_callback(row); + } + + readable.resume(); + }) + .on('end', async function() { + return resolve(); + }); + }); +} + +airr.create_rearrangement = function(file) { + return null; +} + +airr.derive_rearrangement = function(file) { + return null; +} + +airr.load_rearrangement = async function(filename, validate=false, debug=false) { + var rows = []; + + var got_row = function(row) { rows.push(row); } + await airr.read_rearrangement(filename, null, got_row, validate, true, debug) + 
.catch(function(error) { Promise.reject(error); }); + + return Promise.resolve(rows); +} + +airr.dump_rearrangement = function(file) { + return null; +} + +airr.merge_rearrangement = function(file) { + return null; +} + +airr.validate_rearrangement = function(file) { + return null; +} + +airr.read_airr = function(filename, validate=false, model=true, debug=false) { + var data = null; + var ext = filename.split('.').pop().toLowerCase(); + if ((ext == 'yaml') || (ext == 'yml') || (ext == 'json')) { + data = yaml.safeLoad(fs.readFileSync(filename)); + } else { + let msg = 'Unknown file type:' + ext + '. Supported file extensions are "yaml", "yml" or "json"'; + if (debug) console.error(msg); + throw new Error(msg); + } + + if (validate) { + if (debug) console.log('Validating:', filename); + try { + var schema = new airr.SchemaDefinition('DataFile'); + schema.validate_object(data); + } catch (err) { + if (debug) console.error(filename, 'failed validation.'); + throw new ValidationError(err); + } + } + + return data; +} + +airr.validate_airr = function(filename) { + return airr.read_airr(filename, true); +} + +airr.write_airr = function(file) { + return null; +} + +// Given a field, check if included in field set +// Field sets include: +// miairr, for only MiAIRR fields +// airr-core, for all required and identifier fields +// airr-schema, for all fields +airr.checkSet = function(schema, field_set, f) { + switch (field_set) { + case 'miairr': + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) + return true; + break; + case 'airr-core': + // miairr + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) + return true; + // identifer + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['identifier'])) + return true; + // required + if ((schema['required']) && (schema['required'].indexOf(f) >= 0)) + return true; + break; + case 'airr-schema': + // all fields + return 
true; + } + return false; +} + +// Recursively walk through schema and collect fields based upon field set. +// The schema loader resolves the $ref references so we do not need to follow them. +airr.collectFields = function(schema, field_set, field_list, context, force) { + for (var f in schema['properties']) { + var full_field = f; + if (context) full_field = context + '.' + f; + //console.log(full_field); + //console.log(schema['properties'][f]); + + // check if deprecated + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['deprecated'])) + continue; + + var field_type = schema['properties'][f]['type']; + switch (field_type) { + case 'object': + // sub-object + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['ontology'])) { + // if it is an ontology object, check the object then force the ontology fields if necessary + if (airr.checkSet(schema, field_set, f)) + airr.collectFields(schema['properties'][f], field_set, field_list, full_field, true); + } else + airr.collectFields(schema['properties'][f], field_set, field_list, full_field, force); + break; + case 'array': + if (schema['properties'][f]['items']['type'] == 'object') { + // array of sub-objects + airr.collectFields(schema['properties'][f]['items'], field_set, field_list, full_field, force); + } else if (schema['properties'][f]['items']['allOf']) { + // array of composite objects + for (var s in schema['properties'][f]['items']['allOf']) { + airr.collectFields(schema['properties'][f]['items']['allOf'][s], field_set, field_list, full_field, force); + } + } else { + // array of primitive types + if (airr.checkSet(schema, field_set, f)) + field_list.push(full_field); + } + break; + case 'string': + case 'number': + case 'integer': + case 'boolean': + // primitive types + if (force) + field_list.push(full_field); + else if (airr.checkSet(schema, field_set, f)) + field_list.push(full_field); + break; + default: + // unhandled schema structure + 
console.error('VDJServer ADC API INFO: Unhandled schema structure: ' + full_field); + break; + } + } +} + +// Add the fields to the document if any are missing +airr.addFields = function(document, field_list, schema) { + for (var r in field_list) { + var path = field_list[r].split('.'); + var obj = document; + var spec = schema; + for (var p = 0; p < path.length; p++) { + spec = spec['properties'][path[p]]; + // if not in the spec then give up + if (!spec) break; + + if (spec['type'] == 'array') { + if ((spec['items']['type'] == undefined) || (spec['items']['type'] == 'object')) { + // array of object + if (obj[path[p]] == undefined) obj[path[p]] = [{}]; + var sub_spec = spec['items']; + if (spec['items']['allOf']) { + // need to combine the properties + sub_spec = { type: 'object', properties: {} }; + for (var i in spec['items']['allOf']) { + var sub_obj = spec['items']['allOf'][i]; + for (var j in sub_obj['properties']) { + sub_spec['properties'][j] = sub_obj['properties'][j]; + } + } + } + for (var a in obj[path[p]]) { + airr.addFields(obj[path[p]][a], [ path.slice(p+1).join('.') ], sub_spec); + } + } else { + // array of primitive data types + if (obj[path[p]] == undefined) obj[path[p]] = null; + } + break; + } else if (spec['type'] == 'object') { + if (obj[path[p]] == undefined) { + if (p == path.length - 1) obj[path[p]] = null; + else obj[path[p]] = {}; + } + obj = obj[path[p]]; + } else if (obj[path[p]] != undefined) obj = obj[path[p]]; + else if (p == path.length - 1) obj[path[p]] = null; + else console.error('VDJServer ADC API ERROR: Internal error (addFields) do not know how to handle path element: ' + p); + } + } +}; + diff --git a/lang/js/jest.config.js b/lang/js/jest.config.js new file mode 100644 index 000000000..eddfa12a0 --- /dev/null +++ b/lang/js/jest.config.js @@ -0,0 +1,195 @@ +/* + * For a detailed explanation regarding each configuration property, visit: + * https://jestjs.io/docs/configuration + */ + +module.exports = { + // All imported 
modules in your tests should be mocked automatically + // automock: false, + + // Stop running tests after `n` failures + // bail: 0, + + // The directory where Jest should store its cached dependency information + // cacheDirectory: "/tmp/jest_0", + + // Automatically clear mock calls, instances, contexts and results before every test + // clearMocks: false, + + // Indicates whether the coverage information should be collected while executing the test + collectCoverage: true, + + // An array of glob patterns indicating a set of files for which coverage information should be collected + // collectCoverageFrom: undefined, + + // The directory where Jest should output its coverage files + coverageDirectory: "coverage", + + // An array of regexp pattern strings used to skip coverage collection + // coveragePathIgnorePatterns: [ + // "/node_modules/" + // ], + + // Indicates which provider should be used to instrument code for coverage + coverageProvider: "v8", + + // A list of reporter names that Jest uses when writing coverage reports + // coverageReporters: [ + // "json", + // "text", + // "lcov", + // "clover" + // ], + + // An object that configures minimum threshold enforcement for coverage results + // coverageThreshold: undefined, + + // A path to a custom dependency extractor + // dependencyExtractor: undefined, + + // Make calling deprecated APIs throw helpful error messages + // errorOnDeprecated: false, + + // The default configuration for fake timers + // fakeTimers: { + // "enableGlobally": false + // }, + + // Force coverage collection from ignored files using an array of glob patterns + // forceCoverageMatch: [], + + // A path to a module which exports an async function that is triggered once before all test suites + // globalSetup: undefined, + + // A path to a module which exports an async function that is triggered once after all test suites + // globalTeardown: undefined, + + // A set of global variables that need to be available in all test 
environments + // globals: {}, + + // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers. + // maxWorkers: "50%", + + // An array of directory names to be searched recursively up from the requiring module's location + // moduleDirectories: [ + // "node_modules" + // ], + + // An array of file extensions your modules use + // moduleFileExtensions: [ + // "js", + // "mjs", + // "cjs", + // "jsx", + // "ts", + // "tsx", + // "json", + // "node" + // ], + + // A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module + // moduleNameMapper: {}, + + // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader + // modulePathIgnorePatterns: [], + + // Activates notifications for test results + // notify: false, + + // An enum that specifies notification mode. 
Requires { notify: true } + // notifyMode: "failure-change", + + // A preset that is used as a base for Jest's configuration + // preset: undefined, + + // Run tests from one or more projects + // projects: undefined, + + // Use this configuration option to add custom reporters to Jest + // reporters: undefined, + + // Automatically reset mock state before every test + // resetMocks: false, + + // Reset the module registry before running each individual test + // resetModules: false, + + // A path to a custom resolver + // resolver: undefined, + + // Automatically restore mock state and implementation before every test + // restoreMocks: false, + + // The root directory that Jest should scan for tests and modules within + // rootDir: undefined, + + // A list of paths to directories that Jest should use to search for files in + // roots: [ + // "" + // ], + + // Allows you to use a custom runner instead of Jest's default test runner + // runner: "jest-runner", + + // The paths to modules that run some code to configure or set up the testing environment before each test + // setupFiles: [], + + // A list of paths to modules that run some code to configure or set up the testing framework before each test + // setupFilesAfterEnv: [], + + // The number of seconds after which a test is considered as slow and reported as such in the results. 
+ // slowTestThreshold: 5, + + // A list of paths to snapshot serializer modules Jest should use for snapshot testing + // snapshotSerializers: [], + + // The test environment that will be used for testing + // testEnvironment: "jest-environment-node", + + // Options that will be passed to the testEnvironment + // testEnvironmentOptions: {}, + + // Adds a location field to test results + // testLocationInResults: false, + + // The glob patterns Jest uses to detect test files + // testMatch: [ + // "**/__tests__/**/*.[jt]s?(x)", + // "**/?(*.)+(spec|test).[tj]s?(x)" + // ], + + // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped + // testPathIgnorePatterns: [ + // "/node_modules/" + // ], + + // The regexp pattern or array of patterns that Jest uses to detect test files + // testRegex: [], + + // This option allows the use of a custom results processor + // testResultsProcessor: undefined, + + // This option allows use of a custom test runner + // testRunner: "jest-circus/runner", + + // A map from regular expressions to paths to transformers + // transform: undefined, + + // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation + // transformIgnorePatterns: [ + // "/node_modules/", + // "\\.pnp\\.[^\\/]+$" + // ], + + // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them + // unmockedModulePathPatterns: undefined, + + // Indicates whether each individual test should be reported during the run + // verbose: undefined, + + // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode + // watchPathIgnorePatterns: [], + + // Whether to use watchman for file crawling + // watchman: true, +}; diff --git a/lang/js/package.json b/lang/js/package.json new file mode 100644 index 000000000..f2851631e --- /dev/null 
+++ b/lang/js/package.json @@ -0,0 +1,46 @@ +{ + "name": "airr-js", + "version": "1.4.1", + "description": "AIRR Community Data Representation Standard reference library for antibody and TCR sequencing data.", + "author": [ + { + "name": "AIRR Community", + "email": "info@airr-community.org" + } + ], + "website": { + "url": "http://docs.airr-community.org" + }, + "repository": { + "type": "git", + "url": "https://github.com/airr-community/airr-standards" + }, + "license": "CC-BY-4.0", + "main": "airr.js", + "private": false, + "dependencies": { + "@apidevtools/json-schema-ref-parser": "^10.1.0", + "ajv": "^8.12.0", + "ajv-formats": "^2.1.1", + "csv-parser": "^2.3.2", + "errorhandler": "1.3.2", + "js-yaml": "^3.10.0", + "json-approver": "1.0.3", + "moment": "2.24.0", + "moment-timezone": "0.4.1", + "underscore": "1.7.0" + }, + "devDependencies": { + "eslint": "^7.9.0", + "jest": "^29.5.0", + "jest-cli": "^29.5.0" + }, + "scripts": { + "eslint": "eslint", + "test": "jest" + }, + "engines": { + "node": ">=12.18.3", + "npm": ">=6.14.6" + } +} diff --git a/lang/js/tests/airr.test.js b/lang/js/tests/airr.test.js new file mode 100644 index 000000000..3219d7f4f --- /dev/null +++ b/lang/js/tests/airr.test.js @@ -0,0 +1,46 @@ +// +// airr.test.js +// Unit tests for AIRR Standards reference library +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +var path = require('path'); +var airr = require("../airr") + +// Paths +var data_path = path.resolve(__dirname, 'data'); + +// Test data +var rearrangement_good = path.resolve(data_path, 'good_rearrangement.tsv'); +var rearrangement_bad = path.resolve(data_path, 'bad_rearrangement.tsv') +var rep_good = path.resolve(data_path, 'good_repertoire.yaml') +var rep_bad = path.resolve(data_path, 'bad_repertoire.yaml') +var germline_good = path.resolve(data_path, 'good_germline_set.json') +var germline_bad = path.resolve(data_path, 'bad_germline_set.json') +var genotype_good = path.resolve(data_path,
'good_genotype_set.json') +var genotype_bad = path.resolve(data_path, 'bad_genotype_set.json') +var combined_yaml = path.resolve(data_path, 'good_combined_airr.yaml') +var combined_json = path.resolve(data_path, 'good_combined_airr.json') + +// Output data +//var output_rep = os.path.join(data_path, 'output_rep.json') +//var output_good = os.path.join(data_path, 'output_data.json') +//var output_blank = os.path.join(data_path, 'output_blank.json') + +test('load schema', async () => { + const schema = await airr.load_schema(); + expect(schema).not.toBeNull(); +}); + +test('load good airr yaml', () => { + const data = airr.read_airr(rep_good, true); + expect(data).not.toBeNull(); +}); + +test('load good rearrangement tsv', () => { + const data = airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); +}); diff --git a/lang/js/tests/data/bad_genotype_set.json b/lang/js/tests/data/bad_genotype_set.json new file mode 100644 index 000000000..c58a39027 --- /dev/null +++ b/lang/js/tests/data/bad_genotype_set.json @@ -0,0 +1,44 @@ +{ + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + }, + { + "label": "IGHV1-69*02", + "name": "1234", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": "1" + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} \ No newline at end of file diff --git a/lang/js/tests/data/bad_germline_set.json b/lang/js/tests/data/bad_germline_set.json 
new file mode 100644 index 000000000..f221dcf9e --- /dev/null +++ b/lang/js/tests/data/bad_germline_set.json @@ -0,0 +1,351 @@ +{ + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "author": "William Lees", + "lab_name": "", + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": "", + "species": ["Mouse"], + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + "release_description": "First release", + "label": "IGHV-2DBF", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": "Mouse", + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + 
"gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": 
"CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": "Mouse", + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + 
"52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + "curational_tags": null + } + ], + "notes": "" + }] +} \ No newline at end of file diff --git a/lang/js/tests/data/bad_rearrangement.tsv b/lang/js/tests/data/bad_rearrangement.tsv new file mode 100644 index 000000000..d12fc79fe --- /dev/null +++ b/lang/js/tests/data/bad_rearrangement.tsv @@ -0,0 +1,10 @@ +rearrangement_id rearrangement_set_id sequence_id wrong_name rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 
275 0 317 279 287 10 18 291 333 5 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 +IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 diff --git a/lang/js/tests/data/bad_repertoire.yaml b/lang/js/tests/data/bad_repertoire.yaml new file mode 100644 index 000000000..a98e085d2 --- /dev/null +++ b/lang/js/tests/data/bad_repertoire.yaml @@ -0,0 +1,148 @@ +# +# Example metadata +# + +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + lab_name: "Mark M. Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_B_naive + tissue: PBMC + cell_subset: "Naive B cell" + cell_phenotype: "expression of CD20 and the absence of CD27" + cell_species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + 
study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + lab_name: "Mark M. 
Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_B_memory + tissue: PBMC + cell_subset: "Memory B cell" + cell_phenotype: "expression of CD20 and CD27" + cell_species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905655_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. 
Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + lab_name: "Mark M. Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_T_naive_CD4 + tissue: PBMC + cell_subset: "Naive CD4+ T cell" + cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_species: + id: "NCBITaxon_9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: TRB + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905659_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 diff --git a/lang/js/tests/data/extra_rearrangement.tsv b/lang/js/tests/data/extra_rearrangement.tsv new file mode 100644 index 000000000..8bedb960f --- /dev/null +++ b/lang/js/tests/data/extra_rearrangement.tsv @@ -0,0 +1,2 @@ +sequence_id sequence rev_comp productive v_call d_call j_call sequence_alignment germline_alignment junction junction junction_aa v_cigar d_cigar j_cigar +1 2 F F 5 6 7 8 9 10 11 12 13 14 15 not_in_header not_in diff --git a/lang/js/tests/data/good_combined_airr.json b/lang/js/tests/data/good_combined_airr.json new file mode 100644 index 000000000..00480023b --- 
/dev/null +++ b/lang/js/tests/data/good_combined_airr.json @@ -0,0 +1,838 @@ +{ + "Repertoire": [ + { + "repertoire_id": "1841923116114776551-242ac11c-0001-012", + "study": { + "study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", + "inclusion_exclusion_criteria": null, + "lab_name": "Mark M. 
Davis", + "lab_address": "Stanford University", + "submitted_by": "Florian Rubelt", + "pub_ids": "PMID:27005435", + "collected_by": null, + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO_0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": null, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ] + }, + "sample": [ + { + "sample_id": "TW01A_B_naive", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON_0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL_0000788", + "label": "naive B cell" + }, + "cell_phenotype": "expression of CD20 and the absence of CD27", + "cell_species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "IGH", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905656", + "file_type": "fastq", + "filename": "SRR2905656_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905656_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": null, + "disease_state_sample": null, + 
"collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": "RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + "data_processing_id": "3059369183532618216-242ac11b-0001-007", + "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "6623294219256599016-242ac11c-0001-012" + } + ] + }, + { + "repertoire_id": "1602908186092376551-242ac11c-0001-012", + "study": { + "study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", + "inclusion_exclusion_criteria": null, + "lab_name": "Mark M. 
Davis", + "lab_address": "Stanford University", + "submitted_by": "Florian Rubelt", + "pub_ids": "PMID:27005435", + "collected_by": null, + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO_0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": null, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ] + }, + "sample": [ + { + "sample_id": "TW01A_B_memory", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON_0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL_0000787", + "label": "memory B cell" + }, + "cell_phenotype": "expression of CD20 and CD27", + "cell_species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "IGH", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905655", + "file_type": "fastq", + "filename": "SRR2905655_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905655_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": null, + "disease_state_sample": null, + 
"collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": "RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + "data_processing_id": "3059369183532618216-242ac11b-0001-007", + "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "6623294219256599016-242ac11c-0001-012" + } + ] + }, + { + "repertoire_id": "2366080924918616551-242ac11c-0001-012", + "study": { + "study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", + "inclusion_exclusion_criteria": null, + "lab_name": "Mark M. 
Davis", + "lab_address": "Stanford University", + "submitted_by": "Florian Rubelt", + "pub_ids": "PMID:27005435", + "collected_by": null, + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO_0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": null, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ] + }, + "sample": [ + { + "sample_id": "TW01A_T_naive_CD4", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON_0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL_0000895", + "label": "naive thymus-derived CD4-positive, alpha-beta T cell" + }, + "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", + "cell_species": { + "id": "NCBITaxon_9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "TRB", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905659", + "file_type": "fastq", + "filename": "SRR2905659_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905659_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": 
null, + "disease_state_sample": null, + "collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": "RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + "data_processing_id": "651223970338378216-242ac11b-0001-007", + "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "4625424004665971176-242ac11c-0001-012" + } + ] + } + ], + + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "author": "William Lees", + "lab_name": "", + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [], + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": "", + "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", + 
"maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + "release_description": "First release", + "label": "IGHV-2DBF", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + 
"13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", + "maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + 
"watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + 
"curational_tags": null + } + ], + "curation": null + }], + + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} \ No newline at end of file diff --git a/lang/js/tests/data/good_combined_airr.yaml b/lang/js/tests/data/good_combined_airr.yaml new file mode 100644 index 000000000..5479c0540 --- /dev/null +++ b/lang/js/tests/data/good_combined_airr.yaml @@ -0,0 +1,771 @@ +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. 
We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. + study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X + inclusion_exclusion_criteria: + lab_name: Mark M. Davis + lab_address: Stanford University + submitted_by: Florian Rubelt + pub_ids: PMID:27005435 + collected_by: + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITaxon_9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: + ancestry_population: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + sample: + - sample_id: TW01A_B_naive + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON_0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL_0000788 + label: naive B cell + cell_phenotype: expression of CD20 and the absence of CD27 + cell_species: + id: NCBITaxon_9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905656 + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + read_length: 300 + 
paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + biomaterial_provider: + cell_number: + cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + - repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. 
We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. + study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X + inclusion_exclusion_criteria: + lab_name: Mark M. Davis + lab_address: Stanford University + submitted_by: Florian Rubelt + pub_ids: PMID:27005435 + collected_by: + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITaxon_9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: + ancestry_population: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + sample: + - sample_id: TW01A_B_memory + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON_0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL_0000787 + label: memory B cell + cell_phenotype: expression of CD20 and CD27 + cell_species: + id: NCBITaxon_9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905655 + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: 
SRR2905655_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + biomaterial_provider: + cell_number: + cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. 
We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. + study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X + inclusion_exclusion_criteria: + lab_name: Mark M. Davis + lab_address: Stanford University + submitted_by: Florian Rubelt + pub_ids: PMID:27005435 + collected_by: + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITaxon_9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: + ancestry_population: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + sample: + - sample_id: TW01A_T_naive_CD4 + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON_0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL_0000895 + label: naive thymus-derived CD4-positive, alpha-beta T cell + cell_phenotype: expression of CD8 and absence of CD4 and CD45RO + cell_species: + id: NCBITaxon_9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: TRB + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905659 + file_type: fastq + filename: SRR2905659_R1.fastq.gz + 
read_direction: forward + read_length: 300 + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + biomaterial_provider: + cell_number: + cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 + +GermlineSet: + - germline_set_id: OGRDB:G00007 + author: William Lees + lab_name: '' + lab_address: Birkbeck College, University of London, Malet Street, London + acknowledgements: [] + release_version: 1 + release_description: '' + release_date: '2021-11-24' + germline_set_name: CAST IGH + germline_set_ref: OGRDB:G00007.1 + pub_ids: '' + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + locus: IGH + allele_descriptions: + - allele_description_id: OGRDB:A00301 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF + maintainer: William Lees + acknowledgements: [] + lab_address: Birkbeck College, University of London, Malet Street, London + release_version: 1 + release_date: 24-Nov-2021 + release_description: First release + label: IGHV-2DBF + sequence: 
GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aliases: + - watson_et_al:CAST_EiJ_IGHV5-3 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: "Rearranged only" + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + fwr1_start: 1 + fwr1_end: 78 + cdr1_start: 79 + cdr1_end: 114 + fwr2_start: 115 + fwr2_end: 165 + cdr2_start: 166 + cdr2_end: 195 + fwr3_start: 196 + fwr3_end: 312 + cdr3_start: 313 + alignment: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' 
+ - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' + curational_tags: + - allele_description_id: OGRDB:A00314 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO + maintainer: William Lees + acknowledgements: [] + lab_address: Birkbeck College, University of London, Malet Street, London + release_version: 1 + release_date: 24-Nov-2021 + release_description: First release + label: IGHV-2ETO + sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aliases: + - watson_et_al:CAST_EiJ_IGHV8-2 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: "Rearranged only" + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + 
v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + fwr1_start: 1 + fwr1_end: 78 + cdr1_start: 79 + cdr1_end: 114 + fwr2_start: 115 + fwr2_end: 165 + cdr2_start: 166 + cdr2_end: 195 + fwr3_start: 196 + fwr3_end: 312 + cdr3_start: 313 + alignment: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' + curational_tags: + curation: + +GenotypeSet: + - receptor_genotype_set_id: '1' + genotype_class_list: + - receptor_genotype_id: '1' + locus: IGH + documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing diff --git a/lang/js/tests/data/good_genotype_set.json 
b/lang/js/tests/data/good_genotype_set.json new file mode 100644 index 000000000..ba10f56e9 --- /dev/null +++ b/lang/js/tests/data/good_genotype_set.json @@ -0,0 +1,38 @@ +{ + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} \ No newline at end of file diff --git a/lang/js/tests/data/good_germline_set.json b/lang/js/tests/data/good_germline_set.json new file mode 100644 index 000000000..d36d19ad4 --- /dev/null +++ b/lang/js/tests/data/good_germline_set.json @@ -0,0 +1,354 @@ +{ + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "author": "William Lees", + "lab_name": "", + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [], + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": "", + "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", + "maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + 
"release_description": "First release", + "label": "IGHV-2DBF", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + 
"37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", + "maintainer": "William Lees", + "acknowledgements": [], + "lab_address": "Birkbeck College, University of London, Malet Street, London", + "release_version": 1, + "release_date": "24-Nov-2021", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "Rearranged only", + "species": { "id": "NCBITAXON:10090", 
"label": "Mus musculus" }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + "curational_tags": null + } + ], + "curation": null + }] +} diff --git a/lang/js/tests/data/good_rearrangement.tsv b/lang/js/tests/data/good_rearrangement.tsv new file mode 100644 index 000000000..e8521767d 
--- /dev/null +++ b/lang/js/tests/data/good_rearrangement.tsv @@ -0,0 +1,10 @@ +rearrangement_id rearrangement_set_id sequence_id sequence rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 +IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 diff --git a/lang/js/tests/data/good_repertoire.yaml b/lang/js/tests/data/good_repertoire.yaml new file mode 100644 index 000000000..e53106c30 --- /dev/null +++ b/lang/js/tests/data/good_repertoire.yaml @@ -0,0 +1,379 @@ +# +# Example metadata +# + +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. 
Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" + inclusion_exclusion_criteria: null + lab_name: "Mark M. Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + collected_by: null + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: null + ancestry_population: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_B_naive + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON_0000178" + label: "blood" + tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL_0000788" + label: "naive B cell" + cell_phenotype: "expression of CD20 and the absence of CD27" + cell_species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905656 + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + read_length: 
300 + paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905656_R3.fastq.gz + index_length: 8 + anatomic_site: null + disease_state_sample: null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" + inclusion_exclusion_criteria: null + lab_name: "Mark M. 
Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + collected_by: null + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: null + ancestry_population: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_B_memory + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON_0000178" + label: "blood" + tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL_0000787" + label: "memory B cell" + cell_phenotype: "expression of CD20 and CD27" + cell_species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905655 + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905655_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905655_R3.fastq.gz + index_length: 8 + anatomic_site: null + disease_state_sample: null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + 
cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. 
Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" + inclusion_exclusion_criteria: null + lab_name: "Mark M. Davis" + lab_address: "Stanford University" + submitted_by: "Florian Rubelt" + pub_ids: "PMID:27005435" + collected_by: null + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO_0000036 + label: year + age_event: null + ancestry_population: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_T_naive_CD4 + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON_0000178" + label: "blood" + tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL_0000895" + label: "naive thymus-derived CD4-positive, alpha-beta T cell" + cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_species: + id: "NCBITaxon_9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: TRB + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905659 + file_type: fastq + filename: 
SRR2905659_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905659_R3.fastq.gz + index_length: 8 + anatomic_site: null + disease_state_sample: null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 From 6776a52f82fbeb6e69a748d57690520b45b0cc6b Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 19 Apr 2023 11:09:24 -0500 Subject: [PATCH 02/59] new function --- lang/js/airr.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lang/js/airr.js b/lang/js/airr.js index 8a6084551..f1e235fa0 100644 --- a/lang/js/airr.js +++ b/lang/js/airr.js @@ -70,6 +70,17 @@ airr.load_schema = async function() { return Promise.resolve(spec); }; +// return schemas in format appropriate for API doc +airr.get_schemas = function() { + if (! 
airr.Schema['specification']) return null; + + // make deep copy of schemas + let schemas = JSON.parse(JSON.stringify(airr.Schema['specification'])); + // remove Info + delete schemas['Info']; + return schemas; +} + airr.get_schema = function(definition) { if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); return new airr.SchemaDefinition(definition); From d41f751e10aee46fcadbf8920aa2efa9d1b9f342 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 1 Jun 2023 18:44:43 -0500 Subject: [PATCH 03/59] restructure to support browser --- lang/js/airr-browser.js | 23 ++ lang/js/airr.js | 526 +++------------------------------------- lang/js/io.js | 146 +++++++++++ lang/js/package.json | 3 + lang/js/schema.js | 408 +++++++++++++++++++++++++++++++ 5 files changed, 608 insertions(+), 498 deletions(-) create mode 100644 lang/js/airr-browser.js create mode 100644 lang/js/io.js create mode 100644 lang/js/schema.js diff --git a/lang/js/airr-browser.js b/lang/js/airr-browser.js new file mode 100644 index 000000000..77b50044a --- /dev/null +++ b/lang/js/airr-browser.js @@ -0,0 +1,23 @@ +'use strict'; + +// +// airr-browser.js +// AIRR Standards reference library for antibody and TCR sequencing data +// browser edition +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +// The I/O file routines are not provided with the browser edition. + +// For webpack, we are utilizing the browser entry in package.json +// Are we assuming Webpack? 
+ +export var airr = {}; + +// the specification, resolved by webpack +import AIRRSchema from 'airr-schema'; +// schema functions +var schema = require('./schema')(airr, AIRRSchema); diff --git a/lang/js/airr.js b/lang/js/airr.js index f1e235fa0..591664659 100644 --- a/lang/js/airr.js +++ b/lang/js/airr.js @@ -3,61 +3,23 @@ // // airr.js // AIRR Standards reference library for antibody and TCR sequencing data +// node edition // // Copyright (C) 2023 The AIRR Community // // Author: Scott Christley // +// The I/O file routines are provided with the node edition. + // Node Libraries var yaml = require('js-yaml'); var path = require('path'); var fs = require('fs'); -const zlib = require('zlib'); const $RefParser = require("@apidevtools/json-schema-ref-parser"); -var csv = require('csv-parser'); -const AJV = require("ajv"); -const addFormats = require("ajv-formats") - -var airr = {}; -module.exports = airr; - -// Boolean value mappings -var true_values = ['True', 'true', 'TRUE', 'T', 't', '1', 1, true]; -var false_values = ['False', 'false', 'FALSE', 'F', 'f', '0', 0, false]; -var _to_bool_map = function(x) { - if (true_values.indexOf(x) >= 0) return true; - if (false_values.indexOf(x) >= 0) return false; - return null; -}; -var _from_bool_map = function(x) { - if (x == true) return 'T'; - if (x == false) return 'F'; - return ''; -}; - -class ValidationError extends Error { - constructor (message) { - super(message) - // assign the error class name in your custom error (as a shortcut) - this.name = this.constructor.name +export var airr = {}; - // capturing the stack trace keeps the reference to your error class - Error.captureStackTrace(this, this.constructor); - } -} - -function isPromise(promise) { - return !!promise && typeof promise.then === 'function' -} - -// -// AIRR Schema -// - -// Load AIRR schema, returns a promise -airr.Schema = null; airr.load_schema = async function() { // Load AIRR spec var airrFile = path.resolve(__dirname, 
'./airr-schema-openapi3.yaml'); @@ -66,467 +28,35 @@ airr.load_schema = async function() { // dereference all $ref objects var spec = await $RefParser.dereference(doc); - airr.Schema = {"specification": spec}; - return Promise.resolve(spec); -}; - -// return schemas in format appropriate for API doc -airr.get_schemas = function() { - if (! airr.Schema['specification']) return null; - - // make deep copy of schemas - let schemas = JSON.parse(JSON.stringify(airr.Schema['specification'])); - // remove Info - delete schemas['Info']; - return schemas; -} - -airr.get_schema = function(definition) { - if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); - return new airr.SchemaDefinition(definition); -}; - -airr.get_info = function() { - if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); - return airr.Schema['specification']['Info']; -} - -airr.SchemaDefinition = function(definition) { - if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); - - if (definition == 'Info') { - throw new Error('Info is an invalid schema definition name'); - } - - this.definition = airr.Schema['specification'][definition]; - if (! this.definition) - throw new Error('Schema definition ' + definition + ' cannot be found in the specifications'); - - this.info = airr.Schema['specification']['Info']; - if (! this.info) - throw new Error('Info object cannot be found in the specifications'); - - this.properties = this.definition['properties'] - this.required = this.definition['required'] - if (! this.required) this.required = []; - - //this.optional = [f for f in self.properties if f not in self.required] - - return this; -} - -airr.SchemaDefinition.prototype.spec = function(field) { - return this.properties[field]; -}; - -airr.SchemaDefinition.prototype.type = function(field) { - var field_spec = this.properties[field]; - if (! 
field_spec) return null; - var field_type = field_spec['type']; - return field_type; -}; - -airr.SchemaDefinition.prototype.is_ontology = function(field) { - var field_spec = this.properties[field]; - if (! field_spec) return false; - var field_type = field_spec['type']; - if (field_type != 'object') return false; - if ((this.properties[field]['x-airr']) && (this.properties[field]['x-airr']['format'] == 'ontology')) return true; - - return false; -}; - -airr.SchemaDefinition.prototype.to_bool = function(value, validate) { - if (value == null) return null; - - var bool_value = _to_bool_map(value); - if (validate && (bool_value == null)) - throw new Error('invalid bool ' + value); - return bool_value; -}; - -airr.SchemaDefinition.prototype.from_bool = function(value, validate) { - if (value == null) return ''; - - var str_value = _from_bool_map(value); - if (validate && (str_value == null)) - throw new Error('invalid bool ' + value); - return str_value; -}; - -airr.SchemaDefinition.prototype.to_int = function(value, validate) { - if (value == null) return null; - if (value == '') return null; - - var int_value = parseInt(value); - if (isNaN(int_value)) { - if (validate) - throw new Error('invalid int ' + value); - else - return null; - } - return int_value; -}; - -airr.SchemaDefinition.prototype.to_float = function(value, validate) { - if (value == null) return null; - if (value == '') return null; - - var float_value = parseFloat(value); - if (isNaN(float_value)) { - if (validate) - throw new Error('invalid float ' + value); - else - return null; - } - return float_value; -}; + var schema = require('./schema')(airr, spec); + var io = require('./io')(airr); -airr.SchemaDefinition.prototype.map_value = function(map) { - //console.log('map value: ', map); - //console.log(this); - var field_type = this.type(map['header']); - var field_value = map['value']; - switch (field_type) { - case 'boolean': - field_value = this.to_bool(field_value); - break; - case 'integer': - 
field_value = this.to_int(field_value); - break; - case 'number': - field_value = this.to_float(field_value); - break; - } - return field_value; + return Promise.resolve(spec); }; -// -// Validation functions -// - -airr.SchemaDefinition.prototype.validate_header = function(header) { - return false; -} - -airr.SchemaDefinition.prototype.validate_row = function(row) { - return false; -} - -airr.SchemaDefinition.prototype.validate_object = function(object) { - const ajv = new AJV(); - addFormats(ajv); - ajv.addVocabulary(['x-airr', 'example']); - - const validate = ajv.compile(this.definition) - const valid = validate(object) - if (!valid) console.log(validate.errors) - - return valid; -} - -airr.SchemaDefinition.prototype.template = function() { - // Set defaults for each data type - var type_default = {'boolean': false, 'integer': 0, 'number': 0.0, 'string': '', 'array':[]}; - - var _default = function(spec) { - if (spec['default']) return spec['default']; - if (spec['nullable']) return null; - //if (spec['enum']) return spec['enum'][0]; - return type_default[spec['type']]; - }; - - var _populate = function(schema, obj) { - if (schema.allOf) { - for (const k in schema.allOf) - _populate(schema['allOf'][k], obj); - return; - } - for (const k in schema.properties) { - let spec = schema.properties[k]; - // Skip deprecated - if (spec['x-airr'] && spec['x-airr']['deprecated']) - continue - // populate with value - switch (spec['type']) { - case 'object': { - let new_obj = {}; - obj[k] = new_obj; - _populate(spec, new_obj); - break; - } - case 'array': - if (spec['items'] && spec['items']['type'] == 'object') { - let new_obj = {}; - obj[k] = [ _populate(spec['items'], new_obj) ]; - } else - obj[k] = _default(spec); - break; - default: - obj[k] = _default(spec); - } - } - }; - - var obj = {}; - _populate(this, obj); - return (obj); -} - -// -// Interface functions for file operations -// - -airr.read_rearrangement = async function(filename, header_callback=null, 
row_callback=null, validate=false, model=true, debug=false) { - var is_gz = false; - var ext = filename.split('.').pop().toLowerCase(); - if (ext == 'gz') is_gz = true; - - var schema = new airr.SchemaDefinition('Rearrangement'); - - var mapValues = function(map) { - return schema.map_value(map); - }; - - return new Promise(function(resolve, reject) { - var readable = fs.createReadStream(filename); - if (is_gz) readable.pipe(zlib.createGunzip()); - readable.pipe(csv({separator:'\t', mapValues: mapValues})) - .on('headers', async function(headers) { - readable.pause(); - - if (validate) { - try { - schema.validate_header(headers); - } catch (err) { - reject(err); - } - } - - if (header_callback) { - if (isPromise(header_callback)) await header_callback(headers); - else header_callback(headers); - } - - readable.resume(); - }) - .on('data', async function(row) { - readable.pause(); - - if (validate) { - try { - schema.validate_row(row); - } catch (err) { - reject(err); - } - } +// schema functions +const schema = require('./schema')(AIRRSchema); +// i/o functions +const io = require('./io'); - if (row_callback) { - if (isPromise(row_callback)) await row_callback(row); - else row_callback(row); - } - - readable.resume(); - }) - .on('end', async function() { - return resolve(); - }); - }); -} - -airr.create_rearrangement = function(file) { - return null; -} - -airr.derive_rearrangement = function(file) { - return null; -} - -airr.load_rearrangement = async function(filename, validate=false, debug=false) { - var rows = []; - - var got_row = function(row) { rows.push(row); } - await airr.read_rearrangement(filename, null, got_row, validate, true, debug) - .catch(function(error) { Promise.reject(error); }); - - return Promise.resolve(rows); -} - -airr.dump_rearrangement = function(file) { - return null; -} - -airr.merge_rearrangement = function(file) { - return null; -} - -airr.validate_rearrangement = function(file) { - return null; -} - -airr.read_airr = 
function(filename, validate=false, model=true, debug=false) { - var data = null; - var ext = filename.split('.').pop().toLowerCase(); - if ((ext == 'yaml') || (ext == 'yml') || (ext == 'json')) { - data = yaml.safeLoad(fs.readFileSync(filename)); +/* TODO? UMD +(function (root, factory) { + if (typeof define === 'function' && define.amd) { + // AMD. Register as an anonymous module. + define(['b'], factory); + } else if (typeof module === 'object' && module.exports) { + // Node. + module.exports = factory(require('b')); } else { - let msg = 'Unknown file type:' + ext + '. Supported file extensions are "yaml", "yml" or "json"'; - if (debug) console.error(msg); - throw new Error(msg); - } - - if (validate) { - if (debug) console.log('Validating:', filename); - try { - var schema = new airr.SchemaDefinition('DataFile'); - schema.validate_object(data); - } catch (err) { - if (debug) console.error(filename, 'failed validation.'); - throw new ValidationError(err); - } + // Browser globals (root is window) + root.returnExports = factory(root.b); } - - return data; -} +}(typeof self !== 'undefined' ? self : this, function (b) { + // Use b in some fashion. 
-airr.validate_airr = function(filename) { - return airr.read_airr(filename, true); -} - -airr.write_airr = function(file) { - return null; -} - -// Given a field, check if included in field set -// Field sets include: -// miairr, for only MiAIRR fields -// airr-core, for all required and identifier fields -// airr-schema, for all fields -airr.checkSet = function(schema, field_set, f) { - switch (field_set) { - case 'miairr': - if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) - return true; - break; - case 'airr-core': - // miairr - if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) - return true; - // identifer - if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['identifier'])) - return true; - // required - if ((schema['required']) && (schema['required'].indexOf(f) >= 0)) - return true; - break; - case 'airr-schema': - // all fields - return true; - } - return false; -} - -// Recursively walk through schema and collect fields based upon field set. -// The schema loader resolves the $ref references so we do not need to follow them. -airr.collectFields = function(schema, field_set, field_list, context, force) { - for (var f in schema['properties']) { - var full_field = f; - if (context) full_field = context + '.' 
+ f; - //console.log(full_field); - //console.log(schema['properties'][f]); - - // check if deprecated - if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['deprecated'])) - continue; - - var field_type = schema['properties'][f]['type']; - switch (field_type) { - case 'object': - // sub-object - if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['ontology'])) { - // if it is an ontology object, check the object then force the ontology fields if necessary - if (airr.checkSet(schema, field_set, f)) - airr.collectFields(schema['properties'][f], field_set, field_list, full_field, true); - } else - airr.collectFields(schema['properties'][f], field_set, field_list, full_field, force); - break; - case 'array': - if (schema['properties'][f]['items']['type'] == 'object') { - // array of sub-objects - airr.collectFields(schema['properties'][f]['items'], field_set, field_list, full_field, force); - } else if (schema['properties'][f]['items']['allOf']) { - // array of composite objects - for (var s in schema['properties'][f]['items']['allOf']) { - airr.collectFields(schema['properties'][f]['items']['allOf'][s], field_set, field_list, full_field, force); - } - } else { - // array of primitive types - if (airr.checkSet(schema, field_set, f)) - field_list.push(full_field); - } - break; - case 'string': - case 'number': - case 'integer': - case 'boolean': - // primitive types - if (force) - field_list.push(full_field); - else if (airr.checkSet(schema, field_set, f)) - field_list.push(full_field); - break; - default: - // unhandled schema structure - console.error('VDJServer ADC API INFO: Unhandled schema structure: ' + full_field); - break; - } - } -} - -// Add the fields to the document if any are missing -airr.addFields = function(document, field_list, schema) { - for (var r in field_list) { - var path = field_list[r].split('.'); - var obj = document; - var spec = schema; - for (var p = 0; p < path.length; p++) { - spec = 
spec['properties'][path[p]]; - // if not in the spec then give up - if (!spec) break; - - if (spec['type'] == 'array') { - if ((spec['items']['type'] == undefined) || (spec['items']['type'] == 'object')) { - // array of object - if (obj[path[p]] == undefined) obj[path[p]] = [{}]; - var sub_spec = spec['items']; - if (spec['items']['allOf']) { - // need to combine the properties - sub_spec = { type: 'object', properties: {} }; - for (var i in spec['items']['allOf']) { - var sub_obj = spec['items']['allOf'][i]; - for (var j in sub_obj['properties']) { - sub_spec['properties'][j] = sub_obj['properties'][j]; - } - } - } - for (var a in obj[path[p]]) { - airr.addFields(obj[path[p]][a], [ path.slice(p+1).join('.') ], sub_spec); - } - } else { - // array of primitive data types - if (obj[path[p]] == undefined) obj[path[p]] = null; - } - break; - } else if (spec['type'] == 'object') { - if (obj[path[p]] == undefined) { - if (p == path.length - 1) obj[path[p]] = null; - else obj[path[p]] = {}; - } - obj = obj[path[p]]; - } else if (obj[path[p]] != undefined) obj = obj[path[p]]; - else if (p == path.length - 1) obj[path[p]] = null; - else console.error('VDJServer ADC API ERROR: Internal error (addFields) do not know how to handle path element: ' + p); - } - } -}; + // Just return a value to define the module export. + // This example returns an object, but the module + // can return a function as the exported value. 
+ return {}; +})); */ diff --git a/lang/js/io.js b/lang/js/io.js new file mode 100644 index 000000000..b7a36e0a6 --- /dev/null +++ b/lang/js/io.js @@ -0,0 +1,146 @@ +'use strict'; + +// +// io.js +// AIRR Standards reference library for antibody and TCR sequencing data +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +// Node Libraries +var yaml = require('js-yaml'); +var fs = require('fs'); +const zlib = require('zlib'); +var csv = require('csv-parser'); + +// +// Interface functions for file operations +// +module.exports = function(airr) { + + airr.read_rearrangement = async function(filename, header_callback=null, row_callback=null, validate=false, model=true, debug=false) { + var is_gz = false; + var ext = filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + var schema = new airr.SchemaDefinition('Rearrangement'); + + var mapValues = function(map) { + return schema.map_value(map); + }; + + return new Promise(function(resolve, reject) { + var readable = fs.createReadStream(filename); + if (is_gz) readable.pipe(zlib.createGunzip()); + readable.pipe(csv({separator:'\t', mapValues: mapValues})) + .on('headers', async function(headers) { + readable.pause(); + + if (validate) { + try { + schema.validate_header(headers); + } catch (err) { + reject(err); + } + } + + if (header_callback) { + if (isPromise(header_callback)) await header_callback(headers); + else header_callback(headers); + } + + readable.resume(); + }) + .on('data', async function(row) { + readable.pause(); + + if (validate) { + try { + schema.validate_row(row); + } catch (err) { + reject(err); + } + } + + if (row_callback) { + if (isPromise(row_callback)) await row_callback(row); + else row_callback(row); + } + + readable.resume(); + }) + .on('end', async function() { + return resolve(); + }); + }); + } + + airr.create_rearrangement = function(file) { + return null; + } + + airr.derive_rearrangement = function(file) { + return null; + } + + 
airr.load_rearrangement = async function(filename, validate=false, debug=false) { + var rows = []; + + var got_row = function(row) { rows.push(row); } + await airr.read_rearrangement(filename, null, got_row, validate, true, debug) + .catch(function(error) { Promise.reject(error); }); + + return Promise.resolve(rows); + } + + airr.dump_rearrangement = function(file) { + return null; + } + + airr.merge_rearrangement = function(file) { + return null; + } + + airr.validate_rearrangement = function(file) { + return null; + } + + airr.read_airr = function(filename, validate=false, model=true, debug=false) { + var data = null; + var ext = filename.split('.').pop().toLowerCase(); + if ((ext == 'yaml') || (ext == 'yml') || (ext == 'json')) { + data = yaml.safeLoad(fs.readFileSync(filename)); + } else { + let msg = 'Unknown file type:' + ext + '. Supported file extensions are "yaml", "yml" or "json"'; + if (debug) console.error(msg); + throw new Error(msg); + } + + if (validate) { + if (debug) console.log('Validating:', filename); + try { + var schema = new airr.SchemaDefinition('DataFile'); + schema.validate_object(data); + } catch (err) { + if (debug) console.error(filename, 'failed validation.'); + throw new ValidationError(err); + } + } + + return data; + } + + airr.validate_airr = function(filename) { + return airr.read_airr(filename, true); + } + + airr.write_airr = function(file) { + return null; + } + + return airr; +}; + + diff --git a/lang/js/package.json b/lang/js/package.json index f2851631e..557afcba1 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -17,6 +17,9 @@ }, "license": "CC BY 4.0", "main": "airr.js", + "browser": { + "./airr.js": "./airr-browser.js" + }, "private": false, "dependencies": { "@apidevtools/json-schema-ref-parser": "^10.1.0", diff --git a/lang/js/schema.js b/lang/js/schema.js new file mode 100644 index 000000000..689c440b2 --- /dev/null +++ b/lang/js/schema.js @@ -0,0 +1,408 @@ +'use strict'; + +// +// airr.js +// AIRR 
Standards reference library for antibody and TCR sequencing data +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +// Node Libraries +const AJV = require("ajv"); +const addFormats = require("ajv-formats") + +// tests if global scope is bound to "global" +var isNode = new Function("try {return this===global;}catch(e){return false;}"); +var isBrowser = new Function("try {return this===window;}catch(e){ return false;}"); + +// Boolean value mappings +var true_values = ['True', 'true', 'TRUE', 'T', 't', '1', 1, true]; +var false_values = ['False', 'false', 'FALSE', 'F', 'f', '0', 0, false]; +var _to_bool_map = function(x) { + if (true_values.indexOf(x) >= 0) return true; + if (false_values.indexOf(x) >= 0) return false; + return null; +}; +var _from_bool_map = function(x) { + if (x == true) return 'T'; + if (x == false) return 'F'; + return ''; +}; + +function isPromise(promise) { + return !!promise && typeof promise.then === 'function' +} + +class ValidationError extends Error { + constructor (message) { + super(message) + + // assign the error class name in your custom error (as a shortcut) + this.name = this.constructor.name + + // capturing the stack trace keeps the reference to your error class + Error.captureStackTrace(this, this.constructor); + } +} + +//var airr = {}; +module.exports = function(airr, schema) { + console.log('airr-js schema:', schema); + airr.Schema = {"specification": schema}; + //airr.Schema = schema; + + // Load AIRR schema, returns a promise +/* airr.load_schema = async function() { + // Load AIRR spec + var airrFile = path.resolve(__dirname, './airr-schema-openapi3.yaml'); + var doc = yaml.safeLoad(fs.readFileSync(airrFile)); + if (!doc) Promise.reject(new Error('Could not load AIRR schema yaml file.')); + + // dereference all $ref objects + var spec = await $RefParser.dereference(doc); + airr.Schema = {"specification": spec}; + return Promise.resolve(spec); + }; */ + + // return schemas in format 
appropriate for API doc + airr.get_schemas = function() { + if (! airr.Schema['specification']) return null; + + // make deep copy of schemas + let schemas = JSON.parse(JSON.stringify(airr.Schema['specification'])); + // remove Info + delete schemas['Info']; + return schemas; + } + + airr.get_schema = function(definition) { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return new airr.SchemaDefinition(definition); + }; + + airr.get_info = function() { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return airr.Schema['specification']['Info']; + } + + airr.SchemaDefinition = function(definition) { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + + if (definition == 'Info') { + throw new Error('Info is an invalid schema definition name'); + } + + this.definition = airr.Schema['specification'][definition]; + if (! this.definition) + throw new Error('Schema definition ' + definition + ' cannot be found in the specifications'); + + this.info = airr.Schema['specification']['Info']; + if (! this.info) + throw new Error('Info object cannot be found in the specifications'); + + this.properties = this.definition['properties'] + this.required = this.definition['required'] + if (! this.required) this.required = []; + + //this.optional = [f for f in self.properties if f not in self.required] + + return this; + } + + airr.SchemaDefinition.prototype.spec = function(field) { + return this.properties[field]; + }; + + airr.SchemaDefinition.prototype.type = function(field) { + var field_spec = this.properties[field]; + if (! field_spec) return null; + var field_type = field_spec['type']; + return field_type; + }; + + airr.SchemaDefinition.prototype.is_ontology = function(field) { + var field_spec = this.properties[field]; + if (! 
field_spec) return false; + var field_type = field_spec['type']; + if (field_type != 'object') return false; + if ((this.properties[field]['x-airr']) && (this.properties[field]['x-airr']['format'] == 'ontology')) return true; + + return false; + }; + + airr.SchemaDefinition.prototype.to_bool = function(value, validate) { + if (value == null) return null; + + var bool_value = _to_bool_map(value); + if (validate && (bool_value == null)) + throw new Error('invalid bool ' + value); + return bool_value; + }; + + airr.SchemaDefinition.prototype.from_bool = function(value, validate) { + if (value == null) return ''; + + var str_value = _from_bool_map(value); + if (validate && (str_value == null)) + throw new Error('invalid bool ' + value); + return str_value; + }; + + airr.SchemaDefinition.prototype.to_int = function(value, validate) { + if (value == null) return null; + if (value == '') return null; + + var int_value = parseInt(value); + if (isNaN(int_value)) { + if (validate) + throw new Error('invalid int ' + value); + else + return null; + } + return int_value; + }; + + airr.SchemaDefinition.prototype.to_float = function(value, validate) { + if (value == null) return null; + if (value == '') return null; + + var float_value = parseFloat(value); + if (isNaN(float_value)) { + if (validate) + throw new Error('invalid float ' + value); + else + return null; + } + return float_value; + }; + + airr.SchemaDefinition.prototype.map_value = function(map) { + //console.log('map value: ', map); + //console.log(this); + var field_type = this.type(map['header']); + var field_value = map['value']; + switch (field_type) { + case 'boolean': + field_value = this.to_bool(field_value); + break; + case 'integer': + field_value = this.to_int(field_value); + break; + case 'number': + field_value = this.to_float(field_value); + break; + } + return field_value; + }; + + // + // Validation functions + // + + airr.SchemaDefinition.prototype.validate_header = function(header) { + return false; + 
} + + airr.SchemaDefinition.prototype.validate_row = function(row) { + return false; + } + + airr.SchemaDefinition.prototype.validate_object = function(object) { + const ajv = new AJV(); + addFormats(ajv); + ajv.addVocabulary(['x-airr', 'example']); + + const validate = ajv.compile(this.definition) + const valid = validate(object) + if (!valid) console.log(validate.errors) + + return valid; + } + + airr.SchemaDefinition.prototype.template = function() { + // Set defaults for each data type + var type_default = {'boolean': false, 'integer': 0, 'number': 0.0, 'string': '', 'array':[]}; + + var _default = function(spec) { + if (spec['default']) return spec['default']; + if (spec['nullable']) return null; + //if (spec['enum']) return spec['enum'][0]; + return type_default[spec['type']]; + }; + + var _populate = function(schema, obj) { + if (schema.allOf) { + for (const k in schema.allOf) + _populate(schema['allOf'][k], obj); + return; + } + for (const k in schema.properties) { + let spec = schema.properties[k]; + // Skip deprecated + if (spec['x-airr'] && spec['x-airr']['deprecated']) + continue + // populate with value + switch (spec['type']) { + case 'object': { + let new_obj = {}; + obj[k] = new_obj; + _populate(spec, new_obj); + break; + } + case 'array': + if (spec['items'] && spec['items']['type'] == 'object') { + let new_obj = {}; + obj[k] = [ _populate(spec['items'], new_obj) ]; + } else + obj[k] = _default(spec); + break; + default: + obj[k] = _default(spec); + } + } + }; + + var obj = {}; + _populate(this, obj); + return (obj); + } + + + // Given a field, check if included in field set + // Field sets include: + // miairr, for only MiAIRR fields + // airr-core, for all required and identifier fields + // airr-schema, for all fields + airr.checkSet = function(schema, field_set, f) { + switch (field_set) { + case 'miairr': + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) + return true; + break; + case 'airr-core': + // 
miairr + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['miairr'])) + return true; + // identifer + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['identifier'])) + return true; + // required + if ((schema['required']) && (schema['required'].indexOf(f) >= 0)) + return true; + break; + case 'airr-schema': + // all fields + return true; + } + return false; + } + + // Recursively walk through schema and collect fields based upon field set. + // The schema loader resolves the $ref references so we do not need to follow them. + airr.collectFields = function(schema, field_set, field_list, context, force) { + for (var f in schema['properties']) { + var full_field = f; + if (context) full_field = context + '.' + f; + //console.log(full_field); + //console.log(schema['properties'][f]); + + // check if deprecated + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['deprecated'])) + continue; + + var field_type = schema['properties'][f]['type']; + switch (field_type) { + case 'object': + // sub-object + if ((schema['properties'][f]['x-airr']) && (schema['properties'][f]['x-airr']['ontology'])) { + // if it is an ontology object, check the object then force the ontology fields if necessary + if (airr.checkSet(schema, field_set, f)) + airr.collectFields(schema['properties'][f], field_set, field_list, full_field, true); + } else + airr.collectFields(schema['properties'][f], field_set, field_list, full_field, force); + break; + case 'array': + if (schema['properties'][f]['items']['type'] == 'object') { + // array of sub-objects + airr.collectFields(schema['properties'][f]['items'], field_set, field_list, full_field, force); + } else if (schema['properties'][f]['items']['allOf']) { + // array of composite objects + for (var s in schema['properties'][f]['items']['allOf']) { + airr.collectFields(schema['properties'][f]['items']['allOf'][s], field_set, field_list, full_field, force); + } + } 
else { + // array of primitive types + if (airr.checkSet(schema, field_set, f)) + field_list.push(full_field); + } + break; + case 'string': + case 'number': + case 'integer': + case 'boolean': + // primitive types + if (force) + field_list.push(full_field); + else if (airr.checkSet(schema, field_set, f)) + field_list.push(full_field); + break; + default: + // unhandled schema structure + console.error('VDJServer ADC API INFO: Unhandled schema structure: ' + full_field); + break; + } + } + } + + // Add the fields to the document if any are missing + airr.addFields = function(document, field_list, schema) { + for (var r in field_list) { + var path = field_list[r].split('.'); + var obj = document; + var spec = schema; + for (var p = 0; p < path.length; p++) { + spec = spec['properties'][path[p]]; + // if not in the spec then give up + if (!spec) break; + + if (spec['type'] == 'array') { + if ((spec['items']['type'] == undefined) || (spec['items']['type'] == 'object')) { + // array of object + if (obj[path[p]] == undefined) obj[path[p]] = [{}]; + var sub_spec = spec['items']; + if (spec['items']['allOf']) { + // need to combine the properties + sub_spec = { type: 'object', properties: {} }; + for (var i in spec['items']['allOf']) { + var sub_obj = spec['items']['allOf'][i]; + for (var j in sub_obj['properties']) { + sub_spec['properties'][j] = sub_obj['properties'][j]; + } + } + } + for (var a in obj[path[p]]) { + airr.addFields(obj[path[p]][a], [ path.slice(p+1).join('.') ], sub_spec); + } + } else { + // array of primitive data types + if (obj[path[p]] == undefined) obj[path[p]] = null; + } + break; + } else if (spec['type'] == 'object') { + if (obj[path[p]] == undefined) { + if (p == path.length - 1) obj[path[p]] = null; + else obj[path[p]] = {}; + } + obj = obj[path[p]]; + } else if (obj[path[p]] != undefined) obj = obj[path[p]]; + else if (p == path.length - 1) obj[path[p]] = null; + else console.error('VDJServer ADC API ERROR: Internal error (addFields) do not 
know how to handle path element: ' + p); + } + } + }; + + return airr; +}; + From 6145effc51e0dc81310631790012cce30c67e2c5 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 1 Jun 2023 23:54:30 -0500 Subject: [PATCH 04/59] restructure node edition --- lang/js/airr.js | 7 ++++--- lang/js/schema.js | 16 +--------------- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/lang/js/airr.js b/lang/js/airr.js index 591664659..fd15ef16f 100644 --- a/lang/js/airr.js +++ b/lang/js/airr.js @@ -18,7 +18,8 @@ var path = require('path'); var fs = require('fs'); const $RefParser = require("@apidevtools/json-schema-ref-parser"); -export var airr = {}; +var airr = {}; +module.exports = airr; airr.load_schema = async function() { // Load AIRR spec @@ -35,9 +36,9 @@ airr.load_schema = async function() { }; // schema functions -const schema = require('./schema')(AIRRSchema); +//const schema = require('./schema')(AIRRSchema); // i/o functions -const io = require('./io'); +//const io = require('./io'); /* TODO? 
UMD (function (root, factory) { diff --git a/lang/js/schema.js b/lang/js/schema.js index 689c440b2..bed6f6841 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -49,22 +49,8 @@ class ValidationError extends Error { //var airr = {}; module.exports = function(airr, schema) { - console.log('airr-js schema:', schema); + //console.log('airr-js schema:', JSON.stringify(schema, null, 2)); airr.Schema = {"specification": schema}; - //airr.Schema = schema; - - // Load AIRR schema, returns a promise -/* airr.load_schema = async function() { - // Load AIRR spec - var airrFile = path.resolve(__dirname, './airr-schema-openapi3.yaml'); - var doc = yaml.safeLoad(fs.readFileSync(airrFile)); - if (!doc) Promise.reject(new Error('Could not load AIRR schema yaml file.')); - - // dereference all $ref objects - var spec = await $RefParser.dereference(doc); - airr.Schema = {"specification": spec}; - return Promise.resolve(spec); - }; */ // return schemas in format appropriate for API doc airr.get_schemas = function() { From b031cffb24426ab4336a51aed604a8c334bfdaf7 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 7 Jun 2023 22:58:19 -0500 Subject: [PATCH 05/59] cleanup package dependencies, deref spec for browser --- lang/js/deref.js | 32 ++++++++++++++++++++++++++++++++ lang/js/package.json | 12 ++++-------- 2 files changed, 36 insertions(+), 8 deletions(-) create mode 100644 lang/js/deref.js diff --git a/lang/js/deref.js b/lang/js/deref.js new file mode 100644 index 000000000..43e78f0ff --- /dev/null +++ b/lang/js/deref.js @@ -0,0 +1,32 @@ +'use strict'; + +// +// deref.js +// AIRR Standards reference library for antibody and TCR sequencing data +// generate a dereferenced version of the spec +// +// Copyright (C) 2023 The AIRR Community +// +// Author: Scott Christley +// + +// The I/O file routines are provided with the node edition. 
+ +// Node Libraries +var yaml = require('js-yaml'); +var fs = require('fs'); +var path = require('path'); +var airr = require('./airr'); + +airr.load_schema().then(function() { + var outFile = path.resolve(__dirname, './airr-schema-openapi3-deref.yaml'); + fs.writeFile(outFile, yaml.safeDump(airr.Schema['specification']), (err) => { + if (err) { + console.error(err); + process.exit(1); + } + }); +}).catch(function(error) { + console.error(error); + process.exit(1); +}); diff --git a/lang/js/package.json b/lang/js/package.json index 557afcba1..29b889986 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -15,7 +15,7 @@ "type": "git", "url": "https://github.com/airr-community/airr-standards" }, - "license": "CC BY 4.0", + "license": "CC-BY-4.0", "main": "airr.js", "browser": { "./airr.js": "./airr-browser.js" @@ -26,12 +26,7 @@ "ajv": "^8.12.0", "ajv-formats": "^2.1.1", "csv-parser": "^2.3.2", - "errorhandler": "1.3.2", - "js-yaml": "^3.10.0", - "json-approver": "1.0.3", - "moment": "2.24.0", - "moment-timezone": "0.4.1", - "underscore": "1.7.0" + "js-yaml": "^3.10.0" }, "devDependencies": { "eslint": "^7.9.0", @@ -40,7 +35,8 @@ }, "scripts": { "eslint": "eslint", - "test": "jest" + "test": "jest", + "prepare": "node ./deref.js" }, "engines": { "node": ">=12.18.3", From c1f12469d3c322bec85d9b0a70a9e4dfb01d668a Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 22 Jun 2023 13:47:44 -0500 Subject: [PATCH 06/59] fix bug for array of objects --- lang/js/schema.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index bed6f6841..8cc01e69c 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -240,7 +240,8 @@ module.exports = function(airr, schema) { case 'array': if (spec['items'] && spec['items']['type'] == 'object') { let new_obj = {}; - obj[k] = [ _populate(spec['items'], new_obj) ]; + _populate(spec['items'], new_obj); + obj[k] = [ new_obj ]; } else obj[k] = _default(spec); break; From 
4d4ade320e4ea9e5060724fc8581dd956b14866e Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 10 Jul 2023 13:25:25 -0500 Subject: [PATCH 07/59] update docs --- docs/packages/airr-js/news.rst | 6 + docs/packages/airr-js/overview.rst | 17 +++ docs/software.rst | 1 + lang/js/NEWS.rst | 2 +- lang/js/README.rst | 206 +++++++++++------------------ 5 files changed, 104 insertions(+), 128 deletions(-) create mode 100644 docs/packages/airr-js/news.rst create mode 100644 docs/packages/airr-js/overview.rst diff --git a/docs/packages/airr-js/news.rst b/docs/packages/airr-js/news.rst new file mode 100644 index 000000000..9a6109f21 --- /dev/null +++ b/docs/packages/airr-js/news.rst @@ -0,0 +1,6 @@ +.. _JavaScriptNews: + +JavaScript Library Release Notes +================================= + +.. include:: ../../../lang/js/NEWS.rst \ No newline at end of file diff --git a/docs/packages/airr-js/overview.rst b/docs/packages/airr-js/overview.rst new file mode 100644 index 000000000..616fe2e96 --- /dev/null +++ b/docs/packages/airr-js/overview.rst @@ -0,0 +1,17 @@ +.. _JavaScriptOverview: + +AIRR JavaScript Reference Library +=============================================================================== + +The ``airr-js`` reference library provides basic functions and classes for +interacting with AIRR Community Data Representation Standards, including tools +for reading, writing and validation. The library can be used in the browser or nodejs. + +.. toctree:: + :maxdepth: 1 + :caption: Table of Contents + + Release Notes + +.. 
include:: ../../../lang/js/README.rst + diff --git a/docs/software.rst b/docs/software.rst index b70b5f871..1c5653870 100644 --- a/docs/software.rst +++ b/docs/software.rst @@ -10,6 +10,7 @@ AIRR Standards Reference Implementations Python Library R Library + JavaScript Library ADC API Reference Implementation Resources and Tools Supporting AIRR Standards diff --git a/lang/js/NEWS.rst b/lang/js/NEWS.rst index 9a2328558..93791d62e 100644 --- a/lang/js/NEWS.rst +++ b/lang/js/NEWS.rst @@ -1,4 +1,4 @@ -Version 1.4.2: DATE +Version 1.5.0: DATE -------------------------------------------------------------------------------- Initial release. diff --git a/lang/js/README.rst b/lang/js/README.rst index 8c73d9bdd..03cc88f3e 100644 --- a/lang/js/README.rst +++ b/lang/js/README.rst @@ -14,163 +14,115 @@ source code directory:: Quick Start ------------------------------------------------------------------------------ -Reading AIRR Data Files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``airr-js`` package supports use in the browser or in nodejs. In the browser, the +file system is not available, so the read/write functions are not implemented but template +objects can be created, objects can be validated, and the OpenAPI V3 specification can be +accessed. For nodejs, the full functionality is available including the read/write functions. -The ``airr`` package contains functions to read and write AIRR Data -Model files. The file format is either YAML or JSON, and the package provides a -light wrapper over the standard parsers. The file needs a ``json``, ``yaml``, or ``yml`` -file extension so that the proper parser is utilized. 
All of the AIRR objects -are loaded into memory at once and no streaming interface is provided:: - - import airr +For nodejs, you need to await the loading of the schema before using any functions:: - # Load the AIRR data - data = airr.read_airr('input.airr.json') - # loop through the repertoires - for rep in data['Repertoire']: - print(rep) + var airr = require('airr-js'); -Why are the AIRR objects, such as Repertoire, GermlineSet, and etc., in a list versus in a -dictionary keyed by their identifier (e.g., ``repertoire_id``)? There are two primary reasons for -this. First, the identifier might not have been assigned yet. Some systems might allow MiAIRR -metadata to be entered but the identifier is assigned to that data later by another process. Without -the identifier, the data could not be stored in a dictionary. Secondly, the list allows the data to -have a default ordering. If you know that the data has a unique identifier then you can quickly -create a dictionary object using a comprehension. For example, with repertoires:: + // await schema to be loaded and dereferenced + var spec = await airr.load_schema(); - rep_dict = { obj['repertoire_id'] : obj for obj in data['Repertoire'] } +For the browser, the schema also needs to be loaded, but the package cannot do it itself; +instead, you must provide the OpenAPI V3 specification file as part of your packaging +of the website. 
When using webpack, a resolve alias in ``webpack.config.js`` can be used +to point to the dereferenced yaml file:: -another example with germline sets:: + resolve: { + alias: { + 'airr-schema': path.resolve(__dirname,'node_modules') + '/airr-js/airr-schema-openapi3-deref.yaml' + } + } - germline_dict = { obj['germline_set_id'] : obj for obj in data['GermlineSet'] } +The ``package.json`` utilizes the ``browser`` setting supported by website packaging tools +like webpack to provide an alternative entry point, and browser code can import like so:: -Writing AIRR Data Files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + import { airr } from 'airr-js'; -Writing an AIRR Data File is also a light wrapper over standard YAML or JSON -parsers. Multiple AIRR objects, such as Repertoire, GermlineSet, and etc., can be -written together into the same file. In this example, we use the ``airr`` library ``template`` -method to create some blank Repertoire objects, and write them to a file. 
-As with the read function, the complete list of repertoires are written at once, -there is no streaming interface:: - import airr +Create Blank Template Schema Objects (browser, nodejs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - # Create some blank repertoire objects in a list - data = { 'Repertoire': [] } - for i in range(5): - data['Repertoire'].append(airr.schema.RepertoireSchema.template()) +Import the ``airr-js`` package correctly depending upon browser or nodejs usage as +described above, and then blank template objects can be created:: - # Write the AIRR Data - airr.write_airr('output.airr.json', data) + // Get the schema definition for an AIRR Object + var repertoire_schema = new airr.SchemaDefinition('Repertoire'); + // Create a template object + var blank_repertoire = repertoire_schema.template(); -Reading AIRR Rearrangement TSV files +Validate Objects (browser, nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``airr`` package contains functions to read and write AIRR Rearrangement -TSV files as either iterables or pandas data frames. 
The usage is straightforward, -as the file format is a typical tab delimited file, but the package -performs some additional validation and type conversion beyond using a -standard CSV reader:: - - import airr - - # Create an iteratable that returns a dictionary for each row - reader = airr.read_rearrangement('input.tsv') - for row in reader: print(row) +Import the ``airr-js`` package correctly depending upon browser or nodejs usage as +described above, and then an object can be validated to its schema:: - # Load the entire file into a pandas data frame - df = airr.load_rearrangement('input.tsv') + // Get the schema definition for an AIRR Object + var repertoire_schema = new airr.SchemaDefinition('Repertoire'); + // Validate a repertoire object + var is_valid = repertoire_schema.validate_object(obj); -Writing AIRR Rearrangement TSV files +Reading AIRR Data Files (nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Similar to the read operations, write functions are provided for either creating -a writer class to perform row-wise output or writing the entire contents of -a pandas data frame to a file. Again, usage is straightforward with the ``airr`` -output functions simply performing some type conversion and field ordering -operations:: - - import airr - - # Create a writer class for iterative row output - writer = airr.create_rearrangement('output.tsv') - for row in reader: writer.write(row) +The ``airr-js`` package contains functions to read and write AIRR Data +Model files. The file format is either YAML or JSON, and the package provides a +light wrapper over the standard parsers. The file needs a ``json``, ``yaml``, or ``yml`` +file extension so that the proper parser is utilized. 
All of the AIRR objects +are loaded into memory at once and no streaming interface is provided:: - # Write an entire pandas data frame to a file - airr.dump_rearrangement(df, 'file.tsv') + var airr = require('airr-js'); + await airr.load_schema(); -By default, ``create_rearrangement`` will only write the ``required`` fields -in the output file. Additional fields can be included in the output file by -providing the ``fields`` parameter with an array of additional field names:: + // read AIRR DataFile + var data = await airr.read_airr('input.airr.json'); - # Specify additional fields in the output - fields = ['new_calc', 'another_field'] - writer = airr.create_rearrangement('output.tsv', fields=fields) +Writing AIRR Data Files (nodejs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -A common operation is to read an AIRR rearrangement file, and then -write an AIRR rearrangement file with additional fields in it while -keeping all of the existing fields from the original file. The -``derive_rearrangement`` function provides this capability:: +Writing an AIRR Data File is also a light wrapper over standard YAML or JSON +parsers. Multiple AIRR objects, such as Repertoire, GermlineSet, etc., can be +written together into the same file. In this example, we create some blank +Repertoire objects, and write them to a file.
+As with the read function, the complete list of repertoires is written at once; +there is no streaming interface:: - import airr + var airr = require('airr-js'); + await airr.load_schema(); - # Read rearrangement data and write new file with additional fields - reader = airr.read_rearrangement('input.tsv') - fields = ['new_calc'] - writer = airr.derive_rearrangement('output.tsv', 'input.tsv', fields=fields) - for row in reader: - row['new_calc'] = 'a value' - writer.write(row) + // Create some blank repertoire objects in a list + var repertoire_schema = new airr.SchemaDefinition('Repertoire'); + var data = { 'Repertoire': [] }; + for (let i = 0; i < 5; ++i) + data['Repertoire'].push(repertoire_schema.template()); + // Write the AIRR Data + await airr.write_airr('output.airr.json', data); -Validating AIRR data files +Reading AIRR Rearrangement TSV files (nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``airr`` package can validate AIRR Data Model JSON/YAML files and Rearrangement -TSV files to ensure that they contain all required fields and that the fields types -match the AIRR Schema. This can be done using the ``airr-tools`` command -line program or the validate functions in the library can be called:: +The ``airr-js`` package contains functions to read and write AIRR Rearrangement +TSV files as either a stream or the complete file.
The streaming interface requires +two callback functions to be provided; one for the header and another for each +row as it is read:: - # Validate a rearrangement TSV file - airr-tools validate rearrangement -a input.tsv + var airr = require('airr-js'); + await airr.load_schema(); - # Validate an AIRR DataFile - airr-tools validate airr -a input.airr.json + // read file completely + var data = await airr.load_rearrangement('input.airr.tsv'); -Combining Repertoire metadata and Rearrangement files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // for streaming, need two callback functions + var header_callback = function(headers) { console.log('got headers:', headers); } + var row_callback = function(row) { console.log('got row:', row); } + // read the file + await airr.read_rearrangement('input.airr.tsv', header_callback, row_callback); -The ``airr`` package does not currently keep track of which AIRR Data Model files -are associated with which Rearrangement TSV files, though there is ongoing work to define -a standardized manifest, so users will need to handle those -associations themselves. However, in the data, AIRR identifier fields, such as ``repertoire_id``, -form the link between objects in the AIRR Data Model. -The typical usage is that a program is going to perform some -computation on the Rearrangements, and it needs access to the Repertoire metadata -as part of the computation logic. 
This example code shows the basic framework -for doing that, in this case doing gender specific computation:: - - import airr - - # Load AIRR data containing repertoires - data = airr.read_airr('input.airr.json') - - # Put repertoires in dictionary keyed by repertoire_id - rep_dict = { obj['repertoire_id'] : obj for obj in data['Repertoire'] } - - # Create an iteratable for rearrangement data - reader = airr.read_rearrangement('input.tsv') - for row in reader: - # get repertoire metadata with this rearrangement - rep = rep_dict[row['repertoire_id']] - - # check the gender - if rep['subject']['sex'] == 'male': - # do male specific computation - elif rep['subject']['sex'] == 'female': - # do female specific computation - else: - # do other specific computation +Writing AIRR Rearrangement TSV files (nodejs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +To be implemented. These write functions will be implemented in a patch release. From de06af2dbd4fb9cfd2cd38ef991b276f10f20758 Mon Sep 17 00:00:00 2001 From: jday1 <45389553+jday1@users.noreply.github.com> Date: Mon, 17 Apr 2023 20:18:50 +0100 Subject: [PATCH 08/59] Created default spec to reduce load time (#683) --- lang/python/airr/schema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lang/python/airr/schema.py b/lang/python/airr/schema.py index f932f4151..28967d33c 100644 --- a/lang/python/airr/schema.py +++ b/lang/python/airr/schema.py @@ -9,6 +9,9 @@ from collections import OrderedDict from pkg_resources import resource_stream +with resource_stream(__name__, 'specs/airr-schema.yaml') as f: + DEFAULT_SPEC = yaml.load(f, Loader=yamlordereddictloader.Loader) + class ValidationError(Exception): """ @@ -58,8 +61,7 @@ def __init__(self, definition): self.definition = definition spec = {'Info': []} else: - with resource_stream(__name__, 'specs/airr-schema.yaml') as f: - spec = yaml.load(f, Loader=yamlordereddictloader.Loader) + spec = DEFAULT_SPEC try:
self.definition = spec[definition] From e544ef262cd6b1b2acffbf4270cbc1c2d657720b Mon Sep 17 00:00:00 2001 From: William Lees Date: Mon, 15 May 2023 19:18:37 +0100 Subject: [PATCH 09/59] Update nullable fields, add additional fields to AlleleDescription (#680) * Update nullable fields, add CDR fields to AlleleDescription * fix missing x-airr * Fix miairr tags in germline objects (#663). De-nest genotype (#667) * Add fwr3_end * Fix object name * Update v-sequence delineation fields and dropped aligned sequence from AlleleDescription. Updated germline test data for germline. AlleleDescription. Updated germline test data for. * Fix combined test data * Further updates to germline objects --- lang/R/inst/extdata/airr-schema.yaml | 377 +++++----- .../R/tests/data-tests/good_germline_set.json | 54 +- lang/python/airr/specs/airr-schema.yaml | 377 +++++----- .../python/tests/data/good_combined_airr.json | 52 +- .../python/tests/data/good_combined_airr.yaml | 671 +++++++++--------- lang/python/tests/data/good_germline_set.json | 50 +- specs/airr-schema-openapi3.yaml | 518 ++++++++++---- specs/airr-schema.yaml | 377 +++++----- 8 files changed, 1439 insertions(+), 1037 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 788d8155d..449734012 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -255,7 +255,7 @@ Attributes: - essential - important - defined - default: useful + default: defined identifier: type: boolean description: > @@ -490,7 +490,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: - nullable: false + miairr: important name: type: string description: Full name of individual @@ -525,12 +525,12 @@ RearrangedSequence: type: string description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the 
database record x-airr: - nullable: false + miairr: important sequence: type: string description: nucleotide sequence x-airr: - nullable: false + miairr: essential derivation: type: string enum: @@ -538,7 +538,7 @@ RearrangedSequence: - RNA description: The class of nucleic acid that was used as primary starting material x-airr: - nullable: false + miairr: important observation_type: type: string description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire @@ -546,7 +546,7 @@ RearrangedSequence: - direct sequencing - inference from repertoire x-airr: - nullable: false + miairr: essential curation: type: string description: Curational notes on the sequence @@ -554,27 +554,27 @@ RearrangedSequence: type: string description: Name of the repository in which the sequence has been deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined deposited_version: type: string description: Version number of the sequence within the repository x-airr: - nullable: false + miairr: defined sequence_start: type: integer description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential sequence_end: type: integer description: End co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential UnrearrangedSequence: discriminator: AIRR @@ -594,12 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: - nullable: false + miairr: important sequence: type: string description: Sequence of interest described in this record (typically this will include gene and promoter region) x-airr: - nullable: false + miairr: essential curation: type: string description: 
Curational notes on the sequence @@ -607,12 +607,12 @@ UnrearrangedSequence: type: string description: Name of the repository in which the assembly or contig is deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined patch_no: type: string description: Genome assembly patch number in which this gene was determined @@ -656,73 +656,81 @@ SequenceDelineationV: type: string description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important delineation_scheme: type: string description: Name of the delineation scheme example: Chothia x-airr: - nullable: false + miairr: important + unaligned_sequence: + type: string + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) fwr1_start: type: integer - description: FWR1 start co-ordinate in Gene Description 'alignment' field + description: FWR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr1_end: type: integer - description: FWR1 end co-ordinate in Gene Description 'alignment' field + description: FWR1 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_start: type: integer - description: CDR1 start co-ordinate in Gene Description 'alignment' field + description: CDR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_end: type: integer - description: CDR1 end co-ordinate in Gene Description 'alignment' field + description: CDR1 end 
co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr2_start: type: integer - description: FWR2 start co-ordinate in Gene Description 'alignment' field + description: FWR2 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr2_end: type: integer - description: FWR2 end co-ordinate in Gene Description 'alignment' field + description: FWR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_start: type: integer - description: CDR2 start co-ordinate in Gene Description 'alignment' field + description: CDR2 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_end: type: integer - description: CDR2 end co-ordinate in Gene Description 'alignment' field + description: CDR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_start: type: integer - description: FWR3 start co-ordinate in Gene Description 'alignment' field + description: FWR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_end: type: integer - description: FWR3 end co-ordinate in Gene Description 'alignment' field + description: FWR3 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr3_start: type: integer - description: CDR3 start co-ordinate in Gene Description 'alignment' field + description: CDR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false - alignment: + miairr: important + alignment_labels: type: array items: type: string - description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, 
if it provides one # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: @@ -748,18 +756,18 @@ AlleleDescription: type: string description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important allele_description_ref: type: string description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 x-airr: - nullable: false + miairr: important maintainer: type: string description: Maintainer of this sequence record x-airr: - nullable: false + miairr: defined acknowledgements: type: array description: List of individuals whose contribution to the gene description should be acknowledged @@ -769,12 +777,12 @@ AlleleDescription: type: string description: Institution and full address of corresponding author x-airr: - nullable: false + miairr: defined release_version: type: integer description: Version number of this record, updated whenever a revised version is published or released x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -782,26 +790,28 @@ AlleleDescription: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important label: type: string description: The accepted name for this gene or allele example: IGHV1-69*01 + x-airr: + miairr: important sequence: type: string description: nt sequence of the gene. 
This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences x-airr: - nullable: false + miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader x-airr: - nullable: false + miairr: important aliases: type: array items: @@ -819,7 +829,7 @@ AlleleDescription: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential chromosome: type: integer description: chromosome on which the gene is located @@ -832,12 +842,12 @@ AlleleDescription: - C description: Sequence type (V, D, J, C) x-airr: - nullable: false + miairr: essential functional: type: boolean description: True if the gene is functional, false if it is a pseudogene x-airr: - nullable: false + miairr: important inference_type: type: string enum: @@ -846,7 +856,7 @@ AlleleDescription: - Rearranged only description: Type of inference(s) from which this gene sequence was inferred x-airr: - nullable: false + miairr: important species: $ref: '#/Ontology' description: Binomial designation of subject's species @@ -855,7 +865,7 @@ AlleleDescription: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or other species subgroup to which this subject belongs @@ -885,6 +895,12 @@ AlleleDescription: allele_designation: type: string description: Allele number or other identifier, as (and if) defined + allele_similarity_cluster_designation: + type: string + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + description: Membership ID of the allele within the 
similarity cluster, if a cluster is designated j_codon_frame: type: integer enum: @@ -895,9 +911,13 @@ AlleleDescription: gene_start: type: integer description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + x-airr: + miairr: important gene_end: type: integer description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + x-airr: + miairr: important utr_5_prime_start: type: integer description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) @@ -998,22 +1018,22 @@ GermlineSet: type: string description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important author: type: string description: Corresponding author x-airr: - nullable: false + miairr: important lab_name: type: string description: Department of corresponding author x-airr: - nullable: false + miairr: important lab_address: type: string description: Institutional address of corresponding author x-airr: - nullable: false + miairr: important acknowledgements: type: array description: List of individuals whose contribution to the germline set should be acknowledged @@ -1023,12 +1043,12 @@ GermlineSet: type: number description: Version number of this record, allocated automatically x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -1036,18 +1056,18 @@ GermlineSet: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important germline_set_name: type: string description: descriptive name of this germline set x-airr: - nullable: false + miairr: important germline_set_ref: type: string description: 
Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 x-airr: - nullable: false + miairr: important pub_ids: type: string description: Publications describing the germline set @@ -1060,7 +1080,7 @@ GermlineSet: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or other species subgroup to which this subject belongs @@ -1085,14 +1105,14 @@ GermlineSet: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential allele_descriptions: type: array items: $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: - nullable: false + miairr: important curation: type: string description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. @@ -1113,7 +1133,7 @@ GenotypeSet: type: string description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. 
type: array @@ -1135,7 +1155,7 @@ Genotype: type: string description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important locus: type: string enum: @@ -1148,75 +1168,31 @@ Genotype: - TRG example: IGH x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary documented_alleles: type: array - description: Array of alleles inferred to be present which are documented in GermlineSets + description: List of alleles documented in reference set(s) items: - type: object - properties: - label: - type: string - description: The accepted name for this allele, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DocumentedAllele' x-airr: - nullable: true - adc-query-support: true + miairr: important undocumented_alleles: type: array - description: Array of alleles inferred to be present and not documented in an identified GermlineSet + nullable: true + description: List of alleles inferred to be present and not documented in an identified GermlineSet items: - type: object - properties: - allele_name: - type: string - description: Allele name as allocated by the inference pipeline - x-airr: - nullable: false - sequence: - type: string - description: nt sequence of the allele, as provided by the inference pipeline - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/UndocumentedAllele' x-airr: - nullable: true adc-query-support: true deleted_genes: type: array + nullable: true description: Array of genes identified as being deleted in this genotype items: - type: object - properties: - label: - type: string - description: The accepted name for this gene, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken (issuer/name/version) - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DeletedGene' x-airr: - nullable: true adc-query-support: true inference_process: type: string @@ -1231,6 +1207,84 @@ Genotype: adc-query-support: true format: controlled vocabulary +# Documented Allele +# This describes a 'known' allele found in a genotype +# It is 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + discriminator: AIRR + required: + - label + - germline_set_ref + properties: + label: + type: string + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator.
Alleles with the same value are inferred to be located on the same chromosome + +# Undocumented Allele +# This describes an 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + discriminator: AIRR + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + x-airr: + miairr: important + description: Allele name as allocated by the inference pipeline + sequence: + type: string + x-airr: + miairr: essential + description: nt sequence of the allele, as provided by the inference pipeline + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + discriminator: AIRR + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + x-airr: + miairr: essential + description: The accepted name for this gene, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken (issuer/name/version) + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator.
Alleles with the same value are inferred to be located on the same chromosome + + # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: discriminator: AIRR @@ -1243,14 +1297,14 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: - nullable: false + miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set type: array items: $ref: '#/MHCGenotype' x-airr: - nullable: false + miairr: important # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: @@ -1265,7 +1319,7 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: - nullable: false + miairr: important mhc_class: type: string description: Class of MHC alleles described by the MHCGenotype @@ -1275,45 +1329,16 @@ MHCGenotype: - MHC-nonclassical example: MHC-I x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary mhc_alleles: type: array description: List of MHC alleles of the indicated mhc_class identified in an individual items: - type: object - properties: - allele_designation: - type: string - description: > - The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc - identifiers, if provided by the mhc_typing method - x-airr: - nullable: false - gene: - $ref: '#/Ontology' - description: The MHC gene to which the described allele belongs - title: MHC gene - example: - id: MRO:0000046 - label: HLA-A - x-airr: - nullable: true - adc-query-support: false - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - reference_set_ref: - type: string - description: Repository and list from which it was taken (issuer/name/version) - x-airr: - nullable: false + $ref: '#/MHCAllele' x-airr: - nullable: false + miairr: important adc-query-support: true mhc_genotyping_method: type: 
string @@ -1323,9 +1348,43 @@ MHCGenotype: title: MHC genotyping method example: pcr_low_resolution x-airr: - nullable: true + miairr: important adc-query-support: true + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + x-airr: + miairr: important + gene: + $ref: '#/Ontology' + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + miairr: important + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + reference_set_ref: + type: string + description: Repository and list from which it was taken (issuer/name/version) + x-airr: + miairr: important + # # Repertoire metadata schema # diff --git a/lang/R/tests/data-tests/good_germline_set.json b/lang/R/tests/data-tests/good_germline_set.json index 83f7b8d0d..38000896f 100644 --- a/lang/R/tests/data-tests/good_germline_set.json +++ b/lang/R/tests/data-tests/good_germline_set.json @@ -26,8 +26,8 @@ "release_date": "24-Nov-2021", "release_description": "First release", "label": "IGHV-2DBF", - "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -35,7 +35,7 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged", + "inference_type": "Rearranged only", "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", @@ -57,17 +57,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + 
"fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", @@ -193,7 +195,7 @@ "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -201,7 +203,7 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged", + "inference_type": "Rearranged only", "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", @@ -223,17 +225,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + 
"unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 788d8155d..449734012 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -255,7 +255,7 @@ Attributes: - essential - important - defined - default: useful + default: defined identifier: type: boolean description: > @@ -490,7 +490,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: - nullable: false + miairr: important name: type: string description: Full name of individual @@ -525,12 +525,12 @@ RearrangedSequence: type: string description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important sequence: type: string description: nucleotide sequence x-airr: - nullable: false + miairr: essential derivation: type: string enum: @@ -538,7 +538,7 @@ RearrangedSequence: - RNA description: The class of nucleic acid that was used as primary starting material x-airr: - nullable: false + miairr: important observation_type: type: string description: The 
type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire @@ -546,7 +546,7 @@ RearrangedSequence: - direct sequencing - inference from repertoire x-airr: - nullable: false + miairr: essential curation: type: string description: Curational notes on the sequence @@ -554,27 +554,27 @@ RearrangedSequence: type: string description: Name of the repository in which the sequence has been deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined deposited_version: type: string description: Version number of the sequence within the repository x-airr: - nullable: false + miairr: defined sequence_start: type: integer description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential sequence_end: type: integer description: End co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential UnrearrangedSequence: discriminator: AIRR @@ -594,12 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: - nullable: false + miairr: important sequence: type: string description: Sequence of interest described in this record (typically this will include gene and promoter region) x-airr: - nullable: false + miairr: essential curation: type: string description: Curational notes on the sequence @@ -607,12 +607,12 @@ UnrearrangedSequence: type: string description: Name of the repository in which the assembly or contig is deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined patch_no: type: string description: 
Genome assembly patch number in which this gene was determined @@ -656,73 +656,81 @@ SequenceDelineationV: type: string description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important delineation_scheme: type: string description: Name of the delineation scheme example: Chothia x-airr: - nullable: false + miairr: important + unaligned_sequence: + type: string + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) fwr1_start: type: integer - description: FWR1 start co-ordinate in Gene Description 'alignment' field + description: FWR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr1_end: type: integer - description: FWR1 end co-ordinate in Gene Description 'alignment' field + description: FWR1 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_start: type: integer - description: CDR1 start co-ordinate in Gene Description 'alignment' field + description: CDR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_end: type: integer - description: CDR1 end co-ordinate in Gene Description 'alignment' field + description: CDR1 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr2_start: type: integer - description: FWR2 start co-ordinate in Gene Description 'alignment' field + description: FWR2 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr2_end: type: integer - description: FWR2 end co-ordinate in Gene Description 'alignment' field + 
description: FWR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_start: type: integer - description: CDR2 start co-ordinate in Gene Description 'alignment' field + description: CDR2 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_end: type: integer - description: CDR2 end co-ordinate in Gene Description 'alignment' field + description: CDR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_start: type: integer - description: FWR3 start co-ordinate in Gene Description 'alignment' field + description: FWR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_end: type: integer - description: FWR3 end co-ordinate in Gene Description 'alignment' field + description: FWR3 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr3_start: type: integer - description: CDR3 start co-ordinate in Gene Description 'alignment' field + description: CDR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false - alignment: + miairr: important + alignment_labels: type: array items: type: string - description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: @@ -748,18 +756,18 @@ AlleleDescription: type: string description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important 
allele_description_ref: type: string description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 x-airr: - nullable: false + miairr: important maintainer: type: string description: Maintainer of this sequence record x-airr: - nullable: false + miairr: defined acknowledgements: type: array description: List of individuals whose contribution to the gene description should be acknowledged @@ -769,12 +777,12 @@ AlleleDescription: type: string description: Institution and full address of corresponding author x-airr: - nullable: false + miairr: defined release_version: type: integer description: Version number of this record, updated whenever a revised version is published or released x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -782,26 +790,28 @@ AlleleDescription: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important label: type: string description: The accepted name for this gene or allele example: IGHV1-69*01 + x-airr: + miairr: important sequence: type: string description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences x-airr: - nullable: false + miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + description: nucleotide sequence of the core coding region, i.e. 
the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader x-airr: - nullable: false + miairr: important aliases: type: array items: @@ -819,7 +829,7 @@ AlleleDescription: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential chromosome: type: integer description: chromosome on which the gene is located @@ -832,12 +842,12 @@ AlleleDescription: - C description: Sequence type (V, D, J, C) x-airr: - nullable: false + miairr: essential functional: type: boolean description: True if the gene is functional, false if it is a pseudogene x-airr: - nullable: false + miairr: important inference_type: type: string enum: @@ -846,7 +856,7 @@ AlleleDescription: - Rearranged only description: Type of inference(s) from which this gene sequence was inferred x-airr: - nullable: false + miairr: important species: $ref: '#/Ontology' description: Binomial designation of subject's species @@ -855,7 +865,7 @@ AlleleDescription: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or other species subgroup to which this subject belongs @@ -885,6 +895,12 @@ AlleleDescription: allele_designation: type: string description: Allele number or other identifier, as (and if) defined + allele_similarity_cluster_designation: + type: string + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + description: Membership ID of the allele within the similarity cluster, if a cluster is designated j_codon_frame: type: integer enum: @@ -895,9 +911,13 @@ AlleleDescription: gene_start: type: integer description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + x-airr: + miairr: important gene_end: type: integer description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + x-airr: + 
miairr: important utr_5_prime_start: type: integer description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) @@ -998,22 +1018,22 @@ GermlineSet: type: string description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important author: type: string description: Corresponding author x-airr: - nullable: false + miairr: important lab_name: type: string description: Department of corresponding author x-airr: - nullable: false + miairr: important lab_address: type: string description: Institutional address of corresponding author x-airr: - nullable: false + miairr: important acknowledgements: type: array description: List of individuals whose contribution to the germline set should be acknowledged @@ -1023,12 +1043,12 @@ GermlineSet: type: number description: Version number of this record, allocated automatically x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -1036,18 +1056,18 @@ GermlineSet: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important germline_set_name: type: string description: descriptive name of this germline set x-airr: - nullable: false + miairr: important germline_set_ref: type: string description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 x-airr: - nullable: false + miairr: important pub_ids: type: string description: Publications describing the germline set @@ -1060,7 +1080,7 @@ GermlineSet: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or 
other species subgroup to which this subject belongs @@ -1085,14 +1105,14 @@ GermlineSet: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential allele_descriptions: type: array items: $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: - nullable: false + miairr: important curation: type: string description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. @@ -1113,7 +1133,7 @@ GenotypeSet: type: string description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. type: array @@ -1135,7 +1155,7 @@ Genotype: type: string description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important locus: type: string enum: @@ -1148,75 +1168,31 @@ Genotype: - TRG example: IGH x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary documented_alleles: type: array - description: Array of alleles inferred to be present which are documented in GermlineSets + description: List of alleles documented in reference set(s) items: - type: object - properties: - label: - type: string - description: The accepted name for this allele, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DocumentedAllele' x-airr: - nullable: true - adc-query-support: true + miairr: important undocumented_alleles: type: array - description: Array of alleles inferred to be present and not documented in an identified GermlineSet + nullable: true + description: List of alleles inferred to be present and not documented in an identified GermlineSet items: - type: object - properties: - allele_name: - type: string - description: Allele name as allocated by the inference pipeline - x-airr: - nullable: false - sequence: - type: string - description: nt sequence of the allele, as provided by the inference pipeline - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/UndocumentedAllele' x-airr: - nullable: true adc-query-support: true deleted_genes: type: array + nullable: true description: Array of genes identified as being deleted in this genotype items: - type: object - properties: - label: - type: string - description: The accepted name for this gene, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken (issuer/name/version) - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DeletedGene' x-airr: - nullable: true adc-query-support: true inference_process: type: string @@ -1231,6 +1207,84 @@ Genotype: adc-query-support: true format: controlled vocabulary +# Documented Allele +# This describes a 'known' allele found in a genotype +# It 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + discriminator: AIRR + required: + - label + - germline_set_ref + properties: + label: + type: string + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + +# Undocumented Allele +# This describes a 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + discriminator: AIRR + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + x-airr: + miairr: important + description: Allele name as allocated by the inference pipeline + sequence: + type: string + x-airr: + miairr: essential + description: nt sequence of the allele, as provided by the inference pipeline + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + discriminator: AIRR + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + x-airr: + miairr: essential + description: The accepted name for this gene, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken (issuer/name/version) + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + + # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: discriminator: AIRR @@ -1243,14 +1297,14 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: - nullable: false + miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set type: array items: $ref: '#/MHCGenotype' x-airr: - nullable: false + miairr: important # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: @@ -1265,7 +1319,7 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: - nullable: false + miairr: important mhc_class: type: string description: Class of MHC alleles described by the MHCGenotype @@ -1275,45 +1329,16 @@ MHCGenotype: - MHC-nonclassical example: MHC-I x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary mhc_alleles: type: array description: List of MHC alleles of the indicated mhc_class identified in an individual items: - type: object - properties: - allele_designation: - type: string - description: > - The accepted designation of an allele, usually its gene symbol plus 
allele/sub-allele/etc - identifiers, if provided by the mhc_typing method - x-airr: - nullable: false - gene: - $ref: '#/Ontology' - description: The MHC gene to which the described allele belongs - title: MHC gene - example: - id: MRO:0000046 - label: HLA-A - x-airr: - nullable: true - adc-query-support: false - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - reference_set_ref: - type: string - description: Repository and list from which it was taken (issuer/name/version) - x-airr: - nullable: false + $ref: '#/MHCAllele' x-airr: - nullable: false + miairr: important adc-query-support: true mhc_genotyping_method: type: string @@ -1323,9 +1348,43 @@ MHCGenotype: title: MHC genotyping method example: pcr_low_resolution x-airr: - nullable: true + miairr: important adc-query-support: true + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + x-airr: + miairr: important + gene: + $ref: '#/Ontology' + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + miairr: important + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + reference_set_ref: + type: string + description: Repository and list from which it was taken (issuer/name/version) + x-airr: + miairr: important + # # Repertoire metadata schema # diff --git a/lang/python/tests/data/good_combined_airr.json b/lang/python/tests/data/good_combined_airr.json index 00480023b..07b52ffe1 100644 --- a/lang/python/tests/data/good_combined_airr.json +++ b/lang/python/tests/data/good_combined_airr.json @@ -446,6 +446,7 @@ } ], + "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", "author": "William Lees", @@ -473,8 +474,8 
@@ "release_date": "24-Nov-2021", "release_description": "First release", "label": "IGHV-2DBF", - "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -504,17 +505,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", @@ -640,7 +643,7 @@ "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -670,17 +673,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", @@ -799,6 +804,7 @@ "curation": null }], + "GenotypeSet": [{ "receptor_genotype_set_id": "1", "genotype_class_list": [ diff --git a/lang/python/tests/data/good_combined_airr.yaml b/lang/python/tests/data/good_combined_airr.yaml index 5479c0540..89a515177 100644 --- a/lang/python/tests/data/good_combined_airr.yaml +++ b/lang/python/tests/data/good_combined_airr.yaml @@ -406,347 +406,352 @@ Repertoire: analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 GermlineSet: - - germline_set_id: OGRDB:G00007 - author: William Lees - lab_name: '' +- acknowledgements: [] + allele_descriptions: + - acknowledgements: [] + aliases: + - watson_et_al:CAST_EiJ_IGHV5-3 + allele_description_id: OGRDB:A00301 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF + allele_designation: null + chromosome: null + coding_sequence: 
GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' + curational_tags: null + functional: true + gene_designation: null + gene_end: null + gene_start: null + inference_type: Rearranged only lab_address: Birkbeck College, University of London, Malet Street, London - acknowledgements: [] + label: IGHV-2DBF + leader_1_end: null + leader_1_start: null + leader_2_end: null + leader_2_start: null + locus: IGH + maintainer: William Lees + paralogs: [] + rearranged_support: [] + release_date: 24-Nov-2021 + release_description: First release release_version: 1 - release_description: '' - release_date: '2021-11-24' - germline_set_name: CAST IGH - germline_set_ref: OGRDB:G00007.1 - pub_ids: '' + sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + sequence_type: V species: id: NCBITAXON:10090 label: Mus musculus species_subgroup: CAST_EiJ species_subgroup_type: strain + status: active + subgroup_designation: null + unrearranged_support: [] + utr_5_prime_end: null + utr_5_prime_start: null + v_gene_delineations: + - aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + alignment: + - '1' + - '2' + - '3' + - '4' 
+ - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + cdr1_end: 110 + cdr1_start: 76 + cdr2_end: 160 + cdr2_start: 151 + cdr3_start: 295 + delineation_scheme: IMGT + fwr1_end: 75 + fwr1_start: 1 + fwr2_end: 150 + fwr2_start: 111 + fwr3_end: 294 + fwr3_start: 161 + sequence_delineation_id: '1' + unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + v_rs_end: null + v_rs_start: null + - acknowledgements: [] + aliases: + - watson_et_al:CAST_EiJ_IGHV8-2 + allele_description_id: OGRDB:A00314 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO + allele_designation: null + chromosome: null + coding_sequence: 
CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' + curational_tags: null + functional: true + gene_designation: null + gene_end: null + gene_start: null + inference_type: Rearranged only + lab_address: Birkbeck College, University of London, Malet Street, London + label: IGHV-2ETO + leader_1_end: null + leader_1_start: null + leader_2_end: null + leader_2_start: null locus: IGH - allele_descriptions: - - allele_description_id: OGRDB:A00301 - allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London - release_version: 1 - release_date: 24-Nov-2021 - release_description: First release - label: IGHV-2DBF - sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - aliases: - - watson_et_al:CAST_EiJ_IGHV5-3 - locus: IGH - chromosome: - sequence_type: V - functional: true - inference_type: "Rearranged only" - species: - id: NCBITAXON:10090 - label: Mus musculus - species_subgroup: CAST_EiJ - species_subgroup_type: strain - status: active - 
gene_designation: - subgroup_designation: - allele_designation: - gene_start: - gene_end: - utr_5_prime_start: - utr_5_prime_end: - leader_1_start: - leader_1_end: - leader_2_start: - leader_2_end: - v_rs_start: - v_rs_end: - v_gene_delineations: - - sequence_delineation_id: '1' - delineation_scheme: IMGT - fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: - - '1' - - '2' - - '3' - - '4' - - '5' - - '6' - - '7' - - '8' - - '9' - - '10' - - '11' - - '12' - - '13' - - '14' - - '15' - - '16' - - '17' - - '18' - - '19' - - '20' - - '21' - - '22' - - '23' - - '24' - - '25' - - '26' - - '27' - - '28' - - '29' - - '30' - - '31' - - '32' - - '33' - - '34' - - '35' - - '36' - - '37' - - '38' - - '39' - - '40' - - '41' - - '42' - - '43' - - '44' - - '45' - - '46' - - '47' - - '48' - - '49' - - '50' - - '51' - - '52' - - '53' - - '54' - - '55' - - '56' - - '57' - - '58' - - '59' - - '60' - - '61' - - '62' - - '63' - - '64' - - '65' - - '66' - - '67' - - '68' - - '69' - - '70' - - '71' - - '72' - - '73' - - '74' - - '75' - - '76' - - '77' - - '78' - - '79' - - '80' - - '81' - - '82' - - '83' - - '84' - - '85' - - '86' - - '87' - - '88' - - '89' - - '90' - - '91' - - '92' - - '93' - - '94' - - '95' - - '96' - - '97' - - '98' - - '99' - - '100' - - '101' - - '102' - - '103' - - '104' - unrearranged_support: [] - rearranged_support: [] - paralogs: [] - curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' - curational_tags: - - allele_description_id: OGRDB:A00314 - allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London - release_version: 1 - release_date: 24-Nov-2021 - release_description: First release - label: IGHV-2ETO - sequence: 
CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - aliases: - - watson_et_al:CAST_EiJ_IGHV8-2 - locus: IGH - chromosome: - sequence_type: V - functional: true - inference_type: "Rearranged only" - species: - id: NCBITAXON:10090 - label: Mus musculus - species_subgroup: CAST_EiJ - species_subgroup_type: strain - status: active - gene_designation: - subgroup_designation: - allele_designation: - gene_start: - gene_end: - utr_5_prime_start: - utr_5_prime_end: - leader_1_start: - leader_1_end: - leader_2_start: - leader_2_end: - v_rs_start: - v_rs_end: - v_gene_delineations: - - sequence_delineation_id: '1' - delineation_scheme: IMGT - fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: - - '1' - - '2' - - '3' - - '4' - - '5' - - '6' - - '7' - - '8' - - '9' - - '10' - - '11' - - '12' - - '13' - - '14' - - '15' - - '16' - - '17' - - '18' - - '19' - - '20' - - '21' - - '22' - - '23' - - '24' - - '25' - - '26' - - '27' - - '28' - - '29' - - '30' - - '31' - - '32' - - '33' - - '34' - - '35' - - '36' - - '37' - - '38' - - '39' - - '40' - - '41' - - '42' - - '43' - - '44' - - '45' - - '46' - - '47' - - '48' - - '49' - - '50' - - '51' - - '52' - - '53' - - '54' - - '55' - - '56' - - '57' - - '58' - - '59' - - '60' - - '61' - - '62' - - '63' - - '64' - - '65' - - 
'66' - - '67' - - '68' - - '69' - - '70' - - '71' - - '72' - - '73' - - '74' - - '75' - - '76' - - '77' - - '78' - - '79' - - '80' - - '81' - - '82' - - '83' - - '84' - - '85' - - '86' - - '87' - - '88' - - '89' - - '90' - - '91' - - '92' - - '93' - - '94' - - '95' - - '96' - - '97' - - '98' - - '99' - - '100' - - '101' - - '102' - - '103' - - '104' - unrearranged_support: [] - rearranged_support: [] - paralogs: [] - curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' - curational_tags: - curation: + maintainer: William Lees + paralogs: [] + rearranged_support: [] + release_date: 24-Nov-2021 + release_description: First release + release_version: 1 + sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + sequence_type: V + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + subgroup_designation: null + unrearranged_support: [] + utr_5_prime_end: null + utr_5_prime_start: null + v_gene_delineations: + - aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + alignment: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - 
'42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + cdr1_end: 110 + cdr1_start: 76 + cdr2_end: 160 + cdr2_start: 151 + cdr3_start: 295 + delineation_scheme: IMGT + fwr1_end: 75 + fwr1_start: 1 + fwr2_end: 150 + fwr2_start: 111 + fwr3_end: 294 + fwr3_start: 161 + sequence_delineation_id: '1' + unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + v_rs_end: null + v_rs_start: null + author: William Lees + curation: null + germline_set_id: OGRDB:G00007 + germline_set_name: CAST IGH + germline_set_ref: OGRDB:G00007.1 + lab_address: Birkbeck College, University of London, Malet Street, London + lab_name: '' + locus: IGH + pub_ids: '' + release_date: '2021-11-24' + release_description: '' + release_version: 1 + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + GenotypeSet: - receptor_genotype_set_id: '1' diff --git a/lang/python/tests/data/good_germline_set.json b/lang/python/tests/data/good_germline_set.json index d36d19ad4..38000896f 100644 --- a/lang/python/tests/data/good_germline_set.json +++ b/lang/python/tests/data/good_germline_set.json @@ -26,8 +26,8 @@ "release_date": "24-Nov-2021", "release_description": "First release", 
"label": "IGHV-2DBF", - "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -57,17 +57,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", @@ -193,7 +195,7 @@ "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -223,17 +225,19 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, "alignment": [ "1", "2", diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index bcd53f87f..7d4489b45 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -259,7 +259,7 @@ Attributes: - essential - important - defined - default: useful + default: defined identifier: type: boolean description: > @@ -487,8 +487,10 @@ Acknowledgement: properties: acknowledgement_id: type: string - nullable: false description: unique identifier of this Acknowledgement within the file + x-airr: + miairr: important + nullable: true name: type: string nullable: true @@ -525,50 +527,69 @@ RearrangedSequence: properties: sequence_id: type: string - nullable: false + x-airr: + miairr: important description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + 
nullable: true sequence: type: string - nullable: false + x-airr: + miairr: essential description: nucleotide sequence + nullable: true derivation: type: string - nullable: false enum: - DNA - RNA description: The class of nucleic acid that was used as primary starting material + x-airr: + miairr: important + nullable: true observation_type: type: string - nullable: false + x-airr: + miairr: essential description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire enum: - direct sequencing - inference from repertoire + nullable: true curation: type: string nullable: true description: Curational notes on the sequence + nullable: true repository_name: type: string - nullable: false + x-airr: + miairr: defined description: Name of the repository in which the sequence has been deposited + nullable: true repository_ref: type: string - nullable: false + x-airr: + miairr: defined description: Queryable id or accession number of the sequence published by the repository + nullable: true deposited_version: type: string - nullable: false + x-airr: + miairr: defined description: Version number of the sequence within the repository + nullable: true sequence_start: type: integer - nullable: false + x-airr: + miairr: essential description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited + nullable: true sequence_end: type: integer - nullable: false + x-airr: + miairr: essential description: End co-ordinate of the sequence detailed in this record, within the sequence deposited + nullable: true UnrearrangedSequence: discriminator: @@ -587,24 +608,33 @@ UnrearrangedSequence: properties: sequence_id: type: string - nullable: false + x-airr: + miairr: important description: unique identifier of this UnrearrangedSequence within the file + nullable: true sequence: type: string - nullable: false + x-airr: + miairr: essential description: Sequence of interest described in this record 
(typically this will include gene and promoter region) + nullable: true curation: type: string nullable: true description: Curational notes on the sequence + nullable: true repository_name: type: string - nullable: false + x-airr: + miairr: defined description: Name of the repository in which the assembly or contig is deposited + nullable: true repository_ref: type: string - nullable: false + x-airr: + miairr: defined description: Queryable id or accession number of the sequence published by the repository + nullable: true patch_no: type: string nullable: true @@ -652,63 +682,99 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string - nullable: false + x-airr: + miairr: important description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true delineation_scheme: type: string - nullable: false + x-airr: + miairr: important description: Name of the delineation scheme example: Chothia + nullable: true + unaligned_sequence: + type: string + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + nullable: true + aligned_sequence: + type: string + description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) + nullable: true fwr1_start: type: integer - nullable: false - description: FWR1 start co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR1 start co-ordinate in the 'unaligned sequence' field + nullable: true fwr1_end: type: integer - nullable: false - description: FWR1 end co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR1 end co-ordinate in the 'unaligned sequence' field + nullable: true cdr1_start: type: integer - nullable: false - description: CDR1 start co-ordinate in Gene Description 
'alignment' field + x-airr: + miairr: important + description: CDR1 start co-ordinate in the 'unaligned sequence' field + nullable: true cdr1_end: type: integer - nullable: false - description: CDR1 end co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: CDR1 end co-ordinate in the 'unaligned sequence' field + nullable: true fwr2_start: type: integer - nullable: false - description: FWR2 start co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR2 start co-ordinate in the 'unaligned sequence' field + nullable: true fwr2_end: type: integer - nullable: false - description: FWR2 end co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR2 end co-ordinate in the 'unaligned sequence' field + nullable: true cdr2_start: type: integer - nullable: false - description: CDR2 start co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: CDR2 start co-ordinate in the 'unaligned sequence' field + nullable: true cdr2_end: type: integer - nullable: false - description: CDR2 end co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: CDR2 end co-ordinate in the 'unaligned sequence' field + nullable: true fwr3_start: type: integer - nullable: false - description: FWR3 start co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR3 start co-ordinate in the 'unaligned sequence' field + nullable: true fwr3_end: type: integer - nullable: false - description: FWR3 end co-ordinate in Gene Description 'alignment' field + x-airr: + miairr: important + description: FWR3 end co-ordinate in the 'unaligned sequence' field + nullable: true cdr3_start: type: integer - nullable: false - description: CDR3 start co-ordinate in Gene Description 'alignment' field - alignment: + x-airr: + miairr: important + description: CDR3 start co-ordinate in the 
'unaligned sequence' field + nullable: true + alignment_labels: type: array nullable: true items: type: string - description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: @@ -733,17 +799,23 @@ AlleleDescription: properties: allele_description_id: type: string - nullable: false + x-airr: + miairr: important description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true allele_description_ref: type: string - nullable: false + x-airr: + miairr: important description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 + nullable: true maintainer: type: string - nullable: false + x-airr: + miairr: defined description: Maintainer of this sequence record + nullable: true acknowledgements: type: array nullable: true @@ -752,36 +824,50 @@ AlleleDescription: $ref: '#/Acknowledgement' lab_address: type: string - nullable: false + x-airr: + miairr: defined description: Institution and full address of corresponding author + nullable: true release_version: type: integer - nullable: false + x-airr: + miairr: important description: Version number of this record, updated whenever a revised version is published or released + nullable: true release_date: type: string format: date-time - nullable: false + x-airr: + miairr: important description: Date of this release title: Release Date example: "2021-02-02" + nullable: true release_description: type: string - nullable: false + x-airr: + miairr: important 
description: Brief descriptive notes of the reason for this release and the changes embodied + nullable: true label: type: string - nullable: true + x-airr: + miairr: important description: The accepted name for this gene or allele example: IGHV1-69*01 + nullable: true sequence: type: string - nullable: false + x-airr: + miairr: essential description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + nullable: true coding_sequence: type: string - nullable: false - description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + x-airr: + miairr: important + description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader + nullable: true aliases: type: array nullable: true @@ -790,7 +876,8 @@ AlleleDescription: description: Alternative names for this sequence locus: type: string - nullable: false + x-airr: + miairr: essential enum: - IGH - IGK @@ -800,39 +887,48 @@ AlleleDescription: - TRG - TRD description: Gene locus + nullable: true chromosome: type: integer nullable: true description: chromosome on which the gene is located sequence_type: type: string - nullable: false + x-airr: + miairr: essential enum: - V - D - J - C description: Sequence type (V, D, J, C) + nullable: true functional: type: boolean - nullable: false + x-airr: + miairr: important description: True if the gene is functional, false if it is a pseudogene + nullable: true inference_type: type: string - nullable: false + x-airr: + miairr: important enum: - Genomic and rearranged - Genomic only - Rearranged only description: Type of inference(s) from which this gene sequence was inferred + nullable: true species: $ref: '#/Ontology' - nullable: false + x-airr: + miairr: essential description: Binomial 
designation of subject's species title: Organism example: id: NCBITAXON:9606 label: Homo sapiens + nullable: true species_subgroup: type: string nullable: true @@ -868,6 +964,14 @@ AlleleDescription: type: string nullable: true description: Allele number or other identifier, as (and if) defined + allele_similarity_cluster_designation: + type: string + nullable: true + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + nullable: true + description: Membership ID of the allele within the similarity cluster, if a cluster is designated j_codon_frame: type: integer nullable: true @@ -878,12 +982,17 @@ AlleleDescription: description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) gene_start: type: integer - nullable: true description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + x-airr: + miairr: important + nullable: true gene_end: type: integer nullable: true description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + x-airr: + miairr: important + nullable: true utr_5_prime_start: type: integer nullable: true @@ -1005,20 +1114,28 @@ GermlineSet: properties: germline_set_id: type: string - nullable: false + x-airr: + miairr: important description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true author: type: string - nullable: false + x-airr: + miairr: important description: Corresponding author + nullable: true lab_name: type: string - nullable: false + x-airr: + miairr: important description: Department of corresponding author + 
nullable: true lab_address: type: string - nullable: false + x-airr: + miairr: important description: Institutional address of corresponding author + nullable: true acknowledgements: type: array nullable: true @@ -1027,28 +1144,38 @@ GermlineSet: $ref: '#/Acknowledgement' release_version: type: number - nullable: false + x-airr: + miairr: important description: Version number of this record, allocated automatically + nullable: true release_description: type: string - nullable: false + x-airr: + miairr: important description: Brief descriptive notes of the reason for this release and the changes embodied + nullable: true release_date: type: string format: date-time - nullable: false + x-airr: + miairr: important description: Date of this release title: Release Date example: "2021-02-02" + nullable: true germline_set_name: type: string - nullable: false + x-airr: + miairr: important description: descriptive name of this germline set + nullable: true germline_set_ref: type: string - nullable: false + x-airr: + miairr: important description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 + nullable: true pub_ids: type: string nullable: true @@ -1056,12 +1183,14 @@ GermlineSet: example: "PMID:85642,PMID:12345" species: $ref: '#/Ontology' - nullable: false + x-airr: + miairr: essential description: Binomial designation of subject's species title: Organism example: id: NCBITAXON:9606 label: Homo sapiens + nullable: true species_subgroup: type: string description: Race, strain or other species subgroup to which this subject belongs @@ -1078,7 +1207,8 @@ GermlineSet: - locational locus: type: string - nullable: false + x-airr: + miairr: essential enum: - IGH - IGK @@ -1088,12 +1218,15 @@ GermlineSet: - TRG - TRD description: Gene locus + nullable: true allele_descriptions: type: array - nullable: false + x-airr: + miairr: important items: $ref: '#/AlleleDescription' description: list of 
allele_descriptions in the germline set + nullable: true curation: type: string nullable: true @@ -1114,8 +1247,10 @@ GenotypeSet: properties: receptor_genotype_set_id: type: string - nullable: false + x-airr: + miairr: important description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. type: array @@ -1123,7 +1258,6 @@ GenotypeSet: items: $ref: '#/Genotype' - # Genotype of adaptive immune receptors # This enumerates the alleles and gene deletions inferred in a single subject. # Included alleles may either be listed by reference to a GermlineSet, or @@ -1139,11 +1273,12 @@ Genotype: properties: receptor_genotype_id: type: string - nullable: false + x-airr: + miairr: important description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true locus: type: string - nullable: false enum: - IGH - IGK @@ -1156,70 +1291,34 @@ Genotype: x-airr: adc-query-support: true format: controlled vocabulary + miairr: essential + nullable: true documented_alleles: type: array - nullable: true - description: Array of alleles inferred to be present which are documented in GermlineSets + description: List of alleles documented in reference set(s) items: - type: object - properties: - label: - type: string - nullable: false - description: The accepted name for this allele, taken from the GermlineSet - germline_set_ref: - type: string - nullable: false - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DocumentedAllele' x-airr: - adc-query-support: true + miairr: important + nullable: true undocumented_alleles: type: array nullable: true - description: Array of alleles inferred to be present and not documented in an identified GermlineSet + description: List of alleles inferred to be present and not documented in an identified GermlineSet items: - type: object - properties: - allele_name: - type: string - nullable: false - description: Allele name as allocated by the inference pipeline - sequence: - type: string - nullable: false - description: nt sequence of the allele, as provided by the inference pipeline - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/UndocumentedAllele' x-airr: adc-query-support: true + nullable: true deleted_genes: type: array nullable: true description: Array of genes identified as being deleted in this genotype items: - type: object - properties: - label: - type: string - nullable: false - description: The accepted name for this gene, taken from the GermlineSet - germline_set_ref: - type: string - nullable: false - description: GermlineSet from which it was taken (issuer/name/version) - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DeletedGene' x-airr: adc-query-support: true + nullable: true inference_process: type: string nullable: true @@ -1233,6 +1332,92 @@ Genotype: adc-query-support: true format: controlled vocabulary +# Documented Allele +# This describes a 'known' allele found in a genotype +# It 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + discriminator: + propertyName: AIRR + required: + - label + - germline_set_ref + properties: + label: + type: string + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + nullable: true + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + nullable: true + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + +# Undocumented Allele +# This describes a 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + discriminator: + propertyName: AIRR + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + x-airr: + miairr: important + description: Allele name as allocated by the inference pipeline + nullable: true + sequence: + type: string + x-airr: + miairr: essential + description: nt sequence of the allele, as provided by the inference pipeline + nullable: true + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + discriminator: + propertyName: AIRR + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + x-airr: + miairr: essential + description: The accepted name for this gene, taken from the GermlineSet + nullable: true + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken (issuer/name/version) + nullable: true + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: @@ -1245,15 +1430,19 @@ MHCGenotypeSet: properties: mhc_genotype_set_id: type: string - nullable: false + x-airr: + miairr: important description: A unique identifier for this MHCGenotypeSet + nullable: true mhc_genotype_list: description: List of MHCGenotypes included in this set type: array - nullable: false + x-airr: + miairr: important items: $ref: '#/MHCGenotype' - + nullable: true + # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: @@ -1267,11 +1456,12 @@ MHCGenotype: properties: mhc_genotype_id: type: string - nullable: false + x-airr: + miairr: important description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study + nullable: true mhc_class: type: string - nullable: false description: Class of MHC alleles described by the MHCGenotype enum: - MHC-I @@ -1279,46 +1469,21 @@ MHCGenotype: - MHC-nonclassical example: MHC-I x-airr: + miairr: essential adc-query-support: true format: controlled vocabulary + nullable: true mhc_alleles: type: array - nullable: false description: List of MHC alleles of the indicated 
mhc_class identified in an individual items: - type: object - properties: - allele_designation: - type: string - nullable: false - description: > - The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc - identifiers, if provided by the mhc_typing method - gene: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the described allele belongs - title: MHC gene - example: - id: MRO:0000046 - label: HLA-A - x-airr: - adc-query-support: false - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - reference_set_ref: - type: string - nullable: false - description: Repository and list from which it was taken (issuer/name/version) + $ref: '#/MHCAllele' x-airr: + miairr: important adc-query-support: true + nullable: true mhc_genotyping_method: type: string - nullable: true description: > Information on how the genotype was determined. The content of this field should come from a list of recommended terms provided in the AIRR Schema documentation. 
@@ -1326,6 +1491,47 @@ MHCGenotype: example: pcr_low_resolution x-airr: adc-query-support: true + miairr: important + nullable: true + + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + x-airr: + miairr: important + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + nullable: true + gene: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + miairr: important + nullable: true + reference_set_ref: + type: string + x-airr: + miairr: important + nullable: true + description: Repository and list from which it was taken (issuer/name/version) + # # Repertoire metadata schema diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 788d8155d..449734012 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -255,7 +255,7 @@ Attributes: - essential - important - defined - default: useful + default: defined identifier: type: boolean description: > @@ -490,7 +490,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: - nullable: false + miairr: important name: type: string description: Full name of individual @@ -525,12 +525,12 @@ RearrangedSequence: type: string description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important sequence: type: string description: nucleotide sequence x-airr: - nullable: false + miairr: essential derivation: type: string enum: @@ -538,7 +538,7 @@ RearrangedSequence: - RNA description: The class 
of nucleic acid that was used as primary starting material x-airr: - nullable: false + miairr: important observation_type: type: string description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire @@ -546,7 +546,7 @@ RearrangedSequence: - direct sequencing - inference from repertoire x-airr: - nullable: false + miairr: essential curation: type: string description: Curational notes on the sequence @@ -554,27 +554,27 @@ RearrangedSequence: type: string description: Name of the repository in which the sequence has been deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined deposited_version: type: string description: Version number of the sequence within the repository x-airr: - nullable: false + miairr: defined sequence_start: type: integer description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential sequence_end: type: integer description: End co-ordinate of the sequence detailed in this record, within the sequence deposited x-airr: - nullable: false + miairr: essential UnrearrangedSequence: discriminator: AIRR @@ -594,12 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: - nullable: false + miairr: important sequence: type: string description: Sequence of interest described in this record (typically this will include gene and promoter region) x-airr: - nullable: false + miairr: essential curation: type: string description: Curational notes on the sequence @@ -607,12 +607,12 @@ UnrearrangedSequence: type: string description: Name of the repository in which the assembly or contig is deposited x-airr: - nullable: false + miairr: defined repository_ref: type: string description: 
Queryable id or accession number of the sequence published by the repository x-airr: - nullable: false + miairr: defined patch_no: type: string description: Genome assembly patch number in which this gene was determined @@ -656,73 +656,81 @@ SequenceDelineationV: type: string description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important delineation_scheme: type: string description: Name of the delineation scheme example: Chothia x-airr: - nullable: false + miairr: important + unaligned_sequence: + type: string + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) fwr1_start: type: integer - description: FWR1 start co-ordinate in Gene Description 'alignment' field + description: FWR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr1_end: type: integer - description: FWR1 end co-ordinate in Gene Description 'alignment' field + description: FWR1 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_start: type: integer - description: CDR1 start co-ordinate in Gene Description 'alignment' field + description: CDR1 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr1_end: type: integer - description: CDR1 end co-ordinate in Gene Description 'alignment' field + description: CDR1 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr2_start: type: integer - description: FWR2 start co-ordinate in Gene Description 'alignment' field + description: FWR2 start co-ordinate in the 'unaligned sequence' 
field x-airr: - nullable: false + miairr: important fwr2_end: type: integer - description: FWR2 end co-ordinate in Gene Description 'alignment' field + description: FWR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_start: type: integer - description: CDR2 start co-ordinate in Gene Description 'alignment' field + description: CDR2 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr2_end: type: integer - description: CDR2 end co-ordinate in Gene Description 'alignment' field + description: CDR2 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_start: type: integer - description: FWR3 start co-ordinate in Gene Description 'alignment' field + description: FWR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important fwr3_end: type: integer - description: FWR3 end co-ordinate in Gene Description 'alignment' field + description: FWR3 end co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false + miairr: important cdr3_start: type: integer - description: CDR3 start co-ordinate in Gene Description 'alignment' field + description: CDR3 start co-ordinate in the 'unaligned sequence' field x-airr: - nullable: false - alignment: + miairr: important + alignment_labels: type: array items: type: string - description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: @@ -748,18 +756,18 @@ AlleleDescription: type: string description: Unique identifier of this AlleleDescription within the file, typically generated by the 
repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important allele_description_ref: type: string description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 x-airr: - nullable: false + miairr: important maintainer: type: string description: Maintainer of this sequence record x-airr: - nullable: false + miairr: defined acknowledgements: type: array description: List of individuals whose contribution to the gene description should be acknowledged @@ -769,12 +777,12 @@ AlleleDescription: type: string description: Institution and full address of corresponding author x-airr: - nullable: false + miairr: defined release_version: type: integer description: Version number of this record, updated whenever a revised version is published or released x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -782,26 +790,28 @@ AlleleDescription: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important label: type: string description: The accepted name for this gene or allele example: IGHV1-69*01 + x-airr: + miairr: important sequence: type: string description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences x-airr: - nullable: false + miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + description: nucleotide sequence of the core coding region, i.e. 
the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader x-airr: - nullable: false + miairr: important aliases: type: array items: @@ -819,7 +829,7 @@ AlleleDescription: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential chromosome: type: integer description: chromosome on which the gene is located @@ -832,12 +842,12 @@ AlleleDescription: - C description: Sequence type (V, D, J, C) x-airr: - nullable: false + miairr: essential functional: type: boolean description: True if the gene is functional, false if it is a pseudogene x-airr: - nullable: false + miairr: important inference_type: type: string enum: @@ -846,7 +856,7 @@ AlleleDescription: - Rearranged only description: Type of inference(s) from which this gene sequence was inferred x-airr: - nullable: false + miairr: important species: $ref: '#/Ontology' description: Binomial designation of subject's species @@ -855,7 +865,7 @@ AlleleDescription: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or other species subgroup to which this subject belongs @@ -885,6 +895,12 @@ AlleleDescription: allele_designation: type: string description: Allele number or other identifier, as (and if) defined + allele_similarity_cluster_designation: + type: string + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + description: Membership ID of the allele within the similarity cluster, if a cluster is designated j_codon_frame: type: integer enum: @@ -895,9 +911,13 @@ AlleleDescription: gene_start: type: integer description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + x-airr: + miairr: important gene_end: type: integer description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + x-airr: + 
miairr: important utr_5_prime_start: type: integer description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) @@ -998,22 +1018,22 @@ GermlineSet: type: string description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important author: type: string description: Corresponding author x-airr: - nullable: false + miairr: important lab_name: type: string description: Department of corresponding author x-airr: - nullable: false + miairr: important lab_address: type: string description: Institutional address of corresponding author x-airr: - nullable: false + miairr: important acknowledgements: type: array description: List of individuals whose contribution to the germline set should be acknowledged @@ -1023,12 +1043,12 @@ GermlineSet: type: number description: Version number of this record, allocated automatically x-airr: - nullable: false + miairr: important release_description: type: string description: Brief descriptive notes of the reason for this release and the changes embodied x-airr: - nullable: false + miairr: important release_date: type: string format: date-time @@ -1036,18 +1056,18 @@ GermlineSet: title: Release Date example: "2021-02-02" x-airr: - nullable: false + miairr: important germline_set_name: type: string description: descriptive name of this germline set x-airr: - nullable: false + miairr: important germline_set_ref: type: string description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 x-airr: - nullable: false + miairr: important pub_ids: type: string description: Publications describing the germline set @@ -1060,7 +1080,7 @@ GermlineSet: id: NCBITAXON:9606 label: Homo sapiens x-airr: - nullable: false + miairr: essential species_subgroup: type: string description: Race, strain or 
other species subgroup to which this subject belongs @@ -1085,14 +1105,14 @@ GermlineSet: - TRD description: Gene locus x-airr: - nullable: false + miairr: essential allele_descriptions: type: array items: $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: - nullable: false + miairr: important curation: type: string description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. @@ -1113,7 +1133,7 @@ GenotypeSet: type: string description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. type: array @@ -1135,7 +1155,7 @@ Genotype: type: string description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record x-airr: - nullable: false + miairr: important locus: type: string enum: @@ -1148,75 +1168,31 @@ Genotype: - TRG example: IGH x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary documented_alleles: type: array - description: Array of alleles inferred to be present which are documented in GermlineSets + description: List of alleles documented in reference set(s) items: - type: object - properties: - label: - type: string - description: The accepted name for this allele, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DocumentedAllele' x-airr: - nullable: true - adc-query-support: true + miairr: important undocumented_alleles: type: array - description: Array of alleles inferred to be present and not documented in an identified GermlineSet + nullable: true + description: List of alleles inferred to be present and not documented in an identified GermlineSet items: - type: object - properties: - allele_name: - type: string - description: Allele name as allocated by the inference pipeline - x-airr: - nullable: false - sequence: - type: string - description: nt sequence of the allele, as provided by the inference pipeline - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/UndocumentedAllele' x-airr: - nullable: true adc-query-support: true deleted_genes: type: array + nullable: true description: Array of genes identified as being deleted in this genotype items: - type: object - properties: - label: - type: string - description: The accepted name for this gene, taken from the GermlineSet - x-airr: - nullable: false - germline_set_ref: - type: string - description: GermlineSet from which it was taken (issuer/name/version) - x-airr: - nullable: false - phasing: - type: integer - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DeletedGene' x-airr: - nullable: true adc-query-support: true inference_process: type: string @@ -1231,6 +1207,84 @@ Genotype: adc-query-support: true format: controlled vocabulary +# Documented Allele +# This describes a 'known' allele found in a genotype +# It 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + discriminator: AIRR + required: + - label + - germline_set_ref + properties: + label: + type: string + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + +# Undocumented Allele +# This describes a 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + discriminator: AIRR + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + x-airr: + miairr: important + description: Allele name as allocated by the inference pipeline + sequence: + type: string + x-airr: + miairr: essential + description: nt sequence of the allele, as provided by the inference pipeline + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + discriminator: AIRR + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + x-airr: + miairr: essential + description: The accepted name for this gene, taken from the GermlineSet + germline_set_ref: + type: string + x-airr: + miairr: important + description: GermlineSet from which it was taken (issuer/name/version) + phasing: + type: integer + nullable: true + description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + + # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: discriminator: AIRR @@ -1243,14 +1297,14 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: - nullable: false + miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set type: array items: $ref: '#/MHCGenotype' x-airr: - nullable: false + miairr: important # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: @@ -1265,7 +1319,7 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: - nullable: false + miairr: important mhc_class: type: string description: Class of MHC alleles described by the MHCGenotype @@ -1275,45 +1329,16 @@ MHCGenotype: - MHC-nonclassical example: MHC-I x-airr: - nullable: false + miairr: essential adc-query-support: true format: controlled vocabulary mhc_alleles: type: array description: List of MHC alleles of the indicated mhc_class identified in an individual items: - type: object - properties: - allele_designation: - type: string - description: > - The accepted designation of an allele, usually its gene symbol plus 
allele/sub-allele/etc - identifiers, if provided by the mhc_typing method - x-airr: - nullable: false - gene: - $ref: '#/Ontology' - description: The MHC gene to which the described allele belongs - title: MHC gene - example: - id: MRO:0000046 - label: HLA-A - x-airr: - nullable: true - adc-query-support: false - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - reference_set_ref: - type: string - description: Repository and list from which it was taken (issuer/name/version) - x-airr: - nullable: false + $ref: '#/MHCAllele' x-airr: - nullable: false + miairr: important adc-query-support: true mhc_genotyping_method: type: string @@ -1323,9 +1348,43 @@ MHCGenotype: title: MHC genotyping method example: pcr_low_resolution x-airr: - nullable: true + miairr: important adc-query-support: true + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + x-airr: + miairr: important + gene: + $ref: '#/Ontology' + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + miairr: important + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + reference_set_ref: + type: string + description: Repository and list from which it was taken (issuer/name/version) + x-airr: + miairr: important + # # Repertoire metadata schema # From 4c7cde184aa19f68a1dbf80b430a3e7c234d3467 Mon Sep 17 00:00:00 2001 From: Jason Vander Heiden Date: Sun, 11 Jun 2023 23:27:42 -0700 Subject: [PATCH 10/59] Various schema updates (#674) Various minor schema updates --------- Co-authored-by: Christian Busse Co-authored-by: Scott Christley --- README.rst | 2 +- docs/conf.py | 2 +- docs/standards/news.rst | 7 + 
lang/R/inst/extdata/airr-schema.yaml | 614 ++++++++-------- lang/python/airr/specs/airr-schema.yaml | 614 ++++++++-------- specs/airr-schema-openapi3.yaml | 889 ++++++++++++------------ specs/airr-schema.yaml | 608 +++++++++------- 7 files changed, 1488 insertions(+), 1248 deletions(-) diff --git a/README.rst b/README.rst index 23375b543..3f96b482f 100644 --- a/README.rst +++ b/README.rst @@ -110,7 +110,7 @@ References Copyright & License =================== -Copyright 2015 - 2022 by the AIRR Community and `contributors`_. This +Copyright 2015 - 2023 by the AIRR Community and `contributors`_. This work is licensed under a `Creative Commons Attribution 4.0 International License`_. diff --git a/docs/conf.py b/docs/conf.py index ffcf10245..74eb1d3c9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -78,7 +78,7 @@ def __getattr__(cls, name): return MagicMock() # General information about the project. project = 'AIRR Standards' -copyright = '2017-2021, AIRR Community' +copyright = '2015-2023, AIRR Community' author = 'AIRR Community' # The name of the Pygments (syntax highlighting) style to use. diff --git a/docs/standards/news.rst b/docs/standards/news.rst index 849ded801..7b56d2837 100644 --- a/docs/standards/news.rst +++ b/docs/standards/news.rst @@ -3,6 +3,13 @@ Schema Release Notes ================================================================================ +Version 1.4.2: TBD, 2023 +-------------------------------------------------------------------------------- + +1. Added "IGI" to all locus enum fields. +2. Modified ``x-airr: nullable`` and ``x-airr: identifier`` values on multiple + Germline schema. 
+ Version 1.4.1: August 27, 2022 -------------------------------------------------------------------------------- diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 449734012..4b87c621c 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -4,7 +4,7 @@ Info: title: AIRR Schema description: Schema definitions for AIRR standards objects - version: "1.4" + version: 1.4 contact: name: AIRR Community url: https://github.com/airr-community @@ -16,7 +16,6 @@ Info: # Properties that are based upon an ontology use this # standard schema definition Ontology: - discriminator: AIRR type: object properties: id: @@ -38,11 +37,11 @@ CURIEMap: CHEBI: type: ontology default: - map: OBO - provider: OLS + map: OBO + provider: OLS map: - OBO: - iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" CL: type: ontology default: @@ -245,7 +244,6 @@ InformationProvider: # attributes are attached to an AIRR field with the x-airr property. Attributes: - discriminator: AIRR type: object properties: miairr: @@ -336,7 +334,6 @@ Attributes: # the value should not be null. DataFile: - discriminator: AIRR type: object properties: Info: @@ -396,7 +393,6 @@ DataFile: # AIRR Info object, should be similar to openapi # should we point to an openapi schema? InfoObject: - discriminator: AIRR type: object description: Provides information about data and API responses. required: @@ -436,7 +432,6 @@ InfoObject: # A time point TimePoint: - discriminator: AIRR description: Time point at which an observation or other action was performed. 
type: object properties: @@ -478,7 +473,6 @@ TimePoint: # An individual Acknowledgement: - discriminator: AIRR description: Individual whose contribution to this work should be acknowledged type: object required: @@ -490,6 +484,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: + identifier: true miairr: important name: type: string @@ -507,9 +502,10 @@ Acknowledgement: # Rearranged and genomic germline sequences RearrangedSequence: - discriminator: AIRR - description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. required: - sequence_id - sequence @@ -523,8 +519,11 @@ RearrangedSequence: properties: sequence_id: type: string - description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important sequence: type: string @@ -541,10 +540,12 @@ RearrangedSequence: miairr: important observation_type: type: string - description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire enum: - - direct sequencing - - inference from repertoire + - "direct sequencing" + - "inference from repertoire" + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. 
x-airr: miairr: essential curation: @@ -577,7 +578,6 @@ RearrangedSequence: miairr: essential UnrearrangedSequence: - discriminator: AIRR description: Details of an unrearranged sequence contributing support for a gene or allele type: object required: @@ -594,10 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: + identifier: true miairr: important sequence: type: string - description: Sequence of interest described in this record (typically this will include gene and promoter region) + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. x-airr: miairr: essential curation: @@ -618,23 +620,29 @@ UnrearrangedSequence: description: Genome assembly patch number in which this gene was determined gff_seqid: type: string - description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. gff_start: type: integer - description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. gff_end: type: integer - description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. 
strand: type: string enum: - - + + - "+" - "-" description: sense (+ or -) + x-airr: + nullable: true # V gene delineation SequenceDelineationV: - discriminator: AIRR description: Delineation of a V-gene in a particular system type: object required: @@ -654,8 +662,11 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string - description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important delineation_scheme: type: string @@ -670,7 +681,9 @@ SequenceDelineationV: description: entire V-sequence covered by this delineation aligned_sequence: type: string - description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. fwr1_start: type: integer description: FWR1 start co-ordinate in the 'unaligned sequence' field @@ -729,12 +742,13 @@ SequenceDelineationV: alignment_labels: type: array items: - type: string - description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. 
# Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: - discriminator: AIRR description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations type: object required: @@ -754,8 +768,11 @@ AlleleDescription: properties: allele_description_id: type: string - description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important allele_description_ref: type: string @@ -804,12 +821,16 @@ AlleleDescription: miairr: important sequence: type: string - description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. x-airr: miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
x-airr: miairr: important aliases: @@ -821,6 +842,7 @@ AlleleDescription: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -832,7 +854,7 @@ AlleleDescription: miairr: essential chromosome: type: integer - description: chromosome on which the gene is located + description: chromosome on which the gene is located sequence_type: type: string enum: @@ -851,9 +873,9 @@ AlleleDescription: inference_type: type: string enum: - - Genomic and rearranged - - Genomic only - - Rearranged only + - "Genomic and rearranged" + - "Genomic only" + - "Rearranged only" description: Type of inference(s) from which this gene sequence was inferred x-airr: miairr: important @@ -878,6 +900,8 @@ AlleleDescription: - inbred - outbred - locational + x-airr: + nullable: true status: type: string enum: @@ -886,6 +910,8 @@ AlleleDescription: - retired - withdrawn description: Status of record, assumed active if the field is not present + x-airr: + nullable: true subgroup_designation: type: string description: Identifier of the gene subgroup or clade, as (and if) defined @@ -907,65 +933,74 @@ AlleleDescription: - 1 - 2 - 3 - description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. + x-airr: + nullable: true gene_start: type: integer - description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. 
x-airr: miairr: important gene_end: type: integer - description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. x-airr: miairr: important utr_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). utr_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). leader_1_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). leader_1_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). leader_2_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). leader_2_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). v_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). v_rs_end: type: integer - description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). 
d_rs_3_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_3_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). d_rs_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). j_cdr3_end: type: integer - description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucelotide of the conserved PHE or TRP (IMGT codon position 118) + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucleotide of the + conserved PHE or TRP (IMGT codon position 118). j_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: Start co-ordinate in the sequence field of the J recombination site (J-genes only). j_rs_end: type: integer - description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: End co-ordinate in the sequence field of the J recombination site (J-genes only). j_donor_splice: type: integer - description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only).
v_gene_delineations: type: array items: @@ -985,7 +1020,9 @@ AlleleDescription: description: Gene symbols of any paralogs curation: type: string - description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. curational_tags: type: array items: @@ -997,9 +1034,10 @@ AlleleDescription: # Collection of gene descriptions into a germline set GermlineSet: - discriminator: AIRR - description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. required: - germline_set_id - author @@ -1016,8 +1054,11 @@ GermlineSet: properties: germline_set_id: type: string - description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important author: type: string @@ -1093,10 +1134,13 @@ GermlineSet: - inbred - outbred - locational + x-airr: + nullable: true locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -1109,13 +1153,15 @@ GermlineSet: allele_descriptions: type: array items: - $ref: '#/AlleleDescription' + $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: miairr: important curation: type: string - description: Curational notes on the GermlineSet. 
This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. # # Genotype schema @@ -1124,15 +1170,17 @@ GermlineSet: # GenotypeSet lists the Genotypes (describing different loci) inferred for this subject GenotypeSet: - discriminator: AIRR type: object required: - receptor_genotype_set_id properties: receptor_genotype_set_id: type: string - description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. @@ -1140,12 +1188,11 @@ GenotypeSet: items: $ref: '#/Genotype' -# This enumerates the alleles and gene deletions inferred in a single subject. Included alleles may either be listed by reference to a GermlineSet, or +# This enumerates the alleles and gene deletions inferred in a single subject. 
Included alleles may either be listed by reference to a GermlineSet, or # listed as 'undocumented', in which case the inferred sequence is provided # Genotype of adaptive immune receptors Genotype: - discriminator: AIRR type: object required: - receptor_genotype_id @@ -1153,19 +1200,24 @@ Genotype: properties: receptor_genotype_id: type: string - description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRD - TRG + description: Gene locus example: IGH x-airr: miairr: essential @@ -1177,10 +1229,9 @@ Genotype: items: $ref: '#/DocumentedAllele' x-airr: - miairr: important + miairr: important undocumented_alleles: type: array - nullable: true description: List of alleles inferred to be present and not documented in an identified GermlineSet items: $ref: '#/UndocumentedAllele' @@ -1188,7 +1239,6 @@ Genotype: adc-query-support: true deleted_genes: type: array - nullable: true description: Array of genes identified as being deleted in this genotype items: $ref: '#/DeletedGene' @@ -1203,7 +1253,6 @@ Genotype: title: Genotype acquisition process example: repertoire_sequencing x-airr: - nullable: true adc-query-support: true format: controlled vocabulary @@ -1231,7 +1280,9 @@ DocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
# Undocumented Allele # This describes a 'undocumented' allele found in a genotype @@ -1257,8 +1308,10 @@ UndocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome - + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + # Deleted Gene # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype @@ -1282,12 +1335,13 @@ DeletedGene: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: - discriminator: AIRR type: object required: - mhc_genotype_set_id @@ -1297,6 +1351,7 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: + identifier: true miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set @@ -1308,7 +1363,6 @@ MHCGenotypeSet: # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: - discriminator: AIRR type: object required: - mhc_genotype_id @@ -1319,14 +1373,15 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: + identifier: true miairr: important mhc_class: type: string - description: Class of MHC alleles described by the MHCGenotype enum: - - MHC-I - - MHC-II - - MHC-nonclassical + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: miairr: essential @@ -1391,7 +1446,6 @@ MHCAllele: # The overall study with a globally 
unique study_id Study: - discriminator: AIRR type: object required: - study_id @@ -1414,6 +1468,7 @@ Study: title: Study ID example: PRJNA001 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1579,10 +1634,10 @@ Study: description: Keywords describing properties of one or more data sets in a study title: Keywords for study example: - - contains_ig - - contains_schema_rearrangement - - contains_schema_clone - - contains_schema_cell + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell x-airr: miairr: important nullable: true @@ -1617,7 +1672,6 @@ Study: # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: - discriminator: AIRR type: object required: - subject_id @@ -1643,6 +1697,7 @@ Subject: title: Subject ID example: SUB856413 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1872,7 +1927,6 @@ Subject: # 1-to-n relationship between a subject and its diagnoses Diagnosis: - discriminator: AIRR type: object required: - study_group_description @@ -1993,7 +2047,6 @@ Diagnosis: # 1-to-n relationship between a subject and its samples # sample_id is unique within a study Sample: - discriminator: AIRR type: object required: - sample_id @@ -2014,6 +2067,7 @@ Sample: title: Biological sample ID example: SUP52415 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2080,7 +2134,7 @@ Sample: type: number description: Time point at which sample was taken, relative to `Collection time event` title: Sample collection time - example: "14" + example: 14 x-airr: miairr: important nullable: true @@ -2135,7 +2189,6 @@ Sample: # 1-to-n relationship between a sample and processing of its cells CellProcessing: - discriminator: AIRR type: object required: - tissue_processing @@ -2306,7 +2359,6 @@ CellProcessing: # object for PCR primer targets PCRTarget: - discriminator: AIRR type: 
object required: - pcr_target_locus @@ -2366,7 +2418,6 @@ PCRTarget: # generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid # but may be 1-to-n for technical replicates. NucleicAcidProcessing: - discriminator: AIRR type: object required: - template_class @@ -2423,7 +2474,7 @@ NucleicAcidProcessing: template_amount_unit: $ref: '#/Ontology' description: Unit of template amount - title: Template amount time unit + title: Template amount time unit example: id: UO:0000024 label: nanogram @@ -2439,7 +2490,7 @@ NucleicAcidProcessing: draft: false top_node: id: UO:0000002 - label: physical quantity + label: physical quantity library_generation_method: type: string enum: @@ -2533,9 +2584,9 @@ NucleicAcidProcessing: type: string enum: - none - - "hetero_head-head" - - "hetero_tail-head" - - "hetero_prelinked" + - hetero_head-head + - hetero_tail-head + - hetero_prelinked description: > In case an experimental setup is used that physically links nucleic acids derived from distinct `Rearrangements` before library preparation, this field describes the mode of that linkage. All @@ -2558,7 +2609,6 @@ NucleicAcidProcessing: # 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) SequencingRun: - discriminator: AIRR type: object required: - sequencing_run_id @@ -2574,6 +2624,7 @@ SequencingRun: title: Batch number example: 160101_M01234 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2650,7 +2701,6 @@ SequencingRun: # Resultant raw sequencing files from a SequencingRun SequencingData: - discriminator: AIRR type: object required: - sequencing_data_id @@ -2664,10 +2714,13 @@ SequencingData: properties: sequencing_data_id: type: string - description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. 
+ description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. title: Raw sequencing data persistent identifier example: "SRA:SRR11610494" x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2793,7 +2846,6 @@ SequencingData: # Set of annotated rearrangement sequences produced by # data processing upon the raw sequence data for a repertoire. DataProcessing: - discriminator: AIRR type: object required: - software_versions @@ -2809,10 +2861,10 @@ DataProcessing: description: Identifier for the data processing object. title: Data processing ID x-airr: + identifier: true nullable: true name: Data processing ID adc-query-support: true - identifier: true primary_annotation: type: boolean default: false @@ -2939,7 +2991,6 @@ DataProcessing: adc-query-support: true SampleProcessing: - discriminator: AIRR allOf: - type: object properties: @@ -2951,10 +3002,10 @@ SampleProcessing: nucleic acid processing and sequencing run information for the repertoire. title: Sample processing ID x-airr: + identifier: true nullable: true name: Sample processing ID adc-query-support: true - identifier: true - $ref: '#/Sample' - $ref: '#/CellProcessing' - $ref: '#/NucleicAcidProcessing' @@ -2967,7 +3018,6 @@ SampleProcessing: # and experimentally observed by raw sequence data. A repertoire # can only be for one subject but may include multiple samples. 
Repertoire: - discriminator: AIRR type: object required: - study @@ -3034,7 +3084,6 @@ Repertoire: # A collection of repertoires for analysis purposes, includes optional time course RepertoireGroup: - discriminator: AIRR type: object required: - repertoire_group_id @@ -3043,6 +3092,8 @@ RepertoireGroup: repertoire_group_id: type: string description: Identifier for this repertoire collection + x-airr: + identifier: true repertoire_group_name: type: string description: Short display name for this repertoire collection @@ -3077,7 +3128,6 @@ RepertoireGroup: Alignment: - discriminator: AIRR type: object required: - sequence_id @@ -3092,6 +3142,8 @@ Alignment: Unique query sequence identifier within the file. Most often this will be the input sequence header or a substring thereof, but may also be a custom identifier defined by the tool in cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true segment: type: string description: > @@ -3169,7 +3221,6 @@ Alignment: # The extended rearrangement object Rearrangement: - discriminator: AIRR type: object required: - sequence_id @@ -3976,7 +4027,6 @@ Rearrangement: # A unique inferred clone object that has been constructed within a single data processing # for a single repertoire and a subset of its sequences and/or rearrangements. Clone: - discriminator: AIRR type: object required: - clone_id @@ -3985,6 +4035,8 @@ Clone: clone_id: type: string description: Identifier for the clone. + x-airr: + identifier: true repertoire_id: type: string description: Identifier to the associated repertoire in study metadata. @@ -4097,7 +4149,6 @@ Clone: # 1-to-n relationship for a clone to its trees. Tree: - discriminator: AIRR type: object required: - tree_id @@ -4107,6 +4158,8 @@ Tree: tree_id: type: string description: Identifier for the tree. + x-airr: + identifier: true clone_id: type: string description: Identifier for the clone. 
@@ -4121,7 +4174,6 @@ Tree: # 1-to-n relationship between a tree and its nodes Node: - discriminator: AIRR type: object required: - sequence_id @@ -4131,6 +4183,8 @@ Node: description: > Identifier for this node that matches the identifier in the newick string and, where possible, the sequence_id in the source repertoire. + x-airr: + identifier: true sequence_alignment: type: string description: > @@ -4149,7 +4203,6 @@ Node: # The cell object acts as point of reference for all data that can be related # to an individual cell, either by direct observation or inference. Cell: - discriminator: AIRR type: object required: - cell_id @@ -4164,6 +4217,7 @@ Cell: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4218,7 +4272,8 @@ Cell: - "flow cytometry" - "single-cell transcriptome" description: > - keyword describing the methodology used to assess expression. This values for this field MUST come from a controlled vocabulary + Keyword describing the methodology used to assess expression. This values for this field MUST + come from a controlled vocabulary. x-airr: miairr: defined nullable: true @@ -4251,15 +4306,14 @@ Cell: name: Virtual pairing # The CellExpression object acts as a container to hold a single expression level measurement from -# an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: - discriminator: AIRR type: object required: - expression_id - - reperotire_id + - repertoire_id - data_processing_id - cell_id - property @@ -4271,6 +4325,7 @@ CellExpression: Identifier of this expression property measurement. 
title: Expression property measurement identifier x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4306,9 +4361,11 @@ CellExpression: name: Data processing for cell property: $ref: '#/Ontology' - description: Name of the property observed, typically a gene or antibody idenifier (and its label) from a canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). title: Property information - nullable: true + description: > + Name of the property observed, typically a gene or antibody idenifier (and its label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: id: ENSG:ENSG00000275747 label: IGHV3-79 @@ -4332,7 +4389,6 @@ CellExpression: # The Receptor object hold information about a receptor and its reactivity. # Receptor: - discriminator: AIRR type: object required: - receptor_id @@ -4349,6 +4405,7 @@ Receptor: title: Receptor ID example: TCR-MM-012345 x-airr: + identifier: true nullable: false adc-query-support: true receptor_hash: @@ -4406,6 +4463,7 @@ Receptor: receptor_variable_domain_2_locus: type: string enum: + - IGI - IGK - IGL - TRA @@ -4429,157 +4487,169 @@ Receptor: type: array description: Records of reactivity measurement items: - type: object - properties: - ligand_type: - type: string - enum: - - MHC:peptide - - MHC:non-peptide - - protein - - peptide - - non-peptidic - description: Classification of ligand binding to receptor - example: non-peptide - x-airr: - nullable: false - antigen_type: - type: string - enum: - - protein - - peptide - - non-peptidic - description: > - The type of antigen before processing by the immune system. - example: protein - x-airr: - nullable: false - antigen: - $ref: '#/Ontology' - description: > - The substance against which the receptor was tested. 
This can be any substance that - stimulates an adaptive immune response in the host, either through antibody production - or by T cell activation after presentation via an MHC molecule. - title: Antigen - example: - id: UNIPROT:P19597 - label: Circumsporozoite protein - x-airr: - nullable: false - adc-query-support: true - format: ontology - antigen_source_species: - $ref: '#/Ontology' - description: The species from which the antigen was isolated - title: Source species of antigen - example: - id: NCBITAXON:5843 - label: Plasmodium falciparum NF54 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: NCBITAXON:1 - label: root - peptide_start: - type: integer - description: Start position of the peptide within the reference protein sequence - x-airr: - nullable: true - peptide_end: - type: integer - description: End position of the peptide within the reference protein sequence - x-airr: - nullable: true - mhc_class: - type: string - enum: - - MHC-I - - MHC-II - - MHC-nonclassical - description: Class of MHC molecule, only present for MHC:x ligand types - example: MHC-II - x-airr: - nullable: true - mhc_gene_1: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_1 belongs - title: MHC gene 1 - example: - id: MRO:0000055 - label: HLA-DRA - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - mhc_allele_1: - type: string - description: Allele designation of the MHC alpha chain - example: HLA-DRA - x-airr: - nullable: true - mhc_gene_2: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_2 belongs - title: MHC gene 2 - example: - id: MRO:0000057 - label: HLA-DRB1 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - mhc_allele_2: - type: string - description: > - Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain - example: HLA-DRB1*04:01 
- x-airr: - nullable: true - reactivity_method: - type: string - enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity - description: The methodology used to assess expression (assay implemented in experiment) - x-airr: - nullable: false - reactivity_readout: - type: string - enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition - description: Reactivity measurement read-out - example: cytokine release - x-airr: - nullable: false - reactivity_value: - type: number - description: The absolute (processed) value of the measurement - example: 162.26 - x-airr: - nullable: false - reactivity_unit: - type: string - description: The unit of the measurement - example: pg/ml - x-airr: - nullable: false + $ref: '#/ReceptorReactivity' x-airr: nullable: true + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + x-airr: + nullable: false + antigen_type: + type: string + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + x-airr: + nullable: false + antigen: + $ref: '#/Ontology' + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + nullable: false + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + description: Start position of the peptide within the reference protein sequence + x-airr: + nullable: true + peptide_end: + type: integer + description: End position of the peptide within the reference protein sequence + x-airr: + nullable: true + mhc_class: + type: string + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + x-airr: + nullable: true + mhc_gene_1: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + description: Allele designation of the MHC alpha chain + example: HLA-DRA + x-airr: + nullable: true + mhc_gene_2: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + x-airr: + nullable: true + reactivity_method: + type: string + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess 
expression (assay implemented in experiment) + x-airr: + nullable: false + reactivity_readout: + type: string + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + x-airr: + nullable: false + reactivity_value: + type: number + description: The absolute (processed) value of the measurement + example: 162.26 + x-airr: + nullable: false + reactivity_unit: + type: string + description: The unit of the measurement + example: pg/ml + x-airr: + nullable: false diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 449734012..4b87c621c 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -4,7 +4,7 @@ Info: title: AIRR Schema description: Schema definitions for AIRR standards objects - version: "1.4" + version: 1.4 contact: name: AIRR Community url: https://github.com/airr-community @@ -16,7 +16,6 @@ Info: # Properties that are based upon an ontology use this # standard schema definition Ontology: - discriminator: AIRR type: object properties: id: @@ -38,11 +37,11 @@ CURIEMap: CHEBI: type: ontology default: - map: OBO - provider: OLS + map: OBO + provider: OLS map: - OBO: - iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" CL: type: ontology default: @@ -245,7 +244,6 @@ InformationProvider: # attributes are attached to an AIRR field with the x-airr property. Attributes: - discriminator: AIRR type: object properties: miairr: @@ -336,7 +334,6 @@ Attributes: # the value should not be null. DataFile: - discriminator: AIRR type: object properties: Info: @@ -396,7 +393,6 @@ DataFile: # AIRR Info object, should be similar to openapi # should we point to an openapi schema? 
InfoObject: - discriminator: AIRR type: object description: Provides information about data and API responses. required: @@ -436,7 +432,6 @@ InfoObject: # A time point TimePoint: - discriminator: AIRR description: Time point at which an observation or other action was performed. type: object properties: @@ -478,7 +473,6 @@ TimePoint: # An individual Acknowledgement: - discriminator: AIRR description: Individual whose contribution to this work should be acknowledged type: object required: @@ -490,6 +484,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: + identifier: true miairr: important name: type: string @@ -507,9 +502,10 @@ Acknowledgement: # Rearranged and genomic germline sequences RearrangedSequence: - discriminator: AIRR - description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. required: - sequence_id - sequence @@ -523,8 +519,11 @@ RearrangedSequence: properties: sequence_id: type: string - description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important sequence: type: string @@ -541,10 +540,12 @@ RearrangedSequence: miairr: important observation_type: type: string - description: The type of observation from which this sequence was drawn, e.g. 
direct sequencing, inference from repertoire enum: - - direct sequencing - - inference from repertoire + - "direct sequencing" + - "inference from repertoire" + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. x-airr: miairr: essential curation: @@ -577,7 +578,6 @@ RearrangedSequence: miairr: essential UnrearrangedSequence: - discriminator: AIRR description: Details of an unrearranged sequence contributing support for a gene or allele type: object required: @@ -594,10 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: + identifier: true miairr: important sequence: type: string - description: Sequence of interest described in this record (typically this will include gene and promoter region) + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. x-airr: miairr: essential curation: @@ -618,23 +620,29 @@ UnrearrangedSequence: description: Genome assembly patch number in which this gene was determined gff_seqid: type: string - description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. gff_start: type: integer - description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. gff_end: type: integer - description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. 
strand: type: string enum: - - + + - "+" - "-" description: sense (+ or -) + x-airr: + nullable: true # V gene delineation SequenceDelineationV: - discriminator: AIRR description: Delineation of a V-gene in a particular system type: object required: @@ -654,8 +662,11 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string - description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important delineation_scheme: type: string @@ -670,7 +681,9 @@ SequenceDelineationV: description: entire V-sequence covered by this delineation aligned_sequence: type: string - description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. fwr1_start: type: integer description: FWR1 start co-ordinate in the 'unaligned sequence' field @@ -729,12 +742,13 @@ SequenceDelineationV: alignment_labels: type: array items: - type: string - description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. 
# Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: - discriminator: AIRR description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations type: object required: @@ -754,8 +768,11 @@ AlleleDescription: properties: allele_description_id: type: string - description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important allele_description_ref: type: string @@ -804,12 +821,16 @@ AlleleDescription: miairr: important sequence: type: string - description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. x-airr: miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
x-airr: miairr: important aliases: @@ -821,6 +842,7 @@ AlleleDescription: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -832,7 +854,7 @@ AlleleDescription: miairr: essential chromosome: type: integer - description: chromosome on which the gene is located + description: chromosome on which the gene is located sequence_type: type: string enum: @@ -851,9 +873,9 @@ AlleleDescription: inference_type: type: string enum: - - Genomic and rearranged - - Genomic only - - Rearranged only + - "Genomic and rearranged" + - "Genomic only" + - "Rearranged only" description: Type of inference(s) from which this gene sequence was inferred x-airr: miairr: important @@ -878,6 +900,8 @@ AlleleDescription: - inbred - outbred - locational + x-airr: + nullable: true status: type: string enum: @@ -886,6 +910,8 @@ AlleleDescription: - retired - withdrawn description: Status of record, assumed active if the field is not present + x-airr: + nullable: true subgroup_designation: type: string description: Identifier of the gene subgroup or clade, as (and if) defined @@ -907,65 +933,74 @@ AlleleDescription: - 1 - 2 - 3 - description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. + x-airr: + nullable: true gene_start: type: integer - description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. 
x-airr: miairr: important gene_end: type: integer - description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. x-airr: miairr: important utr_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). utr_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). leader_1_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). leader_1_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). leader_2_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). leader_2_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). v_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). v_rs_end: type: integer - description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). 
d_rs_3_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_3_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). d_rs_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of 5 the prime D recombination site (D-genes only). j_cdr3_end: type: integer - description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucelotide of the conserved PHE or TRP (IMGT codon position 118) + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucelotide of the + conserved PHE or TRP (IMGT codon position 118). j_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: Start co-ordinate in the sequence field of J recombination site (J-genes only). j_rs_end: type: integer - description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: End co-ordinate in the sequence field of J recombination site (J-genes only). j_donor_splice: type: integer - description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only). 
v_gene_delineations: type: array items: @@ -985,7 +1020,9 @@ AlleleDescription: description: Gene symbols of any paralogs curation: type: string - description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. curational_tags: type: array items: @@ -997,9 +1034,10 @@ AlleleDescription: # Collection of gene descriptions into a germline set GermlineSet: - discriminator: AIRR - description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. required: - germline_set_id - author @@ -1016,8 +1054,11 @@ GermlineSet: properties: germline_set_id: type: string - description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important author: type: string @@ -1093,10 +1134,13 @@ GermlineSet: - inbred - outbred - locational + x-airr: + nullable: true locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -1109,13 +1153,15 @@ GermlineSet: allele_descriptions: type: array items: - $ref: '#/AlleleDescription' + $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: miairr: important curation: type: string - description: Curational notes on the GermlineSet. 
This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. # # Genotype schema @@ -1124,15 +1170,17 @@ GermlineSet: # GenotypeSet lists the Genotypes (describing different loci) inferred for this subject GenotypeSet: - discriminator: AIRR type: object required: - receptor_genotype_set_id properties: receptor_genotype_set_id: type: string - description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. @@ -1140,12 +1188,11 @@ GenotypeSet: items: $ref: '#/Genotype' -# This enumerates the alleles and gene deletions inferred in a single subject. Included alleles may either be listed by reference to a GermlineSet, or +# This enumerates the alleles and gene deletions inferred in a single subject. 
Included alleles may either be listed by reference to a GermlineSet, or # listed as 'undocumented', in which case the inferred sequence is provided # Genotype of adaptive immune receptors Genotype: - discriminator: AIRR type: object required: - receptor_genotype_id @@ -1153,19 +1200,24 @@ Genotype: properties: receptor_genotype_id: type: string - description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRD - TRG + description: Gene locus example: IGH x-airr: miairr: essential @@ -1177,10 +1229,9 @@ Genotype: items: $ref: '#/DocumentedAllele' x-airr: - miairr: important + miairr: important undocumented_alleles: type: array - nullable: true description: List of alleles inferred to be present and not documented in an identified GermlineSet items: $ref: '#/UndocumentedAllele' @@ -1188,7 +1239,6 @@ Genotype: adc-query-support: true deleted_genes: type: array - nullable: true description: Array of genes identified as being deleted in this genotype items: $ref: '#/DeletedGene' @@ -1203,7 +1253,6 @@ Genotype: title: Genotype acquisition process example: repertoire_sequencing x-airr: - nullable: true adc-query-support: true format: controlled vocabulary @@ -1231,7 +1280,9 @@ DocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
# Undocumented Allele # This describes a 'undocumented' allele found in a genotype @@ -1257,8 +1308,10 @@ UndocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome - + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + # Deleted Gene # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype @@ -1282,12 +1335,13 @@ DeletedGene: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: - discriminator: AIRR type: object required: - mhc_genotype_set_id @@ -1297,6 +1351,7 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: + identifier: true miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set @@ -1308,7 +1363,6 @@ MHCGenotypeSet: # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: - discriminator: AIRR type: object required: - mhc_genotype_id @@ -1319,14 +1373,15 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: + identifier: true miairr: important mhc_class: type: string - description: Class of MHC alleles described by the MHCGenotype enum: - - MHC-I - - MHC-II - - MHC-nonclassical + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: miairr: essential @@ -1391,7 +1446,6 @@ MHCAllele: # The overall study with a globally 
unique study_id Study: - discriminator: AIRR type: object required: - study_id @@ -1414,6 +1468,7 @@ Study: title: Study ID example: PRJNA001 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1579,10 +1634,10 @@ Study: description: Keywords describing properties of one or more data sets in a study title: Keywords for study example: - - contains_ig - - contains_schema_rearrangement - - contains_schema_clone - - contains_schema_cell + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell x-airr: miairr: important nullable: true @@ -1617,7 +1672,6 @@ Study: # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: - discriminator: AIRR type: object required: - subject_id @@ -1643,6 +1697,7 @@ Subject: title: Subject ID example: SUB856413 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1872,7 +1927,6 @@ Subject: # 1-to-n relationship between a subject and its diagnoses Diagnosis: - discriminator: AIRR type: object required: - study_group_description @@ -1993,7 +2047,6 @@ Diagnosis: # 1-to-n relationship between a subject and its samples # sample_id is unique within a study Sample: - discriminator: AIRR type: object required: - sample_id @@ -2014,6 +2067,7 @@ Sample: title: Biological sample ID example: SUP52415 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2080,7 +2134,7 @@ Sample: type: number description: Time point at which sample was taken, relative to `Collection time event` title: Sample collection time - example: "14" + example: 14 x-airr: miairr: important nullable: true @@ -2135,7 +2189,6 @@ Sample: # 1-to-n relationship between a sample and processing of its cells CellProcessing: - discriminator: AIRR type: object required: - tissue_processing @@ -2306,7 +2359,6 @@ CellProcessing: # object for PCR primer targets PCRTarget: - discriminator: AIRR type: 
object required: - pcr_target_locus @@ -2366,7 +2418,6 @@ PCRTarget: # generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid # but may be 1-to-n for technical replicates. NucleicAcidProcessing: - discriminator: AIRR type: object required: - template_class @@ -2423,7 +2474,7 @@ NucleicAcidProcessing: template_amount_unit: $ref: '#/Ontology' description: Unit of template amount - title: Template amount time unit + title: Template amount time unit example: id: UO:0000024 label: nanogram @@ -2439,7 +2490,7 @@ NucleicAcidProcessing: draft: false top_node: id: UO:0000002 - label: physical quantity + label: physical quantity library_generation_method: type: string enum: @@ -2533,9 +2584,9 @@ NucleicAcidProcessing: type: string enum: - none - - "hetero_head-head" - - "hetero_tail-head" - - "hetero_prelinked" + - hetero_head-head + - hetero_tail-head + - hetero_prelinked description: > In case an experimental setup is used that physically links nucleic acids derived from distinct `Rearrangements` before library preparation, this field describes the mode of that linkage. All @@ -2558,7 +2609,6 @@ NucleicAcidProcessing: # 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) SequencingRun: - discriminator: AIRR type: object required: - sequencing_run_id @@ -2574,6 +2624,7 @@ SequencingRun: title: Batch number example: 160101_M01234 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2650,7 +2701,6 @@ SequencingRun: # Resultant raw sequencing files from a SequencingRun SequencingData: - discriminator: AIRR type: object required: - sequencing_data_id @@ -2664,10 +2714,13 @@ SequencingData: properties: sequencing_data_id: type: string - description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. 
+ description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. title: Raw sequencing data persistent identifier example: "SRA:SRR11610494" x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2793,7 +2846,6 @@ SequencingData: # Set of annotated rearrangement sequences produced by # data processing upon the raw sequence data for a repertoire. DataProcessing: - discriminator: AIRR type: object required: - software_versions @@ -2809,10 +2861,10 @@ DataProcessing: description: Identifier for the data processing object. title: Data processing ID x-airr: + identifier: true nullable: true name: Data processing ID adc-query-support: true - identifier: true primary_annotation: type: boolean default: false @@ -2939,7 +2991,6 @@ DataProcessing: adc-query-support: true SampleProcessing: - discriminator: AIRR allOf: - type: object properties: @@ -2951,10 +3002,10 @@ SampleProcessing: nucleic acid processing and sequencing run information for the repertoire. title: Sample processing ID x-airr: + identifier: true nullable: true name: Sample processing ID adc-query-support: true - identifier: true - $ref: '#/Sample' - $ref: '#/CellProcessing' - $ref: '#/NucleicAcidProcessing' @@ -2967,7 +3018,6 @@ SampleProcessing: # and experimentally observed by raw sequence data. A repertoire # can only be for one subject but may include multiple samples. 
Repertoire: - discriminator: AIRR type: object required: - study @@ -3034,7 +3084,6 @@ Repertoire: # A collection of repertoires for analysis purposes, includes optional time course RepertoireGroup: - discriminator: AIRR type: object required: - repertoire_group_id @@ -3043,6 +3092,8 @@ RepertoireGroup: repertoire_group_id: type: string description: Identifier for this repertoire collection + x-airr: + identifier: true repertoire_group_name: type: string description: Short display name for this repertoire collection @@ -3077,7 +3128,6 @@ RepertoireGroup: Alignment: - discriminator: AIRR type: object required: - sequence_id @@ -3092,6 +3142,8 @@ Alignment: Unique query sequence identifier within the file. Most often this will be the input sequence header or a substring thereof, but may also be a custom identifier defined by the tool in cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true segment: type: string description: > @@ -3169,7 +3221,6 @@ Alignment: # The extended rearrangement object Rearrangement: - discriminator: AIRR type: object required: - sequence_id @@ -3976,7 +4027,6 @@ Rearrangement: # A unique inferred clone object that has been constructed within a single data processing # for a single repertoire and a subset of its sequences and/or rearrangements. Clone: - discriminator: AIRR type: object required: - clone_id @@ -3985,6 +4035,8 @@ Clone: clone_id: type: string description: Identifier for the clone. + x-airr: + identifier: true repertoire_id: type: string description: Identifier to the associated repertoire in study metadata. @@ -4097,7 +4149,6 @@ Clone: # 1-to-n relationship for a clone to its trees. Tree: - discriminator: AIRR type: object required: - tree_id @@ -4107,6 +4158,8 @@ Tree: tree_id: type: string description: Identifier for the tree. + x-airr: + identifier: true clone_id: type: string description: Identifier for the clone. 
@@ -4121,7 +4174,6 @@ Tree: # 1-to-n relationship between a tree and its nodes Node: - discriminator: AIRR type: object required: - sequence_id @@ -4131,6 +4183,8 @@ Node: description: > Identifier for this node that matches the identifier in the newick string and, where possible, the sequence_id in the source repertoire. + x-airr: + identifier: true sequence_alignment: type: string description: > @@ -4149,7 +4203,6 @@ Node: # The cell object acts as point of reference for all data that can be related # to an individual cell, either by direct observation or inference. Cell: - discriminator: AIRR type: object required: - cell_id @@ -4164,6 +4217,7 @@ Cell: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4218,7 +4272,8 @@ Cell: - "flow cytometry" - "single-cell transcriptome" description: > - keyword describing the methodology used to assess expression. This values for this field MUST come from a controlled vocabulary + Keyword describing the methodology used to assess expression. The values for this field MUST + come from a controlled vocabulary. x-airr: miairr: defined nullable: true @@ -4251,15 +4306,14 @@ Cell: name: Virtual pairing # The CellExpression object acts as a container to hold a single expression level measurement from -# an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: - discriminator: AIRR type: object required: - expression_id - - reperotire_id + - repertoire_id - data_processing_id - cell_id - property @@ -4271,6 +4325,7 @@ CellExpression: Identifier of this expression property measurement.
title: Expression property measurement identifier x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4306,9 +4361,11 @@ CellExpression: name: Data processing for cell property: $ref: '#/Ontology' - description: Name of the property observed, typically a gene or antibody idenifier (and its label) from a canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). title: Property information - nullable: true + description: > + Name of the property observed, typically a gene or antibody identifier (and its label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: id: ENSG:ENSG00000275747 label: IGHV3-79 @@ -4332,7 +4389,6 @@ CellExpression: # The Receptor object hold information about a receptor and its reactivity. # Receptor: - discriminator: AIRR type: object required: - receptor_id @@ -4349,6 +4405,7 @@ Receptor: title: Receptor ID example: TCR-MM-012345 x-airr: + identifier: true nullable: false adc-query-support: true receptor_hash: @@ -4406,6 +4463,7 @@ Receptor: receptor_variable_domain_2_locus: type: string enum: + - IGI - IGK - IGL - TRA @@ -4429,157 +4487,169 @@ Receptor: type: array description: Records of reactivity measurement items: - type: object - properties: - ligand_type: - type: string - enum: - - MHC:peptide - - MHC:non-peptide - - protein - - peptide - - non-peptidic - description: Classification of ligand binding to receptor - example: non-peptide - x-airr: - nullable: false - antigen_type: - type: string - enum: - - protein - - peptide - - non-peptidic - description: > - The type of antigen before processing by the immune system. - example: protein - x-airr: - nullable: false - antigen: - $ref: '#/Ontology' - description: > - The substance against which the receptor was tested.
This can be any substance that - stimulates an adaptive immune response in the host, either through antibody production - or by T cell activation after presentation via an MHC molecule. - title: Antigen - example: - id: UNIPROT:P19597 - label: Circumsporozoite protein - x-airr: - nullable: false - adc-query-support: true - format: ontology - antigen_source_species: - $ref: '#/Ontology' - description: The species from which the antigen was isolated - title: Source species of antigen - example: - id: NCBITAXON:5843 - label: Plasmodium falciparum NF54 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: NCBITAXON:1 - label: root - peptide_start: - type: integer - description: Start position of the peptide within the reference protein sequence - x-airr: - nullable: true - peptide_end: - type: integer - description: End position of the peptide within the reference protein sequence - x-airr: - nullable: true - mhc_class: - type: string - enum: - - MHC-I - - MHC-II - - MHC-nonclassical - description: Class of MHC molecule, only present for MHC:x ligand types - example: MHC-II - x-airr: - nullable: true - mhc_gene_1: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_1 belongs - title: MHC gene 1 - example: - id: MRO:0000055 - label: HLA-DRA - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - mhc_allele_1: - type: string - description: Allele designation of the MHC alpha chain - example: HLA-DRA - x-airr: - nullable: true - mhc_gene_2: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_2 belongs - title: MHC gene 2 - example: - id: MRO:0000057 - label: HLA-DRB1 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - mhc_allele_2: - type: string - description: > - Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain - example: HLA-DRB1*04:01 
- x-airr: - nullable: true - reactivity_method: - type: string - enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity - description: The methodology used to assess expression (assay implemented in experiment) - x-airr: - nullable: false - reactivity_readout: - type: string - enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition - description: Reactivity measurement read-out - example: cytokine release - x-airr: - nullable: false - reactivity_value: - type: number - description: The absolute (processed) value of the measurement - example: 162.26 - x-airr: - nullable: false - reactivity_unit: - type: string - description: The unit of the measurement - example: pg/ml - x-airr: - nullable: false + $ref: '#/ReceptorReactivity' x-airr: nullable: true + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + x-airr: + nullable: false + antigen_type: + type: string + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + x-airr: + nullable: false + antigen: + $ref: '#/Ontology' + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + nullable: false + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + description: Start position of the peptide within the reference protein sequence + x-airr: + nullable: true + peptide_end: + type: integer + description: End position of the peptide within the reference protein sequence + x-airr: + nullable: true + mhc_class: + type: string + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + x-airr: + nullable: true + mhc_gene_1: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + description: Allele designation of the MHC alpha chain + example: HLA-DRA + x-airr: + nullable: true + mhc_gene_2: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + x-airr: + nullable: true + reactivity_method: + type: string + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess 
expression (assay implemented in experiment) + x-airr: + nullable: false + reactivity_readout: + type: string + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + x-airr: + nullable: false + reactivity_value: + type: number + description: The absolute (processed) value of the measurement + example: 162.26 + x-airr: + nullable: false + reactivity_unit: + type: string + description: The unit of the measurement + example: pg/ml + x-airr: + nullable: false diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 7d4489b45..46a4e3de3 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -4,7 +4,7 @@ Info: title: AIRR Schema description: Schema definitions for AIRR standards objects - version: "1.4" + version: 1.4 contact: name: AIRR Community url: https://github.com/airr-community @@ -16,8 +16,6 @@ Info: # Properties that are based upon an ontology use this # standard schema definition Ontology: - discriminator: - propertyName: AIRR type: object properties: id: @@ -41,11 +39,11 @@ CURIEMap: CHEBI: type: ontology default: - map: OBO - provider: OLS + map: OBO + provider: OLS map: - OBO: - iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" CL: type: ontology default: @@ -248,8 +246,6 @@ InformationProvider: # attributes are attached to an AIRR field with the x-airr property. Attributes: - discriminator: - propertyName: AIRR type: object properties: miairr: @@ -336,8 +332,6 @@ Attributes: # the value should not be null. 
DataFile: - discriminator: - propertyName: AIRR type: object properties: Info: @@ -348,13 +342,13 @@ DataFile: nullable: false description: List of repertoires items: - $ref: '#/Repertoire' + $ref: '#/Repertoire' RepertoireGroup: type: array nullable: false description: List of repertoire collections items: - $ref: '#/RepertoireGroup' + $ref: '#/RepertoireGroup' Rearrangement: type: array nullable: false @@ -366,13 +360,13 @@ DataFile: nullable: false description: List of cells items: - $ref: '#/Cell' + $ref: '#/Cell' Clone: type: array nullable: false description: List of clones items: - $ref: '#/Clone' + $ref: '#/Clone' GermlineSet: type: array nullable: false @@ -389,53 +383,49 @@ DataFile: # AIRR Info object, should be similar to openapi # should we point to an openapi schema? InfoObject: - discriminator: - propertyName: AIRR type: object description: Provides information about data and API responses. required: - title - version properties: - title: - type: string - nullable: false - version: - type: string - nullable: false - description: - type: string - nullable: true - contact: - type: object - nullable: true - properties: - name: - type: string - nullable: true - url: - type: string - nullable: true - email: + title: type: string - nullable: true - license: - type: object - nullable: true - required: - - name - properties: - name: + nullable: false + version: type: string nullable: false - url: + description: type: string nullable: true + contact: + type: object + nullable: true + properties: + name: + type: string + nullable: true + url: + type: string + nullable: true + email: + type: string + nullable: true + license: + type: object + nullable: true + required: + - name + properties: + name: + type: string + nullable: false + url: + type: string + nullable: true # A time point TimePoint: - discriminator: - propertyName: AIRR description: Time point at which an observation or other action was performed. 
type: object properties: @@ -476,8 +466,6 @@ TimePoint: # An individual Acknowledgement: - discriminator: - propertyName: AIRR description: Individual whose contribution to this work should be acknowledged type: object required: @@ -489,6 +477,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: + identifier: true miairr: important nullable: true name: @@ -510,10 +499,10 @@ Acknowledgement: # Rearranged and genomic germline sequences RearrangedSequence: - discriminator: - propertyName: AIRR - description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. required: - sequence_id - sequence @@ -527,73 +516,76 @@ RearrangedSequence: properties: sequence_id: type: string + nullable: true + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important - description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true sequence: type: string + nullable: true x-airr: miairr: essential description: nucleotide sequence - nullable: true derivation: type: string + nullable: true enum: - DNA - RNA description: The class of nucleic acid that was used as primary starting material x-airr: miairr: important - nullable: true observation_type: type: string - x-airr: - miairr: essential - description: The type of observation from which this sequence was drawn, e.g. 
direct sequencing, inference from repertoire + nullable: true enum: - direct sequencing - inference from repertoire - nullable: true + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. + x-airr: + miairr: essential + curation: type: string nullable: true description: Curational notes on the sequence - nullable: true repository_name: type: string + nullable: true x-airr: miairr: defined description: Name of the repository in which the sequence has been deposited - nullable: true repository_ref: type: string + nullable: true x-airr: miairr: defined description: Queryable id or accession number of the sequence published by the repository - nullable: true deposited_version: type: string + nullable: true x-airr: miairr: defined description: Version number of the sequence within the repository - nullable: true sequence_start: type: integer + nullable: true x-airr: miairr: essential description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited - nullable: true sequence_end: type: integer + nullable: true x-airr: miairr: essential description: End co-ordinate of the sequence detailed in this record, within the sequence deposited - nullable: true UnrearrangedSequence: - discriminator: - propertyName: AIRR description: Details of an unrearranged sequence contributing support for a gene or allele type: object required: @@ -608,33 +600,34 @@ UnrearrangedSequence: properties: sequence_id: type: string + nullable: true x-airr: + identifier: true miairr: important description: unique identifier of this UnrearrangedSequence within the file - nullable: true sequence: type: string + nullable: true + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. 
x-airr: miairr: essential - description: Sequence of interest described in this record (typically this will include gene and promoter region) - nullable: true curation: type: string nullable: true description: Curational notes on the sequence - nullable: true repository_name: type: string + nullable: true x-airr: miairr: defined description: Name of the repository in which the assembly or contig is deposited - nullable: true repository_ref: type: string + nullable: true x-airr: miairr: defined description: Queryable id or accession number of the sequence published by the repository - nullable: true patch_no: type: string nullable: true @@ -642,15 +635,20 @@ UnrearrangedSequence: gff_seqid: type: string nullable: true - description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. gff_start: type: integer nullable: true - description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. gff_end: type: integer nullable: true - description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. 
strand: type: string nullable: true @@ -661,8 +659,6 @@ UnrearrangedSequence: # V gene delineation SequenceDelineationV: - discriminator: - propertyName: AIRR description: Delineation of a V-gene in a particular system type: object required: @@ -682,104 +678,110 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string + nullable: true + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important - description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true + delineation_scheme: type: string + nullable: true x-airr: miairr: important description: Name of the delineation scheme example: Chothia - nullable: true unaligned_sequence: type: string + nullable: true x-airr: miairr: important description: entire V-sequence covered by this delineation - nullable: true aligned_sequence: type: string - description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) nullable: true + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. 
fwr1_start: type: integer + nullable: true x-airr: miairr: important description: FWR1 start co-ordinate in the 'unaligned sequence' field - nullable: true fwr1_end: type: integer + nullable: true x-airr: miairr: important description: FWR1 end co-ordinate in the 'unaligned sequence' field - nullable: true cdr1_start: type: integer + nullable: true x-airr: miairr: important description: CDR1 start co-ordinate in the 'unaligned sequence' field - nullable: true cdr1_end: type: integer + nullable: true x-airr: miairr: important description: CDR1 end co-ordinate in the 'unaligned sequence' field - nullable: true fwr2_start: type: integer + nullable: true x-airr: miairr: important description: FWR2 start co-ordinate in the 'unaligned sequence' field - nullable: true fwr2_end: type: integer + nullable: true x-airr: miairr: important description: FWR2 end co-ordinate in the 'unaligned sequence' field - nullable: true cdr2_start: type: integer + nullable: true x-airr: miairr: important description: CDR2 start co-ordinate in the 'unaligned sequence' field - nullable: true cdr2_end: type: integer + nullable: true x-airr: miairr: important description: CDR2 end co-ordinate in the 'unaligned sequence' field - nullable: true fwr3_start: type: integer + nullable: true x-airr: miairr: important description: FWR3 start co-ordinate in the 'unaligned sequence' field - nullable: true fwr3_end: type: integer + nullable: true x-airr: miairr: important description: FWR3 end co-ordinate in the 'unaligned sequence' field - nullable: true cdr3_start: type: integer + nullable: true x-airr: miairr: important description: CDR3 start co-ordinate in the 'unaligned sequence' field - nullable: true alignment_labels: type: array nullable: true items: - type: string - description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one + type: string + description: > + One string for each codon in 
the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: - discriminator: - propertyName: AIRR description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations type: object required: @@ -799,23 +801,26 @@ AlleleDescription: properties: allele_description_id: type: string + nullable: true x-airr: + identifier: true miairr: important - description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. allele_description_ref: type: string + nullable: true x-airr: miairr: important description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 - nullable: true maintainer: type: string + nullable: true x-airr: miairr: defined description: Maintainer of this sequence record - nullable: true acknowledgements: type: array nullable: true @@ -824,50 +829,54 @@ AlleleDescription: $ref: '#/Acknowledgement' lab_address: type: string + nullable: true x-airr: miairr: defined description: Institution and full address of corresponding author - nullable: true release_version: type: integer + nullable: true x-airr: miairr: important description: Version number of this record, updated whenever a revised version is published or released - nullable: true release_date: type: string + nullable: true format: date-time x-airr: miairr: important description: Date of this release title: Release Date example: "2021-02-02" - nullable: true release_description: type: string + nullable: true x-airr: miairr: important description: Brief 
descriptive notes of the reason for this release and the changes embodied - nullable: true label: type: string + nullable: true x-airr: miairr: important description: The accepted name for this gene or allele example: IGHV1-69*01 - nullable: true sequence: type: string + nullable: true x-airr: miairr: essential - description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences - nullable: true + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. coding_sequence: type: string + nullable: true x-airr: miairr: important - description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader - nullable: true + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
aliases: type: array nullable: true @@ -876,10 +885,10 @@ AlleleDescription: description: Alternative names for this sequence locus: type: string - x-airr: - miairr: essential + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA @@ -887,48 +896,49 @@ AlleleDescription: - TRG - TRD description: Gene locus - nullable: true + x-airr: + miairr: essential chromosome: type: integer nullable: true - description: chromosome on which the gene is located + description: chromosome on which the gene is located sequence_type: type: string - x-airr: - miairr: essential + nullable: true enum: - V - D - J - C description: Sequence type (V, D, J, C) - nullable: true + x-airr: + miairr: essential functional: type: boolean + nullable: true x-airr: miairr: important description: True if the gene is functional, false if it is a pseudogene - nullable: true inference_type: type: string - x-airr: - miairr: important + nullable: true enum: - Genomic and rearranged - Genomic only - Rearranged only description: Type of inference(s) from which this gene sequence was inferred - nullable: true + x-airr: + miairr: important species: $ref: '#/Ontology' - x-airr: - miairr: essential + nullable: true description: Binomial designation of subject's species title: Organism example: id: NCBITAXON:9606 label: Homo sapiens - nullable: true + x-airr: + miairr: essential species_subgroup: type: string nullable: true @@ -979,84 +989,90 @@ AlleleDescription: - 1 - 2 - 3 - description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. 
'1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. gene_start: type: integer - description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + nullable: true + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. x-airr: miairr: important - nullable: true gene_end: type: integer nullable: true - description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. x-airr: miairr: important - nullable: true utr_5_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). utr_5_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). leader_1_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). leader_1_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). leader_2_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). 
leader_2_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). v_rs_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). v_rs_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). d_rs_3_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_3_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_5_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). d_rs_5_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of 5 the prime D recombination site (D-genes only). 
j_cdr3_end: type: integer nullable: true - description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucelotide of the conserved PHE or TRP (IMGT codon position 118) + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucelotide of the + conserved PHE or TRP (IMGT codon position 118). j_rs_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: Start co-ordinate in the sequence field of J recombination site (J-genes only). j_rs_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: End co-ordinate in the sequence field of J recombination site (J-genes only). j_donor_splice: type: integer nullable: true - description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only). v_gene_delineations: type: array nullable: true @@ -1081,7 +1097,9 @@ AlleleDescription: curation: type: string nullable: true - description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. curational_tags: type: array nullable: true @@ -1094,10 +1112,10 @@ AlleleDescription: # Collection of gene descriptions into a germline set GermlineSet: - discriminator: - propertyName: AIRR - description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. 
type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. required: - germline_set_id - author @@ -1114,28 +1132,31 @@ GermlineSet: properties: germline_set_id: type: string + nullable: true + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important - description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true author: type: string + nullable: true x-airr: miairr: important description: Corresponding author - nullable: true lab_name: type: string + nullable: true x-airr: miairr: important description: Department of corresponding author - nullable: true lab_address: type: string + nullable: true x-airr: miairr: important description: Institutional address of corresponding author - nullable: true acknowledgements: type: array nullable: true @@ -1144,38 +1165,38 @@ GermlineSet: $ref: '#/Acknowledgement' release_version: type: number + nullable: true x-airr: miairr: important description: Version number of this record, allocated automatically - nullable: true release_description: type: string + nullable: true x-airr: miairr: important description: Brief descriptive notes of the reason for this release and the changes embodied - nullable: true release_date: type: string + nullable: true format: date-time x-airr: miairr: important description: Date of this release title: Release Date example: "2021-02-02" - nullable: true germline_set_name: type: string + nullable: true x-airr: miairr: important description: descriptive name of this germline set - nullable: true germline_set_ref: type: string + nullable: true x-airr: miairr: important description: Unique identifier of the germline 
set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 - nullable: true pub_ids: type: string nullable: true @@ -1183,6 +1204,7 @@ GermlineSet: example: "PMID:85642,PMID:12345" species: $ref: '#/Ontology' + nullable: true x-airr: miairr: essential description: Binomial designation of subject's species @@ -1190,12 +1212,11 @@ GermlineSet: example: id: NCBITAXON:9606 label: Homo sapiens - nullable: true species_subgroup: type: string + nullable: true description: Race, strain or other species subgroup to which this subject belongs example: BALB/c - nullable: true species_subgroup_type: type: string nullable: true @@ -1207,10 +1228,10 @@ GermlineSet: - locational locus: type: string - x-airr: - miairr: essential + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA @@ -1218,19 +1239,22 @@ GermlineSet: - TRG - TRD description: Gene locus - nullable: true + x-airr: + miairr: essential allele_descriptions: type: array - x-airr: - miairr: important + nullable: true items: - $ref: '#/AlleleDescription' + $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set - nullable: true + x-airr: + miairr: important curation: type: string nullable: true - description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. 
# # Genotype schema @@ -1239,18 +1263,19 @@ GermlineSet: # GenotypeSet lists the Genotypes (describing different loci) inferred for this subject GenotypeSet: - discriminator: - propertyName: AIRR type: object required: - receptor_genotype_set_id properties: receptor_genotype_set_id: type: string + nullable: true x-airr: + identifier: true miairr: important - description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. type: array @@ -1259,13 +1284,11 @@ GenotypeSet: $ref: '#/Genotype' # Genotype of adaptive immune receptors -# This enumerates the alleles and gene deletions inferred in a single subject. -# Included alleles may either be listed by reference to a GermlineSet, or +# This enumerates the alleles and gene deletions inferred in a single subject. +# Included alleles may either be listed by reference to a GermlineSet, or # listed as 'undocumented', in which case the inferred sequence is provided Genotype: - discriminator: - propertyName: AIRR type: object required: - receptor_genotype_id @@ -1273,34 +1296,39 @@ Genotype: properties: receptor_genotype_id: type: string + nullable: true x-airr: + identifier: true miairr: important - description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record - nullable: true + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. 
locus: type: string + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRD - TRG + description: Gene locus example: IGH x-airr: adc-query-support: true format: controlled vocabulary miairr: essential - nullable: true documented_alleles: type: array + nullable: true description: List of alleles documented in reference set(s) items: $ref: '#/DocumentedAllele' x-airr: - miairr: important - nullable: true + miairr: important undocumented_alleles: type: array nullable: true @@ -1309,7 +1337,6 @@ Genotype: $ref: '#/UndocumentedAllele' x-airr: adc-query-support: true - nullable: true deleted_genes: type: array nullable: true @@ -1318,7 +1345,6 @@ Genotype: $ref: '#/DeletedGene' x-airr: adc-query-support: true - nullable: true inference_process: type: string nullable: true @@ -1345,21 +1371,23 @@ DocumentedAllele: properties: label: type: string + nullable: true x-airr: miairr: important description: The accepted name for this allele, taken from the GermlineSet - nullable: true germline_set_ref: type: string + nullable: true x-airr: miairr: important description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 - nullable: true phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
# Undocumented Allele # This describes a 'undocumented' allele found in a genotype @@ -1375,21 +1403,23 @@ UndocumentedAllele: properties: allele_name: type: string + nullable: true + description: Allele name as allocated by the inference pipeline x-airr: miairr: important - description: Allele name as allocated by the inference pipeline - nullable: true sequence: type: string + nullable: true + description: nt sequence of the allele, as provided by the inference pipeline x-airr: miairr: essential - description: nt sequence of the allele, as provided by the inference pipeline - nullable: true phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome - + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + # Deleted Gene # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype @@ -1403,26 +1433,26 @@ DeletedGene: properties: label: type: string + nullable: true + description: The accepted name for this gene, taken from the GermlineSet x-airr: miairr: essential - description: The accepted name for this gene, taken from the GermlineSet - nullable: true germline_set_ref: type: string + nullable: true + description: GermlineSet from which it was taken (issuer/name/version) x-airr: miairr: important - description: GermlineSet from which it was taken (issuer/name/version) - nullable: true phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
# List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: - discriminator: - propertyName: AIRR type: object required: - mhc_genotype_set_id @@ -1430,24 +1460,22 @@ MHCGenotypeSet: properties: mhc_genotype_set_id: type: string + nullable: true x-airr: + identifier: true miairr: important description: A unique identifier for this MHCGenotypeSet - nullable: true mhc_genotype_list: description: List of MHCGenotypes included in this set type: array + nullable: true x-airr: miairr: important items: $ref: '#/MHCGenotype' - nullable: true - # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: - discriminator: - propertyName: AIRR type: object required: - mhc_genotype_id @@ -1456,34 +1484,36 @@ MHCGenotype: properties: mhc_genotype_id: type: string + nullable: true x-airr: + identifier: true miairr: important description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study - nullable: true mhc_class: type: string - description: Class of MHC alleles described by the MHCGenotype + nullable: true enum: - - MHC-I - - MHC-II - - MHC-nonclassical + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: miairr: essential adc-query-support: true format: controlled vocabulary - nullable: true mhc_alleles: type: array + nullable: true description: List of MHC alleles of the indicated mhc_class identified in an individual items: $ref: '#/MHCAllele' x-airr: miairr: important adc-query-support: true - nullable: true mhc_genotyping_method: type: string + nullable: true description: > Information on how the genotype was determined. The content of this field should come from a list of recommended terms provided in the AIRR Schema documentation. 
@@ -1492,7 +1522,6 @@ MHCGenotype: x-airr: adc-query-support: true miairr: important - nullable: true # Allele of an MHC gene @@ -1501,12 +1530,12 @@ MHCAllele: properties: allele_designation: type: string + nullable: true x-airr: miairr: important description: > The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc identifiers, if provided by the mhc_typing method - nullable: true gene: $ref: '#/Ontology' nullable: true @@ -1524,12 +1553,11 @@ MHCAllele: id: MRO:0000004 label: MHC gene miairr: important - nullable: true reference_set_ref: type: string + nullable: true x-airr: miairr: important - nullable: true description: Repository and list from which it was taken (issuer/name/version) @@ -1539,8 +1567,6 @@ MHCAllele: # The overall study with a globally unique study_id Study: - discriminator: - propertyName: AIRR type: object required: - study_id @@ -1564,6 +1590,7 @@ Study: title: Study ID example: PRJNA001 x-airr: + identifier: true miairr: important adc-query-support: true set: 1 @@ -1729,10 +1756,10 @@ Study: description: Keywords describing properties of one or more data sets in a study title: Keywords for study example: - - contains_ig - - contains_schema_rearrangement - - contains_schema_clone - - contains_schema_cell + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell x-airr: miairr: important adc-query-support: true @@ -1766,8 +1793,6 @@ Study: # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: - discriminator: - propertyName: AIRR type: object required: - subject_id @@ -1794,6 +1819,7 @@ Subject: title: Subject ID example: SUB856413 x-airr: + identifier: true miairr: important adc-query-support: true set: 1 @@ -2027,8 +2053,6 @@ Subject: # 1-to-n relationship between a subject and its diagnoses Diagnosis: - discriminator: - propertyName: AIRR type: object required: - study_group_description @@ -2149,8 +2173,6 @@ 
Diagnosis: # 1-to-n relationship between a subject and its samples # sample_id is unique within a study Sample: - discriminator: - propertyName: AIRR type: object required: - sample_id @@ -2172,6 +2194,7 @@ Sample: title: Biological sample ID example: SUP52415 x-airr: + identifier: true miairr: important adc-query-support: true set: 2 @@ -2238,7 +2261,7 @@ Sample: nullable: true description: Time point at which sample was taken, relative to `Collection time event` title: Sample collection time - example: "14" + example: 14 x-airr: miairr: important adc-query-support: true @@ -2292,8 +2315,6 @@ Sample: # 1-to-n relationship between a sample and processing of its cells CellProcessing: - discriminator: - propertyName: AIRR type: object required: - tissue_processing @@ -2464,8 +2485,6 @@ CellProcessing: # object for PCR primer targets PCRTarget: - discriminator: - propertyName: AIRR type: object required: - pcr_target_locus @@ -2525,8 +2544,6 @@ PCRTarget: # generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid # but may be 1-to-n for technical replicates. 
NucleicAcidProcessing: - discriminator: - propertyName: AIRR type: object required: - template_class @@ -2584,7 +2601,7 @@ NucleicAcidProcessing: $ref: '#/Ontology' nullable: true description: Unit of template amount - title: Template amount time unit + title: Template amount time unit example: id: UO:0000024 label: nanogram @@ -2599,7 +2616,7 @@ NucleicAcidProcessing: draft: false top_node: id: UO:0000002 - label: physical quantity + label: physical quantity library_generation_method: type: string enum: @@ -2718,8 +2735,6 @@ NucleicAcidProcessing: # 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) SequencingRun: - discriminator: - propertyName: AIRR type: object required: - sequencing_run_id @@ -2736,6 +2751,7 @@ SequencingRun: title: Batch number example: 160101_M01234 x-airr: + identifier: true miairr: important adc-query-support: true set: 3 @@ -2811,8 +2827,6 @@ SequencingRun: # Resultant raw sequencing files from a SequencingRun SequencingData: - discriminator: - propertyName: AIRR type: object required: - sequencing_data_id @@ -2827,10 +2841,13 @@ SequencingData: sequencing_data_id: type: string nullable: true - description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. + description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. title: Raw sequencing data persistent identifier example: "SRA:SRR11610494" x-airr: + identifier: true miairr: important adc-query-support: true set: 4 @@ -2955,8 +2972,6 @@ SequencingData: # Set of annotated rearrangement sequences produced by # data processing upon the raw sequence data for a repertoire. 
DataProcessing: - discriminator: - propertyName: AIRR type: object required: - software_versions @@ -3102,23 +3117,21 @@ DataProcessing: adc-query-support: true SampleProcessing: - discriminator: - propertyName: AIRR allOf: - type: object properties: - sample_processing_id: - type: string - nullable: true - description: > - Identifier for the sample processing object. This field should be unique within the repertoire. - This field can be used to uniquely identify the combination of sample, cell processing, - nucleic acid processing and sequencing run information for the repertoire. - title: Sample processing ID - x-airr: - name: Sample processing ID - adc-query-support: true - identifier: true + sample_processing_id: + type: string + nullable: true + description: > + Identifier for the sample processing object. This field should be unique within the repertoire. + This field can be used to uniquely identify the combination of sample, cell processing, + nucleic acid processing and sequencing run information for the repertoire. + title: Sample processing ID + x-airr: + name: Sample processing ID + adc-query-support: true + identifier: true - $ref: '#/Sample' - $ref: '#/CellProcessing' - $ref: '#/NucleicAcidProcessing' @@ -3131,8 +3144,6 @@ SampleProcessing: # and experimentally observed by raw sequence data. A repertoire # can only be for one subject but may include multiple samples. 
Repertoire: - discriminator: - propertyName: AIRR type: object required: - study @@ -3199,8 +3210,6 @@ Repertoire: # A collection of repertoires for analysis purposes, includes optional time course RepertoireGroup: - discriminator: - propertyName: AIRR type: object required: - repertoire_group_id @@ -3210,6 +3219,8 @@ RepertoireGroup: type: string nullable: true description: Identifier for this repertoire collection + x-airr: + identifier: true repertoire_group_name: type: string nullable: true @@ -3246,8 +3257,6 @@ RepertoireGroup: adc-query-support: true Alignment: - discriminator: - propertyName: AIRR type: object required: - sequence_id @@ -3263,6 +3272,8 @@ Alignment: Unique query sequence identifier within the file. Most often this will be the input sequence header or a substring thereof, but may also be a custom identifier defined by the tool in cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true segment: type: string nullable: true @@ -3355,8 +3366,6 @@ Alignment: # The extended rearrangement object Rearrangement: - discriminator: - propertyName: AIRR type: object required: - sequence_id @@ -4227,9 +4236,9 @@ Rearrangement: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: important adc-query-support: true - identifier: true set: 6 subset: data (processed sequence) name: Cell index @@ -4305,8 +4314,6 @@ Rearrangement: # A unique inferred clone object that has been constructed within a single data processing # for a single repertoire and a subset of its sequences and/or rearrangements. Clone: - discriminator: - propertyName: AIRR type: object required: - clone_id @@ -4316,6 +4323,8 @@ Clone: type: string nullable: true description: Identifier for the clone. + x-airr: + identifier: true repertoire_id: type: string nullable: true @@ -4449,8 +4458,6 @@ Clone: # 1-to-n relationship for a clone to its trees. 
Tree: - discriminator: - propertyName: AIRR type: object required: - tree_id @@ -4461,6 +4468,8 @@ Tree: type: string nullable: true description: Identifier for the tree. + x-airr: + identifier: true clone_id: type: string nullable: true @@ -4478,8 +4487,6 @@ Tree: # 1-to-n relationship between a tree and its nodes Node: - discriminator: - propertyName: AIRR type: object required: - sequence_id @@ -4490,6 +4497,8 @@ Node: description: > Identifier for this node that matches the identifier in the newick string and, where possible, the sequence_id in the source repertoire. + x-airr: + identifier: true sequence_alignment: type: string nullable: true @@ -4511,8 +4520,6 @@ Node: # The cell object acts as point of reference for all data that can be related # to an individual cell, either by direct observation or inference. Cell: - discriminator: - propertyName: AIRR type: object required: - cell_id @@ -4528,6 +4535,7 @@ Cell: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: defined adc-query-support: true name: Cell index @@ -4535,7 +4543,7 @@ Cell: type: array nullable: true description: > - Array of sequence identifiers defined for the Rearrangement object + Array of sequence identifiers defined for the Rearrangement object title: Cell-associated rearrangements items: type: string @@ -4548,7 +4556,7 @@ Cell: type: array nullable: true description: > - Array of receptor identifiers defined for the Receptor object + Array of receptor identifiers defined for the Receptor object title: Cell-associated receptors items: type: string @@ -4582,7 +4590,8 @@ Cell: - "single-cell transcriptome" nullable: true description: > - keyword describing the methodology used to assess expression. This values for this field MUST come from a controlled vocabulary + Keyword describing the methodology used to assess expression. This values for this field MUST + come from a controlled vocabulary. 
x-airr: miairr: defined adc-api-optional: true @@ -4590,7 +4599,7 @@ Cell: type: string nullable: true description: > - DOI of raw data set containing the current event + DOI of raw data set containing the current event x-airr: miairr: defined adc-api-optional: true @@ -4598,7 +4607,7 @@ Cell: type: string nullable: true description: > - Index addressing the current event within the raw data set. + Index addressing the current event within the raw data set. x-airr: miairr: defined adc-api-optional: true @@ -4614,16 +4623,14 @@ Cell: name: Virtual pairing # The CellExpression object acts as a container to hold a single expression level measurement from -# an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: - discriminator: - propertyName: AIRR type: object required: - expression_id - - reperotire_id + - repertoire_id - data_processing_id - cell_id - property @@ -4636,6 +4643,7 @@ CellExpression: title: Expression property measurement identifier nullable: false x-airr: + identifier: true miairr: defined adc-query-support: true name: Expression measurement identifier @@ -4670,9 +4678,12 @@ CellExpression: name: Data processing for cell property: $ref: '#/Ontology' - description: Name of the property observed, typically a gene or antibody idenifier (and its label) from a canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). 
- title: Property information nullable: true + title: Property information + description: > + Name of the property observed, typically a gene or antibody idenifier (and its label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: id: ENSG:ENSG00000275747 label: IGHV3-79 @@ -4696,8 +4707,6 @@ CellExpression: # The Receptor object hold information about a receptor and its reactivity. # Receptor: - discriminator: - propertyName: AIRR type: object required: - receptor_id @@ -4715,6 +4724,7 @@ Receptor: title: Receptor ID example: TCR-MM-012345 x-airr: + identifier: true adc-query-support: true receptor_hash: type: string @@ -4772,6 +4782,7 @@ Receptor: type: string nullable: false enum: + - IGI - IGK - IGL - TRA @@ -4795,144 +4806,156 @@ Receptor: nullable: true description: Records of reactivity measurement items: - type: object - properties: - ligand_type: - type: string - nullable: false - enum: - - MHC:peptide - - MHC:non-peptide - - protein - - peptide - - non-peptidic - description: Classification of ligand binding to receptor - example: non-peptide - antigen_type: - type: string - nullable: false - enum: - - protein - - peptide - - non-peptidic - description: > - The type of antigen before processing by the immune system. - example: protein - antigen: - $ref: '#/Ontology' - nullable: false - description: > - The substance against which the receptor was tested. This can be any substance that - stimulates an adaptive immune response in the host, either through antibody production - or by T cell activation after presentation via an MHC molecule. 
- title: Antigen - example: - id: UNIPROT:P19597 - label: Circumsporozoite protein - x-airr: - adc-query-support: true - format: ontology - antigen_source_species: - $ref: '#/Ontology' - nullable: true - description: The species from which the antigen was isolated - title: Source species of antigen - example: - id: NCBITAXON:5843 - label: Plasmodium falciparum NF54 - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: NCBITAXON:1 - label: root - peptide_start: - type: integer - nullable: true - description: Start position of the peptide within the reference protein sequence - peptide_end: - type: integer - nullable: true - description: End position of the peptide within the reference protein sequence - mhc_class: - type: string - nullable: true - enum: - - MHC-I - - MHC-II - - MHC-nonclassical - description: Class of MHC molecule, only present for MHC:x ligand types - example: MHC-II - mhc_gene_1: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the mhc_allele_1 belongs - title: MHC gene 1 - example: - id: MRO:0000055 - label: HLA-DRA - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - mhc_allele_1: - type: string - nullable: true - description: Allele designation of the MHC alpha chain - example: HLA-DRA - mhc_gene_2: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the mhc_allele_2 belongs - title: MHC gene 2 - example: - id: MRO:0000057 - label: HLA-DRB1 - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - mhc_allele_2: - type: string - nullable: true - description: > - Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain - example: HLA-DRB1*04:01 - reactivity_method: - type: string - nullable: false - enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity - description: The methodology used to assess expression (assay implemented in experiment) - 
reactivity_readout: - type: string - nullable: false - enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition - description: Reactivity measurement read-out - example: cytokine release - reactivity_value: - type: number - nullable: false - description: The absolute (processed) value of the measurement - example: 162.26 - reactivity_unit: - type: string - nullable: false - description: The unit of the measurement - example: pg/ml + $ref: '#/ReceptorReactivity' + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + nullable: false + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + antigen_type: + type: string + nullable: false + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + antigen: + $ref: '#/Ontology' + nullable: false + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + nullable: true + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + nullable: true + description: Start position of the peptide within the reference protein sequence + peptide_end: + type: integer + nullable: true + description: End position of the peptide within the reference protein sequence + mhc_class: + type: string + nullable: true + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + mhc_gene_1: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + nullable: true + description: Allele designation of the MHC alpha chain + example: HLA-DRA + mhc_gene_2: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + nullable: true + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobulin chain + example: HLA-DRB1*04:01 + reactivity_method: + type: string + nullable: false + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess expression (assay implemented in experiment) + 
reactivity_readout: + type: string + nullable: false + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + reactivity_value: + type: number + nullable: false + description: The absolute (processed) value of the measurement + example: 162.26 + reactivity_unit: + type: string + nullable: false + description: The unit of the measurement + example: pg/ml diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 449734012..d53e28383 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -4,7 +4,7 @@ Info: title: AIRR Schema description: Schema definitions for AIRR standards objects - version: "1.4" + version: 1.4 contact: name: AIRR Community url: https://github.com/airr-community @@ -16,7 +16,6 @@ Info: # Properties that are based upon an ontology use this # standard schema definition Ontology: - discriminator: AIRR type: object properties: id: @@ -38,11 +37,11 @@ CURIEMap: CHEBI: type: ontology default: - map: OBO - provider: OLS + map: OBO + provider: OLS map: - OBO: - iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" CL: type: ontology default: @@ -245,7 +244,6 @@ InformationProvider: # attributes are attached to an AIRR field with the x-airr property. Attributes: - discriminator: AIRR type: object properties: miairr: @@ -336,7 +334,6 @@ Attributes: # the value should not be null. DataFile: - discriminator: AIRR type: object properties: Info: @@ -396,7 +393,6 @@ DataFile: # AIRR Info object, should be similar to openapi # should we point to an openapi schema? InfoObject: - discriminator: AIRR type: object description: Provides information about data and API responses. required: @@ -436,7 +432,6 @@ InfoObject: # A time point TimePoint: - discriminator: AIRR description: Time point at which an observation or other action was performed. 
type: object properties: @@ -478,7 +473,6 @@ TimePoint: # An individual Acknowledgement: - discriminator: AIRR description: Individual whose contribution to this work should be acknowledged type: object required: @@ -490,6 +484,7 @@ Acknowledgement: type: string description: unique identifier of this Acknowledgement within the file x-airr: + identifier: true miairr: important name: type: string @@ -507,9 +502,10 @@ Acknowledgement: # Rearranged and genomic germline sequences RearrangedSequence: - discriminator: AIRR - description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. required: - sequence_id - sequence @@ -523,8 +519,11 @@ RearrangedSequence: properties: sequence_id: type: string - description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important sequence: type: string @@ -541,10 +540,12 @@ RearrangedSequence: miairr: important observation_type: type: string - description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire enum: - - direct sequencing - - inference from repertoire + - "direct sequencing" + - "inference from repertoire" + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. 
x-airr: miairr: essential curation: @@ -577,7 +578,6 @@ RearrangedSequence: miairr: essential UnrearrangedSequence: - discriminator: AIRR description: Details of an unrearranged sequence contributing support for a gene or allele type: object required: @@ -594,10 +594,12 @@ UnrearrangedSequence: type: string description: unique identifier of this UnrearrangedSequence within the file x-airr: + identifier: true miairr: important sequence: type: string - description: Sequence of interest described in this record (typically this will include gene and promoter region) + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. x-airr: miairr: essential curation: @@ -618,23 +620,29 @@ UnrearrangedSequence: description: Genome assembly patch number in which this gene was determined gff_seqid: type: string - description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. gff_start: type: integer - description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. gff_end: type: integer - description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. 
strand: type: string enum: - - + + - "+" - "-" description: sense (+ or -) + x-airr: + nullable: true # V gene delineation SequenceDelineationV: - discriminator: AIRR description: Delineation of a V-gene in a particular system type: object required: @@ -654,8 +662,11 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string - description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important delineation_scheme: type: string @@ -670,7 +681,9 @@ SequenceDelineationV: description: entire V-sequence covered by this delineation aligned_sequence: type: string - description: aligned sequence, if this delineation provides an alignment (an aligned sequence should always be provided for IMGT delineations) + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. fwr1_start: type: integer description: FWR1 start co-ordinate in the 'unaligned sequence' field @@ -729,12 +742,13 @@ SequenceDelineationV: alignment_labels: type: array items: - type: string - description: one string for each codon in the aligned_sequence indicating the label of that codon according to the numbering of the delineation scheme, if it provides one + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. 
# Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: - discriminator: AIRR description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations type: object required: @@ -754,8 +768,11 @@ AlleleDescription: properties: allele_description_id: type: string - description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important allele_description_ref: type: string @@ -804,12 +821,16 @@ AlleleDescription: miairr: important sequence: type: string - description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. x-airr: miairr: essential coding_sequence: type: string - description: nucleotide sequence of the core coding region, i.e. the coding region of a D-, J- or C- gene, or the coding region of a V-gene excluding the leader + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
x-airr: miairr: important aliases: @@ -821,6 +842,7 @@ AlleleDescription: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -832,7 +854,7 @@ AlleleDescription: miairr: essential chromosome: type: integer - description: chromosome on which the gene is located + description: chromosome on which the gene is located sequence_type: type: string enum: @@ -851,9 +873,9 @@ AlleleDescription: inference_type: type: string enum: - - Genomic and rearranged - - Genomic only - - Rearranged only + - "Genomic and rearranged" + - "Genomic only" + - "Rearranged only" description: Type of inference(s) from which this gene sequence was inferred x-airr: miairr: important @@ -878,6 +900,8 @@ AlleleDescription: - inbred - outbred - locational + x-airr: + nullable: true status: type: string enum: @@ -886,6 +910,8 @@ AlleleDescription: - retired - withdrawn description: Status of record, assumed active if the field is not present + x-airr: + nullable: true subgroup_designation: type: string description: Identifier of the gene subgroup or clade, as (and if) defined @@ -907,65 +933,74 @@ AlleleDescription: - 1 - 2 - 3 - description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. + x-airr: + nullable: true gene_start: type: integer - description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. 
x-airr: miairr: important gene_end: type: integer - description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. x-airr: miairr: important utr_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). utr_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). leader_1_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). leader_1_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). leader_2_start: type: integer - description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). leader_2_end: type: integer - description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). v_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). v_rs_end: type: integer - description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). 
d_rs_3_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_3_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_5_prime_start: type: integer - description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). d_rs_5_prime_end: type: integer - description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). j_cdr3_end: type: integer - description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucelotide of the conserved PHE or TRP (IMGT codon position 118) + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucleotide of the + conserved PHE or TRP (IMGT codon position 118). j_rs_start: type: integer - description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: Start co-ordinate in the sequence field of the J recombination site (J-genes only). j_rs_end: type: integer - description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: End co-ordinate in the sequence field of the J recombination site (J-genes only). j_donor_splice: type: integer - description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only). 
v_gene_delineations: type: array items: @@ -985,7 +1020,9 @@ AlleleDescription: description: Gene symbols of any paralogs curation: type: string - description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. curational_tags: type: array items: @@ -997,9 +1034,10 @@ AlleleDescription: # Collection of gene descriptions into a germline set GermlineSet: - discriminator: AIRR - description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. required: - germline_set_id - author @@ -1016,8 +1054,11 @@ GermlineSet: properties: germline_set_id: type: string - description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. x-airr: + identifier: true miairr: important author: type: string @@ -1093,10 +1134,13 @@ GermlineSet: - inbred - outbred - locational + x-airr: + nullable: true locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA @@ -1109,13 +1153,15 @@ GermlineSet: allele_descriptions: type: array items: - $ref: '#/AlleleDescription' + $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set x-airr: miairr: important curation: type: string - description: Curational notes on the GermlineSet. 
This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. # # Genotype schema @@ -1124,15 +1170,17 @@ GermlineSet: # GenotypeSet lists the Genotypes (describing different loci) inferred for this subject GenotypeSet: - discriminator: AIRR type: object required: - receptor_genotype_set_id properties: receptor_genotype_set_id: type: string - description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. x-airr: + identifier: true miairr: important genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. @@ -1145,7 +1193,6 @@ GenotypeSet: # Genotype of adaptive immune receptors Genotype: - discriminator: AIRR type: object required: - receptor_genotype_id @@ -1153,19 +1200,24 @@ Genotype: properties: receptor_genotype_id: type: string - description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. 
x-airr: + identifier: true miairr: important locus: type: string enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRD - TRG + description: Gene locus example: IGH x-airr: miairr: essential @@ -1177,10 +1229,9 @@ Genotype: items: $ref: '#/DocumentedAllele' x-airr: - miairr: important + miairr: important undocumented_alleles: type: array - nullable: true description: List of alleles inferred to be present and not documented in an identified GermlineSet items: $ref: '#/UndocumentedAllele' @@ -1188,7 +1239,6 @@ Genotype: adc-query-support: true deleted_genes: type: array - nullable: true description: Array of genes identified as being deleted in this genotype items: $ref: '#/DeletedGene' @@ -1203,7 +1253,6 @@ Genotype: title: Genotype acquisition process example: repertoire_sequencing x-airr: - nullable: true adc-query-support: true format: controlled vocabulary @@ -1231,7 +1280,9 @@ DocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. # Undocumented Allele # This describes a 'undocumented' allele found in a genotype @@ -1257,8 +1308,10 @@ UndocumentedAllele: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome - + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + # Deleted Gene # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype @@ -1282,12 +1335,13 @@ DeletedGene: phasing: type: integer nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + description: > + Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the + same chromosome. # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: - discriminator: AIRR type: object required: - mhc_genotype_set_id @@ -1297,6 +1351,7 @@ MHCGenotypeSet: type: string description: A unique identifier for this MHCGenotypeSet x-airr: + identifier: true miairr: important mhc_genotype_list: description: List of MHCGenotypes included in this set @@ -1308,7 +1363,6 @@ MHCGenotypeSet: # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: - discriminator: AIRR type: object required: - mhc_genotype_id @@ -1319,14 +1373,15 @@ MHCGenotype: type: string description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study x-airr: + identifier: true miairr: important mhc_class: type: string - description: Class of MHC alleles described by the MHCGenotype enum: - - MHC-I - - MHC-II - - MHC-nonclassical + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: miairr: essential @@ -1391,7 +1446,6 @@ MHCAllele: # The overall study with a globally unique study_id Study: - discriminator: AIRR type: object required: - study_id @@ -1414,6 +1468,7 @@ Study: title: Study ID example: PRJNA001 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1579,10 +1634,10 @@ Study: description: Keywords describing properties of one or more data sets in a study title: Keywords for study example: - - contains_ig - - contains_schema_rearrangement - - contains_schema_clone - - contains_schema_cell + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell x-airr: miairr: important nullable: true @@ -1617,7 +1672,6 @@ Study: # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: - discriminator: AIRR type: object required: - 
subject_id @@ -1643,6 +1697,7 @@ Subject: title: Subject ID example: SUB856413 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -1872,7 +1927,6 @@ Subject: # 1-to-n relationship between a subject and its diagnoses Diagnosis: - discriminator: AIRR type: object required: - study_group_description @@ -1993,7 +2047,6 @@ Diagnosis: # 1-to-n relationship between a subject and its samples # sample_id is unique within a study Sample: - discriminator: AIRR type: object required: - sample_id @@ -2014,6 +2067,7 @@ Sample: title: Biological sample ID example: SUP52415 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2080,7 +2134,7 @@ Sample: type: number description: Time point at which sample was taken, relative to `Collection time event` title: Sample collection time - example: "14" + example: 14 x-airr: miairr: important nullable: true @@ -2135,7 +2189,6 @@ Sample: # 1-to-n relationship between a sample and processing of its cells CellProcessing: - discriminator: AIRR type: object required: - tissue_processing @@ -2306,7 +2359,6 @@ CellProcessing: # object for PCR primer targets PCRTarget: - discriminator: AIRR type: object required: - pcr_target_locus @@ -2366,7 +2418,6 @@ PCRTarget: # generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid # but may be 1-to-n for technical replicates. 
NucleicAcidProcessing: - discriminator: AIRR type: object required: - template_class @@ -2423,7 +2474,7 @@ NucleicAcidProcessing: template_amount_unit: $ref: '#/Ontology' description: Unit of template amount - title: Template amount time unit + title: Template amount time unit example: id: UO:0000024 label: nanogram @@ -2439,7 +2490,7 @@ NucleicAcidProcessing: draft: false top_node: id: UO:0000002 - label: physical quantity + label: physical quantity library_generation_method: type: string enum: @@ -2533,9 +2584,9 @@ NucleicAcidProcessing: type: string enum: - none - - "hetero_head-head" - - "hetero_tail-head" - - "hetero_prelinked" + - hetero_head-head + - hetero_tail-head + - hetero_prelinked description: > In case an experimental setup is used that physically links nucleic acids derived from distinct `Rearrangements` before library preparation, this field describes the mode of that linkage. All @@ -2558,7 +2609,6 @@ NucleicAcidProcessing: # 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) SequencingRun: - discriminator: AIRR type: object required: - sequencing_run_id @@ -2574,6 +2624,7 @@ SequencingRun: title: Batch number example: 160101_M01234 x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2650,7 +2701,6 @@ SequencingRun: # Resultant raw sequencing files from a SequencingRun SequencingData: - discriminator: AIRR type: object required: - sequencing_data_id @@ -2664,10 +2714,13 @@ SequencingData: properties: sequencing_data_id: type: string - description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. + description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. 
title: Raw sequencing data persistent identifier example: "SRA:SRR11610494" x-airr: + identifier: true miairr: important nullable: true adc-query-support: true @@ -2793,7 +2846,6 @@ SequencingData: # Set of annotated rearrangement sequences produced by # data processing upon the raw sequence data for a repertoire. DataProcessing: - discriminator: AIRR type: object required: - software_versions @@ -2809,10 +2861,10 @@ DataProcessing: description: Identifier for the data processing object. title: Data processing ID x-airr: + identifier: true nullable: true name: Data processing ID adc-query-support: true - identifier: true primary_annotation: type: boolean default: false @@ -2939,7 +2991,6 @@ DataProcessing: adc-query-support: true SampleProcessing: - discriminator: AIRR allOf: - type: object properties: @@ -2951,10 +3002,10 @@ SampleProcessing: nucleic acid processing and sequencing run information for the repertoire. title: Sample processing ID x-airr: + identifier: true nullable: true name: Sample processing ID adc-query-support: true - identifier: true - $ref: '#/Sample' - $ref: '#/CellProcessing' - $ref: '#/NucleicAcidProcessing' @@ -2967,7 +3018,6 @@ SampleProcessing: # and experimentally observed by raw sequence data. A repertoire # can only be for one subject but may include multiple samples. 
Repertoire: - discriminator: AIRR type: object required: - study @@ -3034,7 +3084,6 @@ Repertoire: # A collection of repertoires for analysis purposes, includes optional time course RepertoireGroup: - discriminator: AIRR type: object required: - repertoire_group_id @@ -3043,6 +3092,8 @@ RepertoireGroup: repertoire_group_id: type: string description: Identifier for this repertoire collection + x-airr: + identifier: true repertoire_group_name: type: string description: Short display name for this repertoire collection @@ -3077,7 +3128,6 @@ RepertoireGroup: Alignment: - discriminator: AIRR type: object required: - sequence_id @@ -3092,6 +3142,8 @@ Alignment: Unique query sequence identifier within the file. Most often this will be the input sequence header or a substring thereof, but may also be a custom identifier defined by the tool in cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true segment: type: string description: > @@ -3169,7 +3221,6 @@ Alignment: # The extended rearrangement object Rearrangement: - discriminator: AIRR type: object required: - sequence_id @@ -3976,7 +4027,6 @@ Rearrangement: # A unique inferred clone object that has been constructed within a single data processing # for a single repertoire and a subset of its sequences and/or rearrangements. Clone: - discriminator: AIRR type: object required: - clone_id @@ -3985,6 +4035,8 @@ Clone: clone_id: type: string description: Identifier for the clone. + x-airr: + identifier: true repertoire_id: type: string description: Identifier to the associated repertoire in study metadata. @@ -4097,7 +4149,6 @@ Clone: # 1-to-n relationship for a clone to its trees. Tree: - discriminator: AIRR type: object required: - tree_id @@ -4107,6 +4158,8 @@ Tree: tree_id: type: string description: Identifier for the tree. + x-airr: + identifier: true clone_id: type: string description: Identifier for the clone. 
@@ -4121,7 +4174,6 @@ Tree: # 1-to-n relationship between a tree and its nodes Node: - discriminator: AIRR type: object required: - sequence_id @@ -4131,6 +4183,8 @@ Node: description: > Identifier for this node that matches the identifier in the newick string and, where possible, the sequence_id in the source repertoire. + x-airr: + identifier: true sequence_alignment: type: string description: > @@ -4149,7 +4203,6 @@ Node: # The cell object acts as point of reference for all data that can be related # to an individual cell, either by direct observation or inference. Cell: - discriminator: AIRR type: object required: - cell_id @@ -4164,6 +4217,7 @@ Cell: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4218,7 +4272,8 @@ Cell: - "flow cytometry" - "single-cell transcriptome" description: > - keyword describing the methodology used to assess expression. This values for this field MUST come from a controlled vocabulary + Keyword describing the methodology used to assess expression. This values for this field MUST + come from a controlled vocabulary. x-airr: miairr: defined nullable: true @@ -4255,11 +4310,10 @@ Cell: # data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: - discriminator: AIRR type: object required: - expression_id - - reperotire_id + - repertoire_id - data_processing_id - cell_id - property @@ -4271,6 +4325,7 @@ CellExpression: Identifier of this expression property measurement. title: Expression property measurement identifier x-airr: + identifier: true miairr: defined nullable: false adc-query-support: true @@ -4306,9 +4361,11 @@ CellExpression: name: Data processing for cell property: $ref: '#/Ontology' - description: Name of the property observed, typically a gene or antibody idenifier (and its label) from a canonical resource such as Ensembl (e.g. 
ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). title: Property information - nullable: true + description: > + Name of the property observed, typically a gene or antibody idenifier (and its label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: id: ENSG:ENSG00000275747 label: IGHV3-79 @@ -4332,7 +4389,6 @@ CellExpression: # The Receptor object hold information about a receptor and its reactivity. # Receptor: - discriminator: AIRR type: object required: - receptor_id @@ -4349,6 +4405,7 @@ Receptor: title: Receptor ID example: TCR-MM-012345 x-airr: + identifier: true nullable: false adc-query-support: true receptor_hash: @@ -4406,6 +4463,7 @@ Receptor: receptor_variable_domain_2_locus: type: string enum: + - IGI - IGK - IGL - TRA @@ -4429,157 +4487,169 @@ Receptor: type: array description: Records of reactivity measurement items: - type: object - properties: - ligand_type: - type: string - enum: - - MHC:peptide - - MHC:non-peptide - - protein - - peptide - - non-peptidic - description: Classification of ligand binding to receptor - example: non-peptide - x-airr: - nullable: false - antigen_type: - type: string - enum: - - protein - - peptide - - non-peptidic - description: > - The type of antigen before processing by the immune system. - example: protein - x-airr: - nullable: false - antigen: - $ref: '#/Ontology' - description: > - The substance against which the receptor was tested. This can be any substance that - stimulates an adaptive immune response in the host, either through antibody production - or by T cell activation after presentation via an MHC molecule. 
- title: Antigen - example: - id: UNIPROT:P19597 - label: Circumsporozoite protein - x-airr: - nullable: false - adc-query-support: true - format: ontology - antigen_source_species: - $ref: '#/Ontology' - description: The species from which the antigen was isolated - title: Source species of antigen - example: - id: NCBITAXON:5843 - label: Plasmodium falciparum NF54 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: NCBITAXON:1 - label: root - peptide_start: - type: integer - description: Start position of the peptide within the reference protein sequence - x-airr: - nullable: true - peptide_end: - type: integer - description: End position of the peptide within the reference protein sequence - x-airr: - nullable: true - mhc_class: - type: string - enum: - - MHC-I - - MHC-II - - MHC-nonclassical - description: Class of MHC molecule, only present for MHC:x ligand types - example: MHC-II - x-airr: - nullable: true - mhc_gene_1: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_1 belongs - title: MHC gene 1 - example: - id: MRO:0000055 - label: HLA-DRA - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - mhc_allele_1: - type: string - description: Allele designation of the MHC alpha chain - example: HLA-DRA - x-airr: - nullable: true - mhc_gene_2: - $ref: '#/Ontology' - description: The MHC gene to which the mhc_allele_2 belongs - title: MHC gene 2 - example: - id: MRO:0000057 - label: HLA-DRB1 - x-airr: - nullable: true - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - mhc_allele_2: - type: string - description: > - Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain - example: HLA-DRB1*04:01 - x-airr: - nullable: true - reactivity_method: - type: string - enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity - description: The methodology used to assess 
expression (assay implemented in experiment) - x-airr: - nullable: false - reactivity_readout: - type: string - enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition - description: Reactivity measurement read-out - example: cytokine release - x-airr: - nullable: false - reactivity_value: - type: number - description: The absolute (processed) value of the measurement - example: 162.26 - x-airr: - nullable: false - reactivity_unit: - type: string - description: The unit of the measurement - example: pg/ml - x-airr: - nullable: false + $ref: '#/ReceptorReactivity' x-airr: nullable: true + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + x-airr: + nullable: false + antigen_type: + type: string + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + x-airr: + nullable: false + antigen: + $ref: '#/Ontology' + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + nullable: false + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + description: Start position of the peptide within the reference protein sequence + x-airr: + nullable: true + peptide_end: + type: integer + description: End position of the peptide within the reference protein sequence + x-airr: + nullable: true + mhc_class: + type: string + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + x-airr: + nullable: true + mhc_gene_1: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + description: Allele designation of the MHC alpha chain + example: HLA-DRA + x-airr: + nullable: true + mhc_gene_2: + $ref: '#/Ontology' + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + nullable: true + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + x-airr: + nullable: true + reactivity_method: + type: string + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess 
expression (assay implemented in experiment) + x-airr: + nullable: false + reactivity_readout: + type: string + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + x-airr: + nullable: false + reactivity_value: + type: number + description: The absolute (processed) value of the measurement + example: 162.26 + x-airr: + nullable: false + reactivity_unit: + type: string + description: The unit of the measurement + example: pg/ml + x-airr: + nullable: false From bd93f36020cbb006f6abb06f8efa669a7db019ca Mon Sep 17 00:00:00 2001 From: Jason Vander Heiden Date: Mon, 12 Jun 2023 10:45:01 -0700 Subject: [PATCH 11/59] Copy of PR #685 (#695) Introduced SubjectGenotype to allow testing of mhc_genotype_set and receptor_genotype_set --------- Co-authored-by: jday1 --- lang/R/inst/extdata/airr-schema.yaml | 22 ++++--- lang/python/airr/specs/airr-schema.yaml | 22 ++++--- lang/python/tests/data/bad_germline_set.json | 2 +- .../python/tests/data/good_combined_airr.json | 64 +++++++++++++++++-- .../python/tests/data/good_combined_airr.yaml | 34 ++++++++++ specs/airr-schema-openapi3.yaml | 27 ++++---- specs/airr-schema.yaml | 22 ++++--- 7 files changed, 150 insertions(+), 43 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 4b87c621c..fffe6d12b 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -1669,6 +1669,17 @@ Study: adc-query-support: true name: ADC Update Date +SubjectGenotype: + discriminator: AIRR + type: object + properties: + receptor_genotype_set: + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. 
+ # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: @@ -1915,15 +1926,8 @@ Subject: nullable: false adc-query-support: true genotype: - type: object - description: Genotype for this subject, if known - properties: - receptor_genotype_set: - $ref: '#/GenotypeSet' - description: Immune receptor genotype set for this subject. - mhc_genotype_set: - $ref: '#/MHCGenotypeSet' - description: MHC genotype set for this subject. + $ref: '#/SubjectGenotype' + title: SubjectGenotype # 1-to-n relationship between a subject and its diagnoses Diagnosis: diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 4b87c621c..fffe6d12b 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -1669,6 +1669,17 @@ Study: adc-query-support: true name: ADC Update Date +SubjectGenotype: + discriminator: AIRR + type: object + properties: + receptor_genotype_set: + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. + # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: @@ -1915,15 +1926,8 @@ Subject: nullable: false adc-query-support: true genotype: - type: object - description: Genotype for this subject, if known - properties: - receptor_genotype_set: - $ref: '#/GenotypeSet' - description: Immune receptor genotype set for this subject. - mhc_genotype_set: - $ref: '#/MHCGenotypeSet' - description: MHC genotype set for this subject. 
+ $ref: '#/SubjectGenotype' + title: SubjectGenotype # 1-to-n relationship between a subject and its diagnoses Diagnosis: diff --git a/lang/python/tests/data/bad_germline_set.json b/lang/python/tests/data/bad_germline_set.json index f221dcf9e..33d2cfe62 100644 --- a/lang/python/tests/data/bad_germline_set.json +++ b/lang/python/tests/data/bad_germline_set.json @@ -348,4 +348,4 @@ ], "notes": "" }] -} \ No newline at end of file +} diff --git a/lang/python/tests/data/good_combined_airr.json b/lang/python/tests/data/good_combined_airr.json index 07b52ffe1..82d0dd987 100644 --- a/lang/python/tests/data/good_combined_airr.json +++ b/lang/python/tests/data/good_combined_airr.json @@ -58,7 +58,65 @@ "intervention": null, "medical_history": null } - ] + ], + "genotype": { + "receptor_genotype_set": { + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }, + "mhc_genotype_set": { + "mhc_genotype_set_id": "this is a unique identifier", + "mhc_genotype_list": [ + { + "mhc_genotype_id": "unique", + "mhc_class": "MHC-I", + "mhc_genotyping_method": "pcr_low_resolution", + "mhc_alleles": [ + { + "allele_designation": "01:01", + "gene": { + "id": "MRO-0000046", + "label": "HLA-A" + }, + "reference_set_ref": "blah" + } + ] + } + ] + } + } }, "sample": [ { @@ -803,8 +861,6 @@ ], "curation": null }], - - "GenotypeSet": [{ "receptor_genotype_set_id": "1", "genotype_class_list": [ @@ -841,4 
+897,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/python/tests/data/good_combined_airr.yaml b/lang/python/tests/data/good_combined_airr.yaml index 89a515177..4dcc1d1a1 100644 --- a/lang/python/tests/data/good_combined_airr.yaml +++ b/lang/python/tests/data/good_combined_airr.yaml @@ -62,6 +62,40 @@ Repertoire: immunogen: intervention: medical_history: + genotype: + receptor_genotype_set: + receptor_genotype_set_id: '1' + genotype_class_list: + - receptor_genotype_id: '1' + locus: IGH + documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing + mhc_genotype_set: + mhc_genotype_set_id: "this is a unique identifier" + mhc_genotype_list: + - mhc_genotype_id: unique + mhc_class: MHC-I + mhc_genotyping_method: pcr_low_resolution + mhc_alleles: + - allele_designation: "01:01" + gene: + id: "MRO-0000046" + label: "HLA-A" + reference_set_ref: blah sample: - sample_id: TW01A_B_naive sample_processing_id: diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 46a4e3de3..8997c48ed 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -1561,6 +1561,20 @@ MHCAllele: description: Repository and list from which it was taken (issuer/name/version) +SubjectGenotype: + discriminator: + propertyName: AIRR + type: object + properties: + receptor_genotype_set: + nullable: true + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + nullable: true + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. 
+ # # Repertoire metadata schema # @@ -2038,18 +2052,9 @@ Subject: x-airr: adc-query-support: true genotype: - type: object nullable: true - description: Genotype for this subject, if known - properties: - receptor_genotype_set: - nullable: true - $ref: '#/GenotypeSet' - description: Immune receptor genotype set for this subject. - mhc_genotype_set: - nullable: true - $ref: '#/MHCGenotypeSet' - description: MHC genotype set for this subject. + $ref: '#/SubjectGenotype' + title: SubjectGenotype # 1-to-n relationship between a subject and its diagnoses Diagnosis: diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index d53e28383..c1ee64438 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1669,6 +1669,17 @@ Study: adc-query-support: true name: ADC Update Date +SubjectGenotype: + discriminator: AIRR + type: object + properties: + receptor_genotype_set: + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. + # 1-to-n relationship between a study and its subjects # subject_id is unique within a study Subject: @@ -1915,15 +1926,8 @@ Subject: nullable: false adc-query-support: true genotype: - type: object - description: Genotype for this subject, if known - properties: - receptor_genotype_set: - $ref: '#/GenotypeSet' - description: Immune receptor genotype set for this subject. - mhc_genotype_set: - $ref: '#/MHCGenotypeSet' - description: MHC genotype set for this subject. 
+ $ref: '#/SubjectGenotype' + title: SubjectGenotype # 1-to-n relationship between a subject and its diagnoses Diagnosis: From 1729129db66b19aa16b63667896ced10d72643ce Mon Sep 17 00:00:00 2001 From: Jason Vander Heiden Date: Mon, 12 Jun 2023 11:09:10 -0700 Subject: [PATCH 12/59] Remove RTD C compile dependency hack (#694) Remove RTD C compile dependency hack --- lang/python/setup.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lang/python/setup.py b/lang/python/setup.py index 5a21155b3..65bf7a225 100644 --- a/lang/python/setup.py +++ b/lang/python/setup.py @@ -14,11 +14,7 @@ long_description = ip.read() # Parse requirements -if os.environ.get('READTHEDOCS', None) == 'True': - # Set empty install_requires to get install to work on readthedocs - install_requires = [] -else: - with open('requirements.txt') as req: +with open('requirements.txt') as req: install_requires = req.read().splitlines() # Setup From ec5a9ab20580de64d1a6d7c61c4c2866066bcc6c Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Mon, 10 Jul 2023 20:58:40 +0200 Subject: [PATCH 13/59] Update datarep_cmd_example.rst Switch link to https --- docs/examples/datarep_cmd_example.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/datarep_cmd_example.rst b/docs/examples/datarep_cmd_example.rst index a4ad1b452..7bac4e111 100644 --- a/docs/examples/datarep_cmd_example.rst +++ b/docs/examples/datarep_cmd_example.rst @@ -16,7 +16,7 @@ We've hosted a small set of example data from BioProject PRJNA338795 (Vander Heiden et al, 2017. J Immunol.) containing both input and output of the example. It may be downloaded from: -`Example Data `__ +`Example Data `__ Walkthrough -------------------------------------------------------------------------------- @@ -119,4 +119,4 @@ outputs into a single table:: .. 
figure:: images/datarep_cmd_vusage.png :align: center - **V family usage for the combined data set.** \ No newline at end of file + **V family usage for the combined data set.** From 9dd372795f008167677bf6e64aa8e2642530dcdf Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Mon, 10 Jul 2023 12:19:41 -0700 Subject: [PATCH 14/59] Update study_keywords documentation --- specs/airr-schema.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index c1ee64438..c0121905c 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1631,7 +1631,11 @@ Study: - contains_schema_clone - contains_schema_cell - contains_schema_receptor - description: Keywords describing properties of one or more data sets in a study + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). 
title: Keywords for study example: - contains_ig From ffb5b29b84dfd4534797e08b9441579b330c2416 Mon Sep 17 00:00:00 2001 From: bcorrie Date: Mon, 10 Jul 2023 19:35:57 +0000 Subject: [PATCH 15/59] Replace adc-query_support with adc-query-support --- specs/adc-api-openapi3.yaml | 36 ++++++++++++++++++------------------ specs/adc-api.yaml | 36 ++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/specs/adc-api-openapi3.yaml b/specs/adc-api-openapi3.yaml index 7eeda9d40..17115b832 100644 --- a/specs/adc-api-openapi3.yaml +++ b/specs/adc-api-openapi3.yaml @@ -137,7 +137,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene subgroup adc_v_gene: type: string @@ -148,7 +148,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene adc_d_subgroup: type: string @@ -159,7 +159,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene subgroup adc_d_gene: type: string @@ -170,7 +170,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene adc_j_subgroup: type: string @@ -181,7 +181,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene subgroup adc_j_gene: type: string @@ -192,7 +192,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene adc_c_subgroup: type: string @@ -203,7 +203,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene subgroup adc_c_gene: type: string @@ -214,7 +214,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene 
adc_annotation_cell_id: type: string @@ -224,7 +224,7 @@ components: example: AAACCTGAGCACCGCT-1 x-airr: adc-api-optional: false - adc-query_support: true + adc-query-support: true name: Tool Cell ID # list of rearrangement annotations @@ -249,7 +249,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene subgroup adc_v_gene: type: string @@ -260,7 +260,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene adc_d_subgroup: type: string @@ -271,7 +271,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene subgroup adc_d_gene: type: string @@ -282,7 +282,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene adc_j_subgroup: type: string @@ -293,7 +293,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene subgroup adc_j_gene: type: string @@ -304,7 +304,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene adc_c_subgroup: type: string @@ -315,7 +315,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene subgroup adc_c_gene: type: string @@ -326,7 +326,7 @@ components: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene # list of clone annotations @@ -350,7 +350,7 @@ components: example: AAACCTGAGCACCGCT-1 x-airr: adc-api-optional: false - adc-query_support: true + adc-query-support: true name: Tool Cell ID # list of cell annotations diff --git a/specs/adc-api.yaml b/specs/adc-api.yaml index 1a1562459..13548eb3d 100644 --- a/specs/adc-api.yaml +++ b/specs/adc-api.yaml @@ -111,7 +111,7 @@ definitions: 
x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene subgroup adc_v_gene: type: string @@ -122,7 +122,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene adc_d_subgroup: type: string @@ -133,7 +133,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene subgroup adc_d_gene: type: string @@ -144,7 +144,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene adc_j_subgroup: type: string @@ -155,7 +155,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene subgroup adc_j_gene: type: string @@ -166,7 +166,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene adc_c_subgroup: type: string @@ -177,7 +177,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene subgroup adc_c_gene: type: string @@ -188,7 +188,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene adc_annotation_cell_id: type: string @@ -198,7 +198,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: Tool Cell ID # list of rearrangement annotations @@ -234,7 +234,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene subgroup adc_v_gene: type: string @@ -245,7 +245,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: V gene adc_d_subgroup: type: string @@ -256,7 +256,7 @@ definitions: x-airr: nullable: true 
adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene subgroup adc_d_gene: type: string @@ -267,7 +267,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: D gene adc_j_subgroup: type: string @@ -278,7 +278,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene subgroup adc_j_gene: type: string @@ -289,7 +289,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: J gene adc_c_subgroup: type: string @@ -300,7 +300,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene subgroup adc_c_gene: type: string @@ -311,7 +311,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: C gene # list of clone annotations @@ -355,7 +355,7 @@ definitions: x-airr: nullable: true adc-api-optional: false - adc-query_support: true + adc-query-support: true name: Tool Cell ID # list of cell annotations From 5efa9cc9b1b8bd7d1e0a7c03c9a3df26d10f6dd4 Mon Sep 17 00:00:00 2001 From: bcorrie Date: Mon, 10 Jul 2023 20:04:44 +0000 Subject: [PATCH 16/59] Changed adc-api-optional to adc-query-support. --- specs/airr-schema-openapi3.yaml | 15 +++++++-------- specs/airr-schema.yaml | 15 +++++++-------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 8997c48ed..72767d291 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -4599,7 +4599,7 @@ Cell: come from a controlled vocabulary. 
x-airr: miairr: defined - adc-api-optional: true + adc-query-support: true expression_raw_doi: type: string nullable: true @@ -4607,7 +4607,7 @@ Cell: DOI of raw data set containing the current event x-airr: miairr: defined - adc-api-optional: true + adc-query-support: true expression_index: type: string nullable: true @@ -4615,7 +4615,6 @@ Cell: Index addressing the current event within the raw data set. x-airr: miairr: defined - adc-api-optional: true virtual_pairing: type: boolean nullable: true @@ -4749,7 +4748,7 @@ Receptor: - TCR description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_aa: type: string nullable: false @@ -4760,7 +4759,7 @@ Receptor: example: > QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_locus: type: string nullable: false @@ -4771,7 +4770,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_1_aa originates example: IGH x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_aa: type: string nullable: false @@ -4782,7 +4781,7 @@ Receptor: example: > QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_locus: type: string nullable: false @@ -4795,7 +4794,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_2_aa originates example: IGL x-airr: - adc-api-optional: true + adc-query-support: true receptor_ref: type: array nullable: true diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index c0121905c..151dd181d 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -4285,7 +4285,7 @@ Cell: 
x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_raw_doi: type: string description: > @@ -4293,7 +4293,7 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_index: type: string description: > @@ -4301,7 +4301,6 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true virtual_pairing: type: boolean description: > @@ -4434,7 +4433,7 @@ Receptor: description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_aa: type: string description: > @@ -4445,7 +4444,7 @@ Receptor: QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_locus: type: string enum: @@ -4456,7 +4455,7 @@ Receptor: example: IGH x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_aa: type: string description: > @@ -4467,7 +4466,7 @@ Receptor: QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_locus: type: string enum: @@ -4480,7 +4479,7 @@ Receptor: example: IGL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_ref: type: array description: Array of receptor identifiers defined for the Receptor object From 12019710cf840cf25c3e2d30f8b5d80f8bb4b43f Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Mon, 10 Jul 2023 13:22:46 -0700 Subject: [PATCH 17/59] Added adc-api-optional to AIRR extensions docs. 
--- docs/datarep/overview.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/datarep/overview.rst b/docs/datarep/overview.rst index 22d3fdbf3..57c0d3f6a 100644 --- a/docs/datarep/overview.rst +++ b/docs/datarep/overview.rst @@ -212,6 +212,9 @@ supported AIRR extension properties: * - ``adc-query-support`` - True if an ADC API implementation must support queries on the field. If false, query support for the field in ADC API implementations is optional. + * - ``adc-api-optional`` + - True if the field is specific to the ADC API and is not part of the AIRR specification proper. + These are typically "convenience" fields that make finding data easy or efficient (can be optimized by a repository). * - ``deprecated`` - True if the field has been deprecated from the schema. * - ``deprecated-description`` From 3ca4a8b373bc43208e06c375963c097ceb139f1b Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 10 Jul 2023 15:48:37 -0500 Subject: [PATCH 18/59] newer version of sphinx required --- docker/Dockerfile | 3 ++- docs/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a4ff2043a..7cdce0d54 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -47,7 +47,7 @@ RUN mkdir /airr-standards COPY . /airr-standards # Install python package -RUN cd /airr-standards/lang/python && python3 setup.py install +RUN cd /airr-standards/lang/python && pip3 install . 
# nodejs package RUN cd /airr-standards/lang/js && npm install @@ -55,4 +55,5 @@ RUN cd /airr-standards/lang/js && npm run eslint RUN cd /airr-standards/lang/js && npm run test # Generate the documentation +RUN cd /airr-standards && pip3 install -r docs/requirements.txt RUN cd /airr-standards && sphinx-build -a -E -b html docs docs/_build/html diff --git a/docs/requirements.txt b/docs/requirements.txt index a822ec32d..3d187c6e8 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx>=1.6 +sphinx>=5.3 sphinx-book-theme>=0.0.17 sphinxcontrib-autoprogram>=0.1.4 prov>=1.5.1 From d6aca534038c7472e574eb19b02825f3a5664d82 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 11 Jul 2023 09:28:49 -0700 Subject: [PATCH 19/59] Add paragraph linking to ADC --- docs/api/adc_api.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/api/adc_api.rst b/docs/api/adc_api.rst index 00fc9e401..dd5948c49 100644 --- a/docs/api/adc_api.rst +++ b/docs/api/adc_api.rst @@ -24,6 +24,11 @@ community discussions at the AIRR 2016 and 2017 Community Meetings and were approved through a vote by the AIRR Community at the AIRR Community Meeting in December 2017. +This has resulted in the creation of the AIRR Data Commons (ADC), a network +of distributed AIRR Standards compliant repositories that can be queried by the +ADC API described here. More information about the ADC and the component repositories +in the ADC can be found on the :ref:`AIRR Data Commons page`. + .. 
__: https://github.com/airr-community/common-repo-wg/blob/v0.7.0/recommendations.md Overview From e8fe17e08a565dc994d4c2a4b75e3b9453479917 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 11 Jul 2023 09:39:23 -0700 Subject: [PATCH 20/59] Added links to ADC registry github --- docs/api/adc.rst | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/docs/api/adc.rst b/docs/api/adc.rst index d3751cdc6..22d003d7b 100644 --- a/docs/api/adc.rst +++ b/docs/api/adc.rst @@ -41,16 +41,9 @@ AIRR Data Commons. AIRR Data Commons Repositories --------------------------------------- -These data repositories all implement the AIRR Data Commons (ADC) API programmatic access to -query and download AIRR-seq data. - -+ :ref:`iReceptor Public Archive ` (Canada) -+ :ref:`iReceptor COVID-19 Repository ` (Canada) -+ :ref:`VDJServer Community Data Portal ` (US) -+ sciReptor (Germany) -+ VDJBase (Canada) -+ NICD (Africa) -+ University of Muenster (Germany) +The repositories that are part of the ADC are listed on the `AIRR Community ADC Registry github site +`_. In order +to find data across all of the repositories in the ADC, it is necessary to query all of these repositories. 
Querying the AIRR Data Commons --------------------------------------- @@ -60,6 +53,7 @@ following tools and platforms implement web based user interfaces that use the A in the AIRR Data Commons: + :ref:`iReceptor Gateway ` ++ :ref:`VDJServer Community Data Portal ` There are :ref:`query and analysis use cases ` and :ref:`a set of example queries ` available for the From ff6c23fbdf1a0f970dfde5a958b7c94ec5aed37e Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 11 Jul 2023 09:49:07 -0700 Subject: [PATCH 21/59] Added a clairification about server names --- docs/api/adc_api.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/api/adc_api.rst b/docs/api/adc_api.rst index dd5948c49..8f80ce09a 100644 --- a/docs/api/adc_api.rst +++ b/docs/api/adc_api.rst @@ -203,6 +203,13 @@ The response should indicate success. {"result":"success"} +Note: In the above example, the repository name vdjserver.org is initiating a query against one of the repositories in +the ADC, in this case VDJServer. In order to query all of the repositories in the ADC, it is necessary to query each repository +separately. An up to date list of repositories in the ADC is provided at the `AIRR Community ADC Registry github site +`_. In the remainder of this document, we use +vdjserver.org as the repository name, but this can be replaced with any valid ADC repository name, resulting in the same +query working for any repository in the ADC. + **Service Info Example** The following is an example ``GET`` request to get information about the service. 
From c926196be569ab9c17c507128019754ed102ca8f Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 11 Jul 2023 09:56:39 -0700 Subject: [PATCH 22/59] Point to ADC registry github --- docs/resources/adc_support.rst | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/docs/resources/adc_support.rst b/docs/resources/adc_support.rst index b3618891c..b2df83eee 100644 --- a/docs/resources/adc_support.rst +++ b/docs/resources/adc_support.rst @@ -3,13 +3,5 @@ AIRR Data Commons Repositories --------------------------------------- -These data repositories all implement the AIRR Data Commons (ADC) API programmatic access to -query and download AIRR-seq data. - -+ :ref:`iReceptor Public Archive ` (Canada) -+ :ref:`iReceptor COVID-19 Repository ` (Canada) -+ :ref:`VDJServer Community Data Portal ` (US) -+ sciReptor (Germany) -+ VDJBase (Canada) -+ NICD (Africa) -+ University of Muenster (Germany) +A full list of AIRR Compliant repositories that implement the AIRR Data Commons API can be found +at the `AIRR Community ADC Registry github site `_. From 5886d7ddc7bc1c6d9e830c67ea3a008fc05db6b6 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Tue, 11 Jul 2023 10:02:54 -0700 Subject: [PATCH 23/59] Change link to ADC page. 
--- docs/community.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/community.rst b/docs/community.rst index bb60d02db..a3f696ae5 100644 --- a/docs/community.rst +++ b/docs/community.rst @@ -10,7 +10,7 @@ Resources and Tools Supporting AIRR Standards :maxdepth: 1 Applications Supporting the Rearrangement Schema - AIRR Data Commons Repositories + AIRR Data Commons Useful Websites for the AIRR Community ------------------------------------------------------------------------------- From 5fa6f891e5920c0b6ffc39c2ceb64390367fb496 Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Tue, 25 Jul 2023 00:16:03 +0200 Subject: [PATCH 24/59] Synchronize schema copies --- lang/R/inst/extdata/airr-schema.yaml | 21 ++++++++++++--------- lang/python/airr/specs/airr-schema.yaml | 21 ++++++++++++--------- specs/airr-schema.yaml | 8 ++++---- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index fffe6d12b..c18872424 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -1631,7 +1631,11 @@ Study: - contains_schema_clone - contains_schema_cell - contains_schema_receptor - description: Keywords describing properties of one or more data sets in a study + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). 
title: Keywords for study example: - contains_ig @@ -4281,7 +4285,7 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_raw_doi: type: string description: > @@ -4289,7 +4293,7 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_index: type: string description: > @@ -4297,7 +4301,6 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true virtual_pairing: type: boolean description: > @@ -4430,7 +4433,7 @@ Receptor: description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_aa: type: string description: > @@ -4441,7 +4444,7 @@ Receptor: QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_locus: type: string enum: @@ -4452,7 +4455,7 @@ Receptor: example: IGH x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_aa: type: string description: > @@ -4463,7 +4466,7 @@ Receptor: QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_locus: type: string enum: @@ -4476,7 +4479,7 @@ Receptor: example: IGL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_ref: type: array description: Array of receptor identifiers defined for the Receptor object diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index fffe6d12b..c18872424 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -1631,7 +1631,11 @@ Study: - 
contains_schema_clone - contains_schema_cell - contains_schema_receptor - description: Keywords describing properties of one or more data sets in a study + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). title: Keywords for study example: - contains_ig @@ -4281,7 +4285,7 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_raw_doi: type: string description: > @@ -4289,7 +4293,7 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true + adc-query-support: true expression_index: type: string description: > @@ -4297,7 +4301,6 @@ Cell: x-airr: miairr: defined nullable: true - adc-api-optional: true virtual_pairing: type: boolean description: > @@ -4430,7 +4433,7 @@ Receptor: description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). 
x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_aa: type: string description: > @@ -4441,7 +4444,7 @@ Receptor: QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_locus: type: string enum: @@ -4452,7 +4455,7 @@ Receptor: example: IGH x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_aa: type: string description: > @@ -4463,7 +4466,7 @@ Receptor: QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_locus: type: string enum: @@ -4476,7 +4479,7 @@ Receptor: example: IGL x-airr: nullable: false - adc-api-optional: true + adc-query-support: true receptor_ref: type: array description: Array of receptor identifiers defined for the Receptor object diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 151dd181d..c18872424 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1188,7 +1188,7 @@ GenotypeSet: items: $ref: '#/Genotype' -# This enumerates the alleles and gene deletions inferred in a single subject. Included alleles may either be listed by reference to a GermlineSet, or +# This enumerates the alleles and gene deletions inferred in a single subject. Included alleles may either be listed by reference to a GermlineSet, or # listed as 'undocumented', in which case the inferred sequence is provided # Genotype of adaptive immune receptors @@ -1635,7 +1635,7 @@ Study: Keywords describing properties of one or more data sets in a study. 
"contains_schema" keywords indicate that the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have - a study that "contains_paired_chain" but does not "contains_schema_cell"). + a study that "contains_paired_chain" but does not "contains_schema_cell"). title: Keywords for study example: - contains_ig @@ -4313,8 +4313,8 @@ Cell: name: Virtual pairing # The CellExpression object acts as a container to hold a single expression level measurement from -# an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: type: object From 82a16ae65650fc6142f3ae2b05c0f91420f362b3 Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Tue, 25 Jul 2023 00:32:35 +0200 Subject: [PATCH 25/59] Sync schema v3 with v2 --- specs/airr-schema-openapi3.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 72767d291..dd18a9722 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -1767,7 +1767,11 @@ Study: - contains_schema_cell - contains_schema_receptor nullable: true - description: Keywords describing properties of one or more data sets in a study + description: > + Keywords describing properties of one or more data sets in a study. 
"contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). title: Keywords for study example: - contains_ig From 2f9bf56399d503692ca76c8f125f954215fa4f68 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 25 Jul 2023 12:18:28 -0500 Subject: [PATCH 26/59] remove duplicates --- lang/R/inst/extdata/airr-schema.yaml | 85 ------------------------- lang/python/airr/specs/airr-schema.yaml | 85 ------------------------- specs/airr-schema.yaml | 85 ------------------------- 3 files changed, 255 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 213369861..9507bf904 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -1362,91 +1362,6 @@ DeletedGene: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome. - -# Documented Allele -# This describes a 'known' allele found in a genotype -# It 'known' in the sense that it is documented in a reference set - -DocumentedAllele: - discriminator: AIRR - required: - - label - - germline_set_ref - properties: - label: - type: string - x-airr: - miairr: important - description: The accepted name for this allele, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. 
- -# Undocumented Allele -# This describes a 'undocumented' allele found in a genotype -# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis - -UndocumentedAllele: - discriminator: AIRR - required: - - allele_name - - sequence - type: object - properties: - allele_name: - type: string - x-airr: - miairr: important - description: Allele name as allocated by the inference pipeline - sequence: - type: string - x-airr: - miairr: essential - description: nt sequence of the allele, as provided by the inference pipeline - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. - -# Deleted Gene -# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype - -DeletedGene: - discriminator: AIRR - required: - - label - - germline_set_ref - type: object - properties: - label: - type: string - x-airr: - miairr: essential - description: The accepted name for this gene, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken (issuer/name/version) - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. - - # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: type: object diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 213369861..9507bf904 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -1362,91 +1362,6 @@ DeletedGene: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome. 
- -# Documented Allele -# This describes a 'known' allele found in a genotype -# It 'known' in the sense that it is documented in a reference set - -DocumentedAllele: - discriminator: AIRR - required: - - label - - germline_set_ref - properties: - label: - type: string - x-airr: - miairr: important - description: The accepted name for this allele, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. - -# Undocumented Allele -# This describes a 'undocumented' allele found in a genotype -# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis - -UndocumentedAllele: - discriminator: AIRR - required: - - allele_name - - sequence - type: object - properties: - allele_name: - type: string - x-airr: - miairr: important - description: Allele name as allocated by the inference pipeline - sequence: - type: string - x-airr: - miairr: essential - description: nt sequence of the allele, as provided by the inference pipeline - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. 
- -# Deleted Gene -# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype - -DeletedGene: - discriminator: AIRR - required: - - label - - germline_set_ref - type: object - properties: - label: - type: string - x-airr: - miairr: essential - description: The accepted name for this gene, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken (issuer/name/version) - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. - - # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: type: object diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 213369861..9507bf904 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1362,91 +1362,6 @@ DeletedGene: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome. - -# Documented Allele -# This describes a 'known' allele found in a genotype -# It 'known' in the sense that it is documented in a reference set - -DocumentedAllele: - discriminator: AIRR - required: - - label - - germline_set_ref - properties: - label: - type: string - x-airr: - miairr: important - description: The accepted name for this allele, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. 
- -# Undocumented Allele -# This describes a 'undocumented' allele found in a genotype -# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis - -UndocumentedAllele: - discriminator: AIRR - required: - - allele_name - - sequence - type: object - properties: - allele_name: - type: string - x-airr: - miairr: important - description: Allele name as allocated by the inference pipeline - sequence: - type: string - x-airr: - miairr: essential - description: nt sequence of the allele, as provided by the inference pipeline - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. - -# Deleted Gene -# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype - -DeletedGene: - discriminator: AIRR - required: - - label - - germline_set_ref - type: object - properties: - label: - type: string - x-airr: - miairr: essential - description: The accepted name for this gene, taken from the GermlineSet - germline_set_ref: - type: string - x-airr: - miairr: important - description: GermlineSet from which it was taken (issuer/name/version) - phasing: - type: integer - nullable: true - description: > - Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the - same chromosome. 
- - # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: type: object From 1f3d4a75a92070823bde8337a471dd0b2e44284f Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 25 Jul 2023 13:42:12 -0500 Subject: [PATCH 27/59] proper return value --- lang/js/schema.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 8cc01e69c..8a6d9be29 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -202,9 +202,8 @@ module.exports = function(airr, schema) { const validate = ajv.compile(this.definition) const valid = validate(object) - if (!valid) console.log(validate.errors) - - return valid; + if (!valid) return validate.errors; + else return null; } airr.SchemaDefinition.prototype.template = function() { From 609a7596951803bfa8695b3396da7a8ccc54e6f4 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 25 Jul 2023 13:42:44 -0500 Subject: [PATCH 28/59] remove discriminator --- lang/R/inst/extdata/airr-schema.yaml | 5 +- lang/js/airr-schema-openapi3.yaml | 1178 +++++++++++++++-------- lang/python/airr/specs/airr-schema.yaml | 5 +- specs/airr-schema-openapi3.yaml | 9 +- specs/airr-schema.yaml | 5 +- 5 files changed, 755 insertions(+), 447 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index 9507bf904..1a6ddb5c5 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -1284,7 +1284,7 @@ Genotype: # It 'known' in the sense that it is documented in a reference set DocumentedAllele: - discriminator: AIRR + type: object required: - label - germline_set_ref @@ -1312,7 +1312,6 @@ DocumentedAllele: # It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis UndocumentedAllele: - discriminator: AIRR required: - allele_name - sequence @@ -1339,7 +1338,6 @@ UndocumentedAllele: # It is regarded as 'deleted' in the sense that it was not identified during inference of 
the genotype DeletedGene: - discriminator: AIRR required: - label - germline_set_ref @@ -1698,7 +1696,6 @@ Study: name: ADC Update Date SubjectGenotype: - discriminator: AIRR type: object properties: receptor_genotype_set: diff --git a/lang/js/airr-schema-openapi3.yaml b/lang/js/airr-schema-openapi3.yaml index 21772e41f..d0886743d 100644 --- a/lang/js/airr-schema-openapi3.yaml +++ b/lang/js/airr-schema-openapi3.yaml @@ -4,7 +4,7 @@ Info: title: AIRR Schema description: Schema definitions for AIRR standards objects - version: "1.4" + version: 1.4 contact: name: AIRR Community url: https://github.com/airr-community @@ -39,11 +39,11 @@ CURIEMap: CHEBI: type: ontology default: - map: OBO - provider: OLS + map: OBO + provider: OLS map: - OBO: - iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" CL: type: ontology default: @@ -255,7 +255,7 @@ Attributes: - essential - important - defined - default: useful + default: defined identifier: type: boolean description: > @@ -342,13 +342,13 @@ DataFile: nullable: false description: List of repertoires items: - $ref: '#/Repertoire' + $ref: '#/Repertoire' RepertoireGroup: type: array nullable: false description: List of repertoire collections items: - $ref: '#/RepertoireGroup' + $ref: '#/RepertoireGroup' Rearrangement: type: array nullable: false @@ -360,13 +360,13 @@ DataFile: nullable: false description: List of cells items: - $ref: '#/Cell' + $ref: '#/Cell' Clone: type: array nullable: false description: List of clones items: - $ref: '#/Clone' + $ref: '#/Clone' GermlineSet: type: array nullable: false @@ -389,40 +389,40 @@ InfoObject: - title - version properties: - title: - type: string - nullable: false - version: - type: string - nullable: false - description: - type: string - nullable: true - contact: - type: object - nullable: true - properties: - name: - type: string - nullable: true - url: + title: type: string - nullable: true - email: - type: string - 
nullable: true - license: - type: object - nullable: true - required: - - name - properties: - name: + nullable: false + version: type: string nullable: false - url: + description: type: string nullable: true + contact: + type: object + nullable: true + properties: + name: + type: string + nullable: true + url: + type: string + nullable: true + email: + type: string + nullable: true + license: + type: object + nullable: true + required: + - name + properties: + name: + type: string + nullable: false + url: + type: string + nullable: true # A time point TimePoint: @@ -475,8 +475,11 @@ Acknowledgement: properties: acknowledgement_id: type: string - nullable: false description: unique identifier of this Acknowledgement within the file + x-airr: + identifier: true + miairr: important + nullable: true name: type: string nullable: true @@ -496,8 +499,10 @@ Acknowledgement: # Rearranged and genomic germline sequences RearrangedSequence: - description: Details of a directly observed rearranged sequence or an inference from rearranged sequences contributing support for a gene or allele type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. required: - sequence_id - sequence @@ -511,49 +516,75 @@ RearrangedSequence: properties: sequence_id: type: string - nullable: false - description: Unique identifier of this RearrangedSequence within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. 
+ x-airr: + identifier: true + miairr: important sequence: type: string - nullable: false + nullable: true + x-airr: + miairr: essential description: nucleotide sequence derivation: type: string - nullable: false + nullable: true enum: - DNA - RNA + - null description: The class of nucleic acid that was used as primary starting material + x-airr: + miairr: important observation_type: type: string - nullable: false - description: The type of observation from which this sequence was drawn, e.g. direct sequencing, inference from repertoire + nullable: true enum: - direct sequencing - inference from repertoire + - null + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. + x-airr: + miairr: essential + curation: type: string nullable: true description: Curational notes on the sequence repository_name: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Name of the repository in which the sequence has been deposited repository_ref: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Queryable id or accession number of the sequence published by the repository deposited_version: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Version number of the sequence within the repository sequence_start: type: integer - nullable: false + nullable: true + x-airr: + miairr: essential description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited sequence_end: type: integer - nullable: false + nullable: true + x-airr: + miairr: essential description: End co-ordinate of the sequence detailed in this record, within the sequence deposited UnrearrangedSequence: @@ -571,23 +602,33 @@ UnrearrangedSequence: properties: sequence_id: type: string - nullable: false + nullable: true + x-airr: + identifier: true + miairr: important description: 
unique identifier of this UnrearrangedSequence within the file sequence: type: string - nullable: false - description: Sequence of interest described in this record (typically this will include gene and promoter region) + nullable: true + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. + x-airr: + miairr: essential curation: type: string nullable: true description: Curational notes on the sequence repository_name: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Name of the repository in which the assembly or contig is deposited repository_ref: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Queryable id or accession number of the sequence published by the repository patch_no: type: string @@ -596,21 +637,27 @@ UnrearrangedSequence: gff_seqid: type: string nullable: true - description: Sequence (from the assembly) of a window including the gene and preferably also the promoter region + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. gff_start: type: integer nullable: true - description: Genomic co-ordinates of the start of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. gff_end: type: integer nullable: true - description: Genomic co-ordinates of the end of the sequence of interest described in this record, in Ensemble GFF version 3 + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. 
strand: type: string nullable: true enum: - + - "-" + - null description: sense (+ or -) # V gene delineation @@ -634,63 +681,107 @@ SequenceDelineationV: properties: sequence_delineation_id: type: string - nullable: false - description: Unique identifier of this SequenceDelineationV within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. + x-airr: + identifier: true + miairr: important + delineation_scheme: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Name of the delineation scheme example: Chothia + unaligned_sequence: + type: string + nullable: true + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + nullable: true + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. 
fwr1_start: type: integer - nullable: false - description: FWR1 start co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR1 start co-ordinate in the 'unaligned sequence' field fwr1_end: type: integer - nullable: false - description: FWR1 end co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR1 end co-ordinate in the 'unaligned sequence' field cdr1_start: type: integer - nullable: false - description: CDR1 start co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: CDR1 start co-ordinate in the 'unaligned sequence' field cdr1_end: type: integer - nullable: false - description: CDR1 end co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: CDR1 end co-ordinate in the 'unaligned sequence' field fwr2_start: type: integer - nullable: false - description: FWR2 start co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR2 start co-ordinate in the 'unaligned sequence' field fwr2_end: type: integer - nullable: false - description: FWR2 end co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR2 end co-ordinate in the 'unaligned sequence' field cdr2_start: type: integer - nullable: false - description: CDR2 start co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: CDR2 start co-ordinate in the 'unaligned sequence' field cdr2_end: type: integer - nullable: false - description: CDR2 end co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: CDR2 end co-ordinate in the 'unaligned sequence' field fwr3_start: type: integer - nullable: false - description: FWR3 start co-ordinate in Gene Description 
'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR3 start co-ordinate in the 'unaligned sequence' field fwr3_end: type: integer - nullable: false - description: FWR3 end co-ordinate in Gene Description 'alignment' field + nullable: true + x-airr: + miairr: important + description: FWR3 end co-ordinate in the 'unaligned sequence' field cdr3_start: type: integer - nullable: false - description: CDR3 start co-ordinate in Gene Description 'alignment' field - alignment: + nullable: true + x-airr: + miairr: important + description: CDR3 start co-ordinate in the 'unaligned sequence' field + alignment_labels: type: array nullable: true items: - type: string - description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. # Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: @@ -713,16 +804,25 @@ AlleleDescription: properties: allele_description_id: type: string - nullable: false - description: Unique identifier of this AlleleDescription within the file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + x-airr: + identifier: true + miairr: important + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. 
allele_description_ref: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 maintainer: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Maintainer of this sequence record acknowledgements: type: array @@ -732,36 +832,54 @@ AlleleDescription: $ref: '#/Acknowledgement' lab_address: type: string - nullable: false + nullable: true + x-airr: + miairr: defined description: Institution and full address of corresponding author release_version: type: integer - nullable: false + nullable: true + x-airr: + miairr: important description: Version number of this record, updated whenever a revised version is published or released release_date: type: string + nullable: true format: date-time - nullable: false + x-airr: + miairr: important description: Date of this release title: Release Date example: "2021-02-02" release_description: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Brief descriptive notes of the reason for this release and the changes embodied label: type: string nullable: true + x-airr: + miairr: important description: The accepted name for this gene or allele example: IGHV1-69*01 sequence: type: string - nullable: false - description: nt sequence of the gene. This should cover the full length that is available, including where possible RSS, and 5' UTR and lead-in for V-gene sequences + nullable: true + x-airr: + miairr: essential + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. 
coding_sequence: type: string - nullable: false - description: nucleotide sequence of the core region of the gene (V-, D-, J- or C-REGION), aligned, in the case of the V-REGION, with the IMGT numbering scheme + nullable: true + x-airr: + miairr: important + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. aliases: type: array nullable: true @@ -770,49 +888,63 @@ AlleleDescription: description: Alternative names for this sequence locus: type: string - nullable: false + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRG - TRD + - null description: Gene locus + x-airr: + miairr: essential chromosome: type: integer nullable: true - description: chromosome on which the gene is located + description: chromosome on which the gene is located sequence_type: type: string - nullable: false + nullable: true enum: - V - D - J - C + - null description: Sequence type (V, D, J, C) + x-airr: + miairr: essential functional: type: boolean - nullable: false + nullable: true + x-airr: + miairr: important description: True if the gene is functional, false if it is a pseudogene inference_type: type: string - nullable: false + nullable: true enum: - Genomic and rearranged - Genomic only - Rearranged only + - null description: Type of inference(s) from which this gene sequence was inferred + x-airr: + miairr: important species: $ref: '#/Ontology' - nullable: false + nullable: true description: Binomial designation of subject's species title: Organism example: id: NCBITAXON:9606 label: Homo sapiens + x-airr: + miairr: essential species_subgroup: type: string nullable: true @@ -827,6 +959,7 @@ AlleleDescription: - inbred - outbred - locational + - null status: type: string nullable: true @@ -835,6 +968,7 @@ AlleleDescription: - draft - retired - withdrawn + - null description: Status of record, assumed active if the field is not present subgroup_designation: 
type: string @@ -848,6 +982,14 @@ AlleleDescription: type: string nullable: true description: Allele number or other identifier, as (and if) defined + allele_similarity_cluster_designation: + type: string + nullable: true + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + nullable: true + description: Membership ID of the allele within the similarity cluster, if a cluster is designated j_codon_frame: type: integer nullable: true @@ -855,79 +997,91 @@ AlleleDescription: - 1 - 2 - 3 - description: Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. Not used for V or D genes. ('1' means the sequence is in-frame, '2' means that the first bp is missing from the first codon, '3' means that the first 2 bp are missing) + - null + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. gene_start: type: integer nullable: true - description: Co-ordinate (in the sequence field) of the first nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. + x-airr: + miairr: important gene_end: type: integer nullable: true - description: Co-ordinate (in the sequence field) of the last gene-coding nucleotide in the coding_sequence field + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. + x-airr: + miairr: important utr_5_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). 
utr_5_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 5 prime UTR (V-genes only) + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). leader_1_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). leader_1_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of L-PART1 (V-genes only) + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). leader_2_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). leader_2_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of L-PART2 (V-genes only) + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). v_rs_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). v_rs_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of V recombination site (V-genes only) + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). d_rs_3_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). 
d_rs_3_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 3 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). d_rs_5_prime_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). d_rs_5_prime_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of 5 prime D recombination site (D-genes only) + description: End co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). j_cdr3_end: type: integer nullable: true - description: In the case of a J-gene, the co-ordinate (in the sequence field) of the first nucelotide of the conserved PHE or TRP (IMGT codon position 118) + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucleotide of the + conserved PHE or TRP (IMGT codon position 118). j_rs_start: type: integer nullable: true - description: Start co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: Start co-ordinate in the sequence field of the J recombination site (J-genes only). j_rs_end: type: integer nullable: true - description: End co-ordinate (in the sequence field) of J recombination site (J-genes only) + description: End co-ordinate in the sequence field of the J recombination site (J-genes only). j_donor_splice: type: integer nullable: true - description: Co-ordinate (in the sequence field) of the final 3' nucleotide of the J-REGION (J-genes only) + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only).
v_gene_delineations: type: array nullable: true @@ -952,7 +1106,9 @@ AlleleDescription: curation: type: string nullable: true - description: Curational notes on the AlleleDescription. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. curational_tags: type: array nullable: true @@ -965,8 +1121,10 @@ AlleleDescription: # Collection of gene descriptions into a germline set GermlineSet: - description: Details of a 'germline set' bringing together multiple AlleleDescriptions from the same strain or species. All genes in a GermlineSet should be from a single locus. type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. required: - germline_set_id - author @@ -983,19 +1141,30 @@ GermlineSet: properties: germline_set_id: type: string - nullable: false - description: Unique identifier of the GermlineSet within this file, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. 
+ x-airr: + identifier: true + miairr: important author: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Corresponding author lab_name: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Department of corresponding author lab_address: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Institutional address of corresponding author acknowledgements: type: array @@ -1005,26 +1174,36 @@ GermlineSet: $ref: '#/Acknowledgement' release_version: type: number - nullable: false + nullable: true + x-airr: + miairr: important description: Version number of this record, allocated automatically release_description: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Brief descriptive notes of the reason for this release and the changes embodied release_date: type: string + nullable: true format: date-time - nullable: false + x-airr: + miairr: important description: Date of this release title: Release Date example: "2021-02-02" germline_set_name: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: descriptive name of this germline set germline_set_ref: type: string - nullable: false + nullable: true + x-airr: + miairr: important description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 pub_ids: @@ -1034,7 +1213,9 @@ GermlineSet: example: "PMID:85642,PMID:12345" species: $ref: '#/Ontology' - nullable: false + nullable: true + x-airr: + miairr: essential description: Binomial designation of subject's species title: Organism example: @@ -1042,9 +1223,9 @@ GermlineSet: label: Homo sapiens species_subgroup: type: string + nullable: true description: Race, strain or other species subgroup to which this subject belongs example: BALB/c - nullable: true species_subgroup_type: type: string nullable: true @@ 
-1054,28 +1235,37 @@ GermlineSet: - inbred - outbred - locational + - null locus: type: string - nullable: false + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRG - TRD + - null description: Gene locus + x-airr: + miairr: essential allele_descriptions: type: array - nullable: false + nullable: true items: - $ref: '#/AlleleDescription' + $ref: '#/AlleleDescription' description: list of allele_descriptions in the germline set + x-airr: + miairr: important curation: type: string nullable: true - description: Curational notes on the GermlineSet. This can be used to give more extensive notes on the decisions taken than are provided in the release_description. + description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. # # Genotype schema @@ -1090,8 +1280,13 @@ GenotypeSet: properties: receptor_genotype_set_id: type: string - nullable: false - description: A unique identifier for this Receptor Genotype Set, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. genotype_class_list: description: List of Genotypes included in this Receptor Genotype Set. type: array @@ -1099,10 +1294,9 @@ GenotypeSet: items: $ref: '#/Genotype' - # Genotype of adaptive immune receptors -# This enumerates the alleles and gene deletions inferred in a single subject. -# Included alleles may either be listed by reference to a GermlineSet, or +# This enumerates the alleles and gene deletions inferred in a single subject. 
+# Included alleles may either be listed by reference to a GermlineSet, or # listed as 'undocumented', in which case the inferred sequence is provided Genotype: @@ -1113,64 +1307,46 @@ Genotype: properties: receptor_genotype_id: type: string - nullable: false - description: A unique identifier within the file for this Receptor Genotype, typically generated by the repository hosting the schema, for example from the underlying ID of the database record + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. locus: type: string - nullable: false + nullable: true enum: - IGH + - IGI - IGK - IGL - TRA - TRB - TRD - TRG + - null + description: Gene locus example: IGH x-airr: adc-query-support: true format: controlled vocabulary + miairr: essential documented_alleles: type: array nullable: true - description: Array of alleles inferred to be present which are documented in GermlineSets + description: List of alleles documented in reference set(s) items: - type: object - properties: - label: - type: string - nullable: false - description: The accepted name for this allele, taken from the GermlineSet - germline_set_ref: - type: string - nullable: false - description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) - example: OGRDB:Human_IGH:2021.11 - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. 
Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DocumentedAllele' x-airr: - adc-query-support: true + miairr: important undocumented_alleles: type: array nullable: true - description: Array of alleles inferred to be present and not documented in an identified GermlineSet + description: List of alleles inferred to be present and not documented in an identified GermlineSet items: - type: object - properties: - allele_name: - type: string - nullable: false - description: Allele name as allocated by the inference pipeline - sequence: - type: string - nullable: false - description: nt sequence of the allele, as provided by the inference pipeline - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/UndocumentedAllele' x-airr: adc-query-support: true deleted_genes: @@ -1178,20 +1354,7 @@ Genotype: nullable: true description: Array of genes identified as being deleted in this genotype items: - type: object - properties: - label: - type: string - nullable: false - description: The accepted name for this gene, taken from the GermlineSet - germline_set_ref: - type: string - nullable: false - description: GermlineSet from which it was taken (issuer/name/version) - phasing: - type: integer - nullable: true - description: Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the same chromosome + $ref: '#/DeletedGene' x-airr: adc-query-support: true inference_process: @@ -1200,6 +1363,7 @@ Genotype: enum: - genomic_sequencing - repertoire_sequencing + - null description: Information on how the genotype was acquired. Controlled vocabulary. 
title: Genotype acquisition process example: repertoire_sequencing @@ -1207,6 +1371,93 @@ Genotype: adc-query-support: true format: controlled vocabulary +# Documented Allele +# This describes a 'known' allele found in a genotype +# It is 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + type: object + required: + - label + - germline_set_ref + properties: + label: + type: string + nullable: true + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + +# Undocumented Allele +# This describes an 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + nullable: true + description: Allele name as allocated by the inference pipeline + x-airr: + miairr: important + sequence: + type: string + nullable: true + description: nt sequence of the allele, as provided by the inference pipeline + x-airr: + miairr: essential + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome.
+ +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + nullable: true + description: The accepted name for this gene, taken from the GermlineSet + x-airr: + miairr: essential + germline_set_ref: + type: string + nullable: true + description: GermlineSet from which it was taken (issuer/name/version) + x-airr: + miairr: important + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + # List of MHCGenotypes describing a subject's genotype MHCGenotypeSet: @@ -1217,16 +1468,20 @@ MHCGenotypeSet: properties: mhc_genotype_set_id: type: string - nullable: false + nullable: true + x-airr: + identifier: true + miairr: important description: A unique identifier for this MHCGenotypeSet mhc_genotype_list: description: List of MHCGenotypes included in this set type: array - nullable: false + nullable: true + x-airr: + miairr: important items: $ref: '#/MHCGenotype' - # Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci MHCGenotype: type: object @@ -1237,54 +1492,33 @@ MHCGenotype: properties: mhc_genotype_id: type: string - nullable: false + nullable: true + x-airr: + identifier: true + miairr: important description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study mhc_class: type: string - nullable: false - description: Class of MHC alleles described by the MHCGenotype + nullable: true enum: - - MHC-I - - MHC-II - - MHC-nonclassical + - MHC-I + - MHC-II + - MHC-nonclassical + - null + description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: + miairr: essential adc-query-support: true format: controlled vocabulary mhc_alleles: type: array - nullable: false + nullable: true 
description: List of MHC alleles of the indicated mhc_class identified in an individual items: - type: object - properties: - allele_designation: - type: string - nullable: false - description: > - The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc - identifiers, if provided by the mhc_typing method - gene: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the described allele belongs - title: MHC gene - example: - id: MRO:0000046 - label: HLA-A - x-airr: - adc-query-support: false - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - reference_set_ref: - type: string - nullable: false - description: Repository and list from which it was taken (issuer/name/version) + $ref: '#/MHCAllele' x-airr: + miairr: important adc-query-support: true mhc_genotyping_method: type: string @@ -1296,6 +1530,57 @@ MHCGenotype: example: pcr_low_resolution x-airr: adc-query-support: true + miairr: important + + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + nullable: true + x-airr: + miairr: important + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + gene: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + miairr: important + reference_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Repository and list from which it was taken (issuer/name/version) + + +SubjectGenotype: + type: object + properties: + receptor_genotype_set: + nullable: true + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. 
+ mhc_genotype_set: + nullable: true + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. # # Repertoire metadata schema @@ -1326,6 +1611,7 @@ Study: title: Study ID example: PRJNA001 x-airr: + identifier: true miairr: important adc-query-support: true set: 1 @@ -1488,13 +1774,17 @@ Study: - contains_schema_cell - contains_schema_receptor nullable: true - description: Keywords describing properties of one or more data sets in a study + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). title: Keywords for study example: - - contains_ig - - contains_schema_rearrangement - - contains_schema_clone - - contains_schema_cell + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell x-airr: miairr: important adc-query-support: true @@ -1554,6 +1844,7 @@ Subject: title: Subject ID example: SUB856413 x-airr: + identifier: true miairr: important adc-query-support: true set: 1 @@ -1609,6 +1900,7 @@ Subject: - intersex - "not collected" - "not applicable" + - null nullable: true description: Biological sex of subject title: Sex @@ -1772,18 +2064,9 @@ Subject: x-airr: adc-query-support: true genotype: - type: object nullable: true - description: Genotype for this subject, if known - properties: - receptor_genotype_set: - nullable: true - $ref: '#/GenotypeSet' - description: Immune receptor genotype set for this subject. - mhc_genotype_set: - nullable: true - $ref: '#/MHCGenotypeSet' - description: MHC genotype set for this subject. 
+ $ref: '#/SubjectGenotype' + title: SubjectGenotype # 1-to-n relationship between a subject and its diagnoses Diagnosis: @@ -1928,6 +2211,7 @@ Sample: title: Biological sample ID example: SUP52415 x-airr: + identifier: true miairr: important adc-query-support: true set: 2 @@ -1994,7 +2278,7 @@ Sample: nullable: true description: Time point at which sample was taken, relative to `Collection time event` title: Sample collection time - example: "14" + example: 14 x-airr: miairr: important adc-query-support: true @@ -2235,6 +2519,7 @@ PCRTarget: - TRB - TRD - TRG + - null nullable: true description: > Designation of the target locus. Note that this field uses a controlled vocubulary that is meant to @@ -2334,7 +2619,7 @@ NucleicAcidProcessing: $ref: '#/Ontology' nullable: true description: Unit of template amount - title: Template amount time unit + title: Template amount time unit example: id: UO:0000024 label: nanogram @@ -2349,7 +2634,7 @@ NucleicAcidProcessing: draft: false top_node: id: UO:0000002 - label: physical quantity + label: physical quantity library_generation_method: type: string enum: @@ -2484,6 +2769,7 @@ SequencingRun: title: Batch number example: 160101_M01234 x-airr: + identifier: true miairr: important adc-query-support: true set: 3 @@ -2573,10 +2859,13 @@ SequencingData: sequencing_data_id: type: string nullable: true - description: Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should be identified in the CURIE prefix. + description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. 
title: Raw sequencing data persistent identifier example: "SRA:SRR11610494" x-airr: + identifier: true miairr: important adc-query-support: true set: 4 @@ -2590,6 +2879,7 @@ SequencingData: enum: - fasta - fastq + - null x-airr: miairr: important adc-query-support: true @@ -2619,6 +2909,7 @@ SequencingData: - forward - reverse - mixed + - null x-airr: miairr: important adc-query-support: true @@ -2660,6 +2951,7 @@ SequencingData: - forward - reverse - mixed + - null x-airr: miairr: important adc-query-support: true @@ -2849,18 +3141,18 @@ SampleProcessing: allOf: - type: object properties: - sample_processing_id: - type: string - nullable: true - description: > - Identifier for the sample processing object. This field should be unique within the repertoire. - This field can be used to uniquely identify the combination of sample, cell processing, - nucleic acid processing and sequencing run information for the repertoire. - title: Sample processing ID - x-airr: - name: Sample processing ID - adc-query-support: true - identifier: true + sample_processing_id: + type: string + nullable: true + description: > + Identifier for the sample processing object. This field should be unique within the repertoire. + This field can be used to uniquely identify the combination of sample, cell processing, + nucleic acid processing and sequencing run information for the repertoire. + title: Sample processing ID + x-airr: + name: Sample processing ID + adc-query-support: true + identifier: true - $ref: '#/Sample' - $ref: '#/CellProcessing' - $ref: '#/NucleicAcidProcessing' @@ -2948,6 +3240,8 @@ RepertoireGroup: type: string nullable: true description: Identifier for this repertoire collection + x-airr: + identifier: true repertoire_group_name: type: string nullable: true @@ -2999,6 +3293,8 @@ Alignment: Unique query sequence identifier within the file. 
Most often this will be the input sequence header or a substring thereof, but may also be a custom identifier defined by the tool in cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true segment: type: string nullable: true @@ -3181,6 +3477,7 @@ Rearrangement: - TRB - TRD - TRG + - null nullable: true description: > Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a @@ -3961,9 +4258,9 @@ Rearrangement: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: important adc-query-support: true - identifier: true set: 6 subset: data (processed sequence) name: Cell index @@ -4048,6 +4345,8 @@ Clone: type: string nullable: true description: Identifier for the clone. + x-airr: + identifier: true repertoire_id: type: string nullable: true @@ -4191,6 +4490,8 @@ Tree: type: string nullable: true description: Identifier for the tree. + x-airr: + identifier: true clone_id: type: string nullable: true @@ -4218,6 +4519,8 @@ Node: description: > Identifier for this node that matches the identifier in the newick string and, where possible, the sequence_id in the source repertoire. 
+ x-airr: + identifier: true sequence_alignment: type: string nullable: true @@ -4254,6 +4557,7 @@ Cell: title: Cell index example: W06_046_091 x-airr: + identifier: true miairr: defined adc-query-support: true name: Cell index @@ -4261,7 +4565,7 @@ Cell: type: array nullable: true description: > - Array of sequence identifiers defined for the Rearrangement object + Array of sequence identifiers defined for the Rearrangement object title: Cell-associated rearrangements items: type: string @@ -4274,7 +4578,7 @@ Cell: type: array nullable: true description: > - Array of receptor identifiers defined for the Receptor object + Array of receptor identifiers defined for the Receptor object title: Cell-associated receptors items: type: string @@ -4306,28 +4610,29 @@ Cell: enum: - "flow cytometry" - "single-cell transcriptome" + - null nullable: true description: > - keyword describing the methodology used to assess expression. This values for this field MUST come from a controlled vocabulary + Keyword describing the methodology used to assess expression. This values for this field MUST + come from a controlled vocabulary. x-airr: miairr: defined - adc-api-optional: true + adc-query-support: true expression_raw_doi: type: string nullable: true description: > - DOI of raw data set containing the current event + DOI of raw data set containing the current event x-airr: miairr: defined - adc-api-optional: true + adc-query-support: true expression_index: type: string nullable: true description: > - Index addressing the current event within the raw data set. + Index addressing the current event within the raw data set. x-airr: miairr: defined - adc-api-optional: true virtual_pairing: type: boolean nullable: true @@ -4340,14 +4645,14 @@ Cell: name: Virtual pairing # The CellExpression object acts as a container to hold a single expression level measurement from -# an experiment. 
Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# an experiment. Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for # a single repertoire. CellExpression: type: object required: - expression_id - - reperotire_id + - repertoire_id - data_processing_id - cell_id - property @@ -4360,6 +4665,7 @@ CellExpression: title: Expression property measurement identifier nullable: false x-airr: + identifier: true miairr: defined adc-query-support: true name: Expression measurement identifier @@ -4394,9 +4700,12 @@ CellExpression: name: Data processing for cell property: $ref: '#/Ontology' - description: Name of the property observed, typically a gene or antibody idenifier (and its label) from a canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). - title: Property information nullable: true + title: Property information + description: > + Name of the property observed, typically a gene or antibody idenifier (and its label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: id: ENSG:ENSG00000275747 label: IGHV3-79 @@ -4437,6 +4746,7 @@ Receptor: title: Receptor ID example: TCR-MM-012345 x-airr: + identifier: true adc-query-support: true receptor_hash: type: string @@ -4456,7 +4766,7 @@ Receptor: - TCR description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). 
x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_aa: type: string nullable: false @@ -4467,7 +4777,7 @@ Receptor: example: > QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_1_locus: type: string nullable: false @@ -4478,7 +4788,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_1_aa originates example: IGH x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_aa: type: string nullable: false @@ -4489,11 +4799,12 @@ Receptor: example: > QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL x-airr: - adc-api-optional: true + adc-query-support: true receptor_variable_domain_2_locus: type: string nullable: false enum: + - IGI - IGK - IGL - TRA @@ -4501,7 +4812,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_2_aa originates example: IGL x-airr: - adc-api-optional: true + adc-query-support: true receptor_ref: type: array nullable: true @@ -4517,144 +4828,157 @@ Receptor: nullable: true description: Records of reactivity measurement items: - type: object - properties: - ligand_type: - type: string - nullable: false - enum: - - MHC:peptide - - MHC:non-peptide - - protein - - peptide - - non-peptidic - description: Classification of ligand binding to receptor - example: non-peptide - antigen_type: - type: string - nullable: false - enum: - - protein - - peptide - - non-peptidic - description: > - The type of antigen before processing by the immune system. - example: protein - antigen: - $ref: '#/Ontology' - nullable: false - description: > - The substance against which the receptor was tested. 
This can be any substance that - stimulates an adaptive immune response in the host, either through antibody production - or by T cell activation after presentation via an MHC molecule. - title: Antigen - example: - id: UNIPROT:P19597 - label: Circumsporozoite protein - x-airr: - adc-query-support: true - format: ontology - antigen_source_species: - $ref: '#/Ontology' - nullable: true - description: The species from which the antigen was isolated - title: Source species of antigen - example: - id: NCBITAXON:5843 - label: Plasmodium falciparum NF54 - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: NCBITAXON:1 - label: root - peptide_start: - type: integer - nullable: true - description: Start position of the peptide within the reference protein sequence - peptide_end: - type: integer - nullable: true - description: End position of the peptide within the reference protein sequence - mhc_class: - type: string - nullable: true - enum: - - MHC-I - - MHC-II - - MHC-nonclassical - description: Class of MHC molecule, only present for MHC:x ligand types - example: MHC-II - mhc_gene_1: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the mhc_allele_1 belongs - title: MHC gene 1 - example: - id: MRO:0000055 - label: HLA-DRA - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - label: MHC gene - mhc_allele_1: - type: string - nullable: true - description: Allele designation of the MHC alpha chain - example: HLA-DRA - mhc_gene_2: - $ref: '#/Ontology' - nullable: true - description: The MHC gene to which the mhc_allele_2 belongs - title: MHC gene 2 - example: - id: MRO:0000057 - label: HLA-DRB1 - x-airr: - format: ontology - ontology: - draft: true - top_node: - id: MRO:0000004 - mhc_allele_2: - type: string - nullable: true - description: > - Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain - example: HLA-DRB1*04:01 - reactivity_method: - type: string - 
nullable: false - enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity - description: The methodology used to assess expression (assay implemented in experiment) - reactivity_readout: - type: string - nullable: false - enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition - description: Reactivity measurement read-out - example: cytokine release - reactivity_value: - type: number - nullable: false - description: The absolute (processed) value of the measurement - example: 162.26 - reactivity_unit: - type: string - nullable: false - description: The unit of the measurement - example: pg/ml + $ref: '#/ReceptorReactivity' + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + nullable: false + enum: + - MHC:peptide + - MHC:non-peptide + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + antigen_type: + type: string + nullable: false + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + antigen: + $ref: '#/Ontology' + nullable: false + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + nullable: true + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + nullable: true + description: Start position of the peptide within the reference protein sequence + peptide_end: + type: integer + nullable: true + description: End position of the peptide within the reference protein sequence + mhc_class: + type: string + nullable: true + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + - null + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + mhc_gene_1: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + nullable: true + description: Allele designation of the MHC alpha chain + example: HLA-DRA + mhc_gene_2: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + mhc_allele_2: + type: string + nullable: true + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + reactivity_method: + type: string + nullable: false + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological activity + description: The methodology used to assess expression (assay implemented in experiment) 
+ reactivity_readout: + type: string + nullable: false + enum: + - binding strength + - cytokine release + - dissociation constant KD + - on rate + - off rate + - pathogen inhibition + description: Reactivity measurement read-out + example: cytokine release + reactivity_value: + type: number + nullable: false + description: The absolute (processed) value of the measurement + example: 162.26 + reactivity_unit: + type: string + nullable: false + description: The unit of the measurement + example: pg/ml diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index 9507bf904..1a6ddb5c5 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -1284,7 +1284,7 @@ Genotype: # It 'known' in the sense that it is documented in a reference set DocumentedAllele: - discriminator: AIRR + type: object required: - label - germline_set_ref @@ -1312,7 +1312,6 @@ DocumentedAllele: # It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis UndocumentedAllele: - discriminator: AIRR required: - allele_name - sequence @@ -1339,7 +1338,6 @@ UndocumentedAllele: # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype DeletedGene: - discriminator: AIRR required: - label - germline_set_ref @@ -1698,7 +1696,6 @@ Study: name: ADC Update Date SubjectGenotype: - discriminator: AIRR type: object properties: receptor_genotype_set: diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 7bed452dc..d0886743d 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -1376,8 +1376,7 @@ Genotype: # It 'known' in the sense that it is documented in a reference set DocumentedAllele: - discriminator: - propertyName: AIRR + type: object required: - label - germline_set_ref @@ -1407,8 +1406,6 @@ DocumentedAllele: # It is 'undocumented' in the sense that it was not found in reference 
sets consulted for the analysis UndocumentedAllele: - discriminator: - propertyName: AIRR required: - allele_name - sequence @@ -1437,8 +1434,6 @@ UndocumentedAllele: # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype DeletedGene: - discriminator: - propertyName: AIRR required: - label - germline_set_ref @@ -1576,8 +1571,6 @@ MHCAllele: SubjectGenotype: - discriminator: - propertyName: AIRR type: object properties: receptor_genotype_set: diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 9507bf904..1a6ddb5c5 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -1284,7 +1284,7 @@ Genotype: # It 'known' in the sense that it is documented in a reference set DocumentedAllele: - discriminator: AIRR + type: object required: - label - germline_set_ref @@ -1312,7 +1312,6 @@ DocumentedAllele: # It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis UndocumentedAllele: - discriminator: AIRR required: - allele_name - sequence @@ -1339,7 +1338,6 @@ UndocumentedAllele: # It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype DeletedGene: - discriminator: AIRR required: - label - germline_set_ref @@ -1698,7 +1696,6 @@ Study: name: ADC Update Date SubjectGenotype: - discriminator: AIRR type: object properties: receptor_genotype_set: From 275e2c090e17cb94451210f418868adddea6a075 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 29 Aug 2023 16:28:40 -0500 Subject: [PATCH 29/59] allow custom schema --- lang/js/airr.js | 38 +++++++++++++------------------------- lang/js/io.js | 4 ++++ 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/lang/js/airr.js b/lang/js/airr.js index fd15ef16f..e34fdeae0 100644 --- a/lang/js/airr.js +++ b/lang/js/airr.js @@ -21,6 +21,7 @@ const $RefParser = require("@apidevtools/json-schema-ref-parser"); var airr = {}; module.exports = airr; +// load AIRR standards 
schema airr.load_schema = async function() { // Load AIRR spec var airrFile = path.resolve(__dirname, './airr-schema-openapi3.yaml'); @@ -35,29 +36,16 @@ airr.load_schema = async function() { return Promise.resolve(spec); }; -// schema functions -//const schema = require('./schema')(AIRRSchema); -// i/o functions -//const io = require('./io'); - -/* TODO? UMD -(function (root, factory) { - if (typeof define === 'function' && define.amd) { - // AMD. Register as an anonymous module. - define(['b'], factory); - } else if (typeof module === 'object' && module.exports) { - // Node. - module.exports = factory(require('b')); - } else { - // Browser globals (root is window) - root.returnExports = factory(root.b); - } -}(typeof self !== 'undefined' ? self : this, function (b) { - // Use b in some fashion. - - // Just return a value to define the module export. - // This example returns an object, but the module - // can return a function as the exported value. - return {}; -})); */ +// load custom schema +airr.load_custom_schema = async function(obj, filename) { + // Load schema file + //var airrFile = path.resolve(__dirname, filename); + var doc = yaml.safeLoad(fs.readFileSync(filename)); + if (!doc) Promise.reject(new Error('Could not load custom schema yaml file.')); + // dereference all $ref objects + var spec = await $RefParser.dereference(doc); + var schema = require('./schema')(obj, spec); + + return Promise.resolve(spec); +}; diff --git a/lang/js/io.js b/lang/js/io.js index b7a36e0a6..bbdc2b8a3 100644 --- a/lang/js/io.js +++ b/lang/js/io.js @@ -15,6 +15,10 @@ var fs = require('fs'); const zlib = require('zlib'); var csv = require('csv-parser'); +function isPromise(promise) { + return !!promise && typeof promise.then === 'function' +} + // // Interface functions for file operations // From 251aa6fa69f476b16cc2071dc6651d1db7c23bf0 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 25 Sep 2023 17:49:20 -0500 Subject: [PATCH 30/59] return all errors --- 
lang/js/schema.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 8a6d9be29..b822cf1bb 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -31,11 +31,11 @@ var _from_bool_map = function(x) { return ''; }; -function isPromise(promise) { +function isPromise(promise) { return !!promise && typeof promise.then === 'function' } -class ValidationError extends Error { +class ValidationError extends Error { constructor (message) { super(message) @@ -196,7 +196,7 @@ module.exports = function(airr, schema) { } airr.SchemaDefinition.prototype.validate_object = function(object) { - const ajv = new AJV(); + const ajv = new AJV({allErrors: true}); addFormats(ajv); ajv.addVocabulary(['x-airr', 'example']); From 7658f5937ad14147eb2346422e5db4d69c3c33a8 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 4 Dec 2023 20:34:06 -0600 Subject: [PATCH 31/59] functions for CURIE --- lang/js/schema.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lang/js/schema.js b/lang/js/schema.js index b822cf1bb..6ee5fa545 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -52,6 +52,11 @@ module.exports = function(airr, schema) { //console.log('airr-js schema:', JSON.stringify(schema, null, 2)); airr.Schema = {"specification": schema}; + airr.get_specification = function() { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return airr.Schema['specification']; + } + // return schemas in format appropriate for API doc airr.get_schemas = function() { if (! 
airr.Schema['specification']) return null; @@ -73,6 +78,16 @@ module.exports = function(airr, schema) { return airr.Schema['specification']['Info']; } + airr.get_curie_map = function() { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return airr.Schema['specification']['CURIEMap']; + } + + airr.get_iri_providers = function() { + if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); + return airr.Schema['specification']['InformationProvider']; + } + airr.SchemaDefinition = function(definition) { if (!airr.Schema) throw new Error('AIRR schema is not loaded.'); From 5d46e80c5a5427dc70b6e2262f4c158607d24edd Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 10 Jan 2024 13:44:11 -0600 Subject: [PATCH 32/59] handle allOf at top level of object --- lang/js/schema.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 6ee5fa545..f3aa859f7 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -266,7 +266,9 @@ module.exports = function(airr, schema) { }; var obj = {}; - _populate(this, obj); + if (this.definition.allOf) { + for (const k in this.definition.allOf) _populate(this.definition['allOf'][k], obj); + } else _populate(this, obj); return (obj); } From d4a41e28e3709b76381f34a34ab321062556e428 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 12 Feb 2024 21:20:23 -0600 Subject: [PATCH 33/59] ignore discriminator for older schemas --- lang/js/schema.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index f3aa859f7..8d75b75a3 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -213,7 +213,7 @@ module.exports = function(airr, schema) { airr.SchemaDefinition.prototype.validate_object = function(object) { const ajv = new AJV({allErrors: true}); addFormats(ajv); - ajv.addVocabulary(['x-airr', 'example']); + ajv.addVocabulary(['x-airr', 'example', 'discriminator']); const validate = 
ajv.compile(this.definition) const valid = validate(object) From b8d5490675a8fa9f71ad0bd151227a162f00f1ad Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 12 Feb 2024 21:21:04 -0600 Subject: [PATCH 34/59] merge allOfs as dereference does not --- lang/js/airr.js | 13 +++++++++++++ lang/js/package.json | 1 + 2 files changed, 14 insertions(+) diff --git a/lang/js/airr.js b/lang/js/airr.js index e34fdeae0..09857b531 100644 --- a/lang/js/airr.js +++ b/lang/js/airr.js @@ -17,6 +17,7 @@ var yaml = require('js-yaml'); var path = require('path'); var fs = require('fs'); const $RefParser = require("@apidevtools/json-schema-ref-parser"); +const merge = require('allof-merge'); var airr = {}; module.exports = airr; @@ -30,6 +31,12 @@ airr.load_schema = async function() { // dereference all $ref objects var spec = await $RefParser.dereference(doc); + // merge allOfs + for (let obj in spec) { + if (spec[obj]['type'] || spec[obj]['allOf']) { + spec[obj] = merge.merge(spec[obj]); + } + } var schema = require('./schema')(airr, spec); var io = require('./io')(airr); @@ -45,6 +52,12 @@ airr.load_custom_schema = async function(obj, filename) { // dereference all $ref objects var spec = await $RefParser.dereference(doc); + // merge allOfs + for (let obj in spec) { + if (spec[obj]['type'] || spec[obj]['allOf']) { + spec[obj] = merge.merge(spec[obj]); + } + } var schema = require('./schema')(obj, spec); return Promise.resolve(spec); diff --git a/lang/js/package.json b/lang/js/package.json index 29b889986..e80c567d4 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -25,6 +25,7 @@ "@apidevtools/json-schema-ref-parser": "^10.1.0", "ajv": "^8.12.0", "ajv-formats": "^2.1.1", + "allof-merge": "^0.6.5", "csv-parser": "^2.3.2", "js-yaml": "^3.10.0" }, From 2028d476a809ef5e875c9e40d83ae62f20af1602 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Fri, 15 Mar 2024 11:53:13 -0500 Subject: [PATCH 35/59] spec to lang dir --- lang/js/airr-schema-openapi3.yaml | 912 
++++++++++++++++++++---------- 1 file changed, 620 insertions(+), 292 deletions(-) diff --git a/lang/js/airr-schema-openapi3.yaml b/lang/js/airr-schema-openapi3.yaml index d0886743d..e833ff937 100644 --- a/lang/js/airr-schema-openapi3.yaml +++ b/lang/js/airr-schema-openapi3.yaml @@ -81,6 +81,14 @@ CURIEMap: map: ENSG: iri_prefix: "https://www.ensembl.org/Multi/Search/Results?q=" + GAZ: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/GAZ_" IEDB_RECEPTOR: type: identifier default: @@ -210,6 +218,11 @@ InformationProvider: ontology_id: DOID OLS: ontology_id: doid + GAZ: + Ontobee: + ontology_id: GAZ + OLS: + ontology_id: gaz MRO: Ontobee: ontology_id: MRO @@ -268,6 +281,15 @@ Attributes: True if an ADC API implementation must support queries on the field. If false, query support for the field in ADC API implementations is optional. default: false + adc-api-optional: + type: boolean + description: > + If false, repositories must implement these fields both for queries and query repsonse. + Only applies to fields in the ADC API spec that are extensions to the AIRR Standard, + targeted at "convenience query fields" that make queries against repositories more + efficient than if queries were limited to AIRR fields only. + If true, repositories can choose to support the field or not. + default: false deprecated: type: boolean description: True if the field has been deprecated from the schema. @@ -294,8 +316,10 @@ Attributes: description: Field format. 
If null then assume the full range of the field data type enum: - ontology - - controlled vocabulary - - physical quantity + - controlled_vocabulary + - physical_quantity + - time_point + - time_interval - CURIE ontology: type: object @@ -346,7 +370,7 @@ DataFile: RepertoireGroup: type: array nullable: false - description: List of repertoire collections + description: List of repertoire groups items: $ref: '#/RepertoireGroup' Rearrangement: @@ -424,6 +448,10 @@ InfoObject: type: string nullable: true +# +# General objects +# + # A time point TimePoint: description: Time point at which an observation or other action was performed. @@ -438,14 +466,14 @@ TimePoint: adc-query-support: true value: type: number - nullable: true + nullable: false description: Value of the time point example: -5.0 x-airr: adc-query-support: true unit: $ref: '#/Ontology' - nullable: true + nullable: false description: Unit of the time point title: Unit of immunization schedule example: @@ -460,38 +488,204 @@ TimePoint: id: UO:0000003 label: time unit -# -# General objects -# +# A time range or interval +TimeInterval: + description: Time range or interval for a measurement, observation or action. + type: object + properties: + min: + type: number + nullable: false + description: Lower/minimum value of the time interval + example: 5.0 + x-airr: + adc-query-support: true + max: + type: number + nullable: false + description: Upper/maximum value of the time interval + example: 10.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of the time interval + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# A physical quantity +PhysicalQuantity: + description: A physical quantity from a measurement or observation. 
+ type: object + properties: + quantity: + type: number + nullable: false + description: Physical quantity + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of physical quantity + example: + id: UO:0000024 + label: nanogram + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + -# An individual -Acknowledgement: +# A time quantity +TimeQuantity: + description: A time quantity + type: object + properties: + quantity: + type: number + nullable: false + description: Time quantity + example: 30.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of time + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + + +# Contributor record to describe invididuals and their contribution to a data set +# +Contributor: description: Individual whose contribution to this work should be acknowledged type: object required: - - acknowledgement_id + - contributor_id - name - - institution_name properties: - acknowledgement_id: + contributor_id: type: string - description: unique identifier of this Acknowledgement within the file + nullable: true + description: Unique identifier of this contributor within the file x-airr: identifier: true miairr: important - nullable: true name: type: string + nullable: false + description: Full name of contributor + orcid_id: + $ref: '#/Ontology' nullable: true - description: Full name of individual - institution_name: - type: string + description: > + ORCID identifier of the contributor. Note that if present, the label of the ORCID record should take + precedence over the name reported in the `name` property. 
+ title: ORCID iD + example: + id: ORCID:0000-0002-1825-0097 + label: Josiah Carberry + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation: + $ref: '#/Ontology' nullable: true - description: Individual's department and institution name - orcid_id: + description: > + ROR of the contributor's primary affiliation. Note that ROR are only minted for institutions, not + from individuals institutes, divisions or departments. + title: ROR + example: + id: ROR:05h7xva58 + label: Wesleyan University + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation_department: type: string nullable: true - description: Individual's ORCID identifier + description: > + Additional information regarding the contributor's primary affiliation. Can be used to specify + individual institutes, divisions or departments. + example: Department for Psychoceramics + contributions: + type: array + nullable: true + description: List of all roles the contributor had in a project + items: + $ref: '#/ContributorContribution' + +ContributorContribution: + type: object + required: + - role + properties: + role: + type: string + nullable: false + description: Role according to CRediT taxonomy + enum: + - conceptualization + - data curation + - formal analysis + - funding acquisition + - investigation + - methodology + - project administration + - resources + - software + - supervision + - validation + - visualization + - writing - original draft + - writing - review & editing + degree: + type: string + nullable: true + description: > + Optional specification of the degree of contribution, should be used if multiple individuals serve + the same role. 
+ enum: + - lead + - equal + - supporting + # # Germline gene schema @@ -525,7 +719,7 @@ RearrangedSequence: miairr: important sequence: type: string - nullable: true + nullable: false x-airr: miairr: essential description: nucleotide sequence @@ -541,17 +735,15 @@ RearrangedSequence: miairr: important observation_type: type: string - nullable: true + nullable: false enum: - - direct sequencing - - inference from repertoire - - null + - direct_sequencing + - inference_from_repertoire description: > The type of observation from which this sequence was drawn, such as direct sequencing or inference from repertoire sequencing data. x-airr: miairr: essential - curation: type: string nullable: true @@ -576,13 +768,13 @@ RearrangedSequence: description: Version number of the sequence within the repository sequence_start: type: integer - nullable: true + nullable: false x-airr: miairr: essential description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited sequence_end: type: integer - nullable: true + nullable: false x-airr: miairr: essential description: End co-ordinate of the sequence detailed in this record, within the sequence deposited @@ -609,7 +801,7 @@ UnrearrangedSequence: description: unique identifier of this UnrearrangedSequence within the file sequence: type: string - nullable: true + nullable: false description: > Sequence of interest described in this record. Typically, this will include gene and promoter region. 
x-airr: @@ -789,8 +981,7 @@ AlleleDescription: type: object required: - allele_description_id - - maintainer - - lab_address + - acknowledgements - release_version - release_date - release_description @@ -818,24 +1009,16 @@ AlleleDescription: miairr: important description: Unique reference to the allele description, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:IGHV1-69*01.001 - maintainer: - type: string - nullable: true - x-airr: - miairr: defined - description: Maintainer of this sequence record acknowledgements: type: array nullable: true - description: List of individuals whose contribution to the gene description should be acknowledged + description: > + List of individuals whose contribution to the gene description should be acknowledged. Note that these + are not necessarily identical with the authors on an associated manuscript or other scholarly + communication. Further note that typically at least the three CRediT contributor roles "supervision", + "investigation" and "data curation" should be assigned. The current maintainer should be listed first. items: - $ref: '#/Acknowledgement' - lab_address: - type: string - nullable: true - x-airr: - miairr: defined - description: Institution and full address of corresponding author + $ref: '#/Contributor' release_version: type: integer nullable: true @@ -862,11 +1045,14 @@ AlleleDescription: nullable: true x-airr: miairr: important - description: The accepted name for this gene or allele + description: > + The accepted name for this gene or allele following the relevant nomenclature. + The value in this field should correspond to values in acceptable name fields of other schemas, + such as v_call, d_call, and j_call fields. 
example: IGHV1-69*01 sequence: type: string - nullable: true + nullable: false x-airr: miairr: essential description: > @@ -888,7 +1074,7 @@ AlleleDescription: description: Alternative names for this sequence locus: type: string - nullable: true + nullable: false enum: - IGH - IGI @@ -898,7 +1084,6 @@ AlleleDescription: - TRB - TRG - TRD - - null description: Gene locus x-airr: miairr: essential @@ -908,13 +1093,12 @@ AlleleDescription: description: chromosome on which the gene is located sequence_type: type: string - nullable: true + nullable: false enum: - V - D - J - C - - null description: Sequence type (V, D, J, C) x-airr: miairr: essential @@ -928,16 +1112,16 @@ AlleleDescription: type: string nullable: true enum: - - Genomic and rearranged - - Genomic only - - Rearranged only + - genomic_and_rearranged + - genomic_only + - rearranged_only - null description: Type of inference(s) from which this gene sequence was inferred x-airr: miairr: important species: $ref: '#/Ontology' - nullable: true + nullable: false description: Binomial designation of subject's species title: Organism example: @@ -1127,9 +1311,7 @@ GermlineSet: All genes in a GermlineSet should be from a single locus. 
required: - germline_set_id - - author - - lab_name - - lab_address + - acknowledgements - release_version - release_description - release_date @@ -1148,30 +1330,16 @@ GermlineSet: x-airr: identifier: true miairr: important - author: - type: string - nullable: true - x-airr: - miairr: important - description: Corresponding author - lab_name: - type: string - nullable: true - x-airr: - miairr: important - description: Department of corresponding author - lab_address: - type: string - nullable: true - x-airr: - miairr: important - description: Institutional address of corresponding author acknowledgements: type: array nullable: true - description: List of individuals whose contribution to the germline set should be acknowledged + description: > + List of individuals whose contribution to the germline set should be acknowledged. Note that these are + not necessarily identical with the authors on an associated manuscript or other scholarly communication. + Further note that typically at least the three CRediT contributor roles "supervision", "investigation" + and "data curation" should be assigned. The coresponding author should be listed last. 
items: - $ref: '#/Acknowledgement' + $ref: '#/Contributor' release_version: type: number nullable: true @@ -1207,13 +1375,15 @@ GermlineSet: description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) example: OGRDB:Human_IGH:2021.11 pub_ids: - type: string + type: array + items: + type: string nullable: true description: Publications describing the germline set - example: "PMID:85642,PMID:12345" + example: ["PMID:35720344"] species: $ref: '#/Ontology' - nullable: true + nullable: false x-airr: miairr: essential description: Binomial designation of subject's species @@ -1238,7 +1408,7 @@ GermlineSet: - null locus: type: string - nullable: true + nullable: false enum: - IGH - IGI @@ -1248,7 +1418,6 @@ GermlineSet: - TRB - TRG - TRD - - null description: Gene locus x-airr: miairr: essential @@ -1316,7 +1485,7 @@ Genotype: repository hosting the schema, for example from the underlying ID of the database record. locus: type: string - nullable: true + nullable: false enum: - IGH - IGI @@ -1326,12 +1495,11 @@ Genotype: - TRB - TRD - TRG - - null description: Gene locus example: IGH x-airr: adc-query-support: true - format: controlled vocabulary + format: controlled_vocabulary miairr: essential documented_alleles: type: array @@ -1369,7 +1537,7 @@ Genotype: example: repertoire_sequencing x-airr: adc-query-support: true - format: controlled vocabulary + format: controlled_vocabulary # Documented Allele # This describes a 'known' allele found in a genotype @@ -1419,7 +1587,7 @@ UndocumentedAllele: miairr: important sequence: type: string - nullable: true + nullable: false description: nt sequence of the allele, as provided by the inference pipeline x-airr: miairr: essential @@ -1441,7 +1609,7 @@ DeletedGene: properties: label: type: string - nullable: true + nullable: false description: The accepted name for this gene, taken from the GermlineSet x-airr: miairr: essential @@ -1499,18 +1667,17 @@ MHCGenotype: description: A unique 
identifier for this MHCGenotype, assumed to be unique in the context of the study mhc_class: type: string - nullable: true + nullable: false enum: - MHC-I - MHC-II - MHC-nonclassical - - null description: Class of MHC alleles described by the MHCGenotype example: MHC-I x-airr: miairr: essential adc-query-support: true - format: controlled vocabulary + format: controlled_vocabulary mhc_alleles: type: array nullable: true @@ -1595,10 +1762,7 @@ Study: - study_type - inclusion_exclusion_criteria - grants - - collected_by - - lab_name - - lab_address - - submitted_by + - contributors - pub_ids - keywords_study properties: @@ -1682,17 +1846,36 @@ Study: set: 1 subset: study name: Grant funding agency + contributors: + type: array + nullable: false + description: > + List of individuals who contributed to the study. Note that these are not necessarily identical with + the authors on an associated manuscript or other scholarly communication. Further note that typically + at least the three CRediT contributor roles "supervision", "investigation" and "data curation" should + be assigned. The coresponding author should be listed last. + title: Contributors + items: + $ref: '#/Contributor' + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: study + name: Contributors study_contact: type: string nullable: true description: > Full contact information of the contact persons for this study This should include an e-mail address and a persistent identifier such as an ORCID ID. - title: Contact information (study) - example: Dr. P. Stibbons, p.stibbons@unseenu.edu, https://orcid.org/0000-0002-1825-0097 x-airr: - adc-query-support: true - name: Contact information (study) + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. 
+ deprecated-replaced-by: + - contributors collected_by: type: string nullable: true @@ -1700,38 +1883,35 @@ Study: Full contact information of the data collector, i.e. the person who is legally responsible for data collection and release. This should include an e-mail address and a persistent identifier such as an ORCID ID. - title: Contact information (data collection) - example: Dr. P. Stibbons, p.stibbons@unseenu.edu, https://orcid.org/0000-0002-1825-0097 x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: study - name: Contact information (data collection) + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors lab_name: type: string nullable: true description: Department of data collector - title: Lab name - example: Department for Planar Immunology x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: study - name: Lab name + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors lab_address: type: string nullable: true description: Institution and institutional address of data collector - title: Lab address - example: School of Medicine, Unseen University, Ankh-Morpork, Disk World x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: study - name: Lab address + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors submitted_by: type: string nullable: true @@ -1739,22 +1919,24 @@ Study: Full contact information of the data depositor, i.e., the person submitting the data to a repository. 
This should include an e-mail address and a persistent identifier such as an ORCID ID. This is supposed to be a short-lived and technical role until the submission is relased. - title: Contact information (data deposition) - example: Adrian Turnipseed, a.turnipseed@unseenu.edu, https://orcid.org/0000-0002-1825-0097 x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: study - name: Contact information (data deposition) + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors pub_ids: - type: string + type: array + items: + type: string nullable: true description: > - Publications describing the rationale and/or outcome of the study. Where ever possible, a persistent - identifier should be used such as a DOI or a Pubmed ID + Array of publications describing the rationale and/or outcome of the study as an array of CURIE objects such as + a DOI or Pubmed ID. Where more than one publication is given, if there is a primary publication for the study it + should come first. title: Relevant publications - example: "PMID:85642" + example: ["PMID:29144493", "DOI:10.1038/ni.3873"] x-airr: miairr: important adc-query-support: true @@ -1773,6 +1955,8 @@ Study: - contains_schema_clone - contains_schema_cell - contains_schema_receptor + - contains_schema_cellexpression + - contains_schema_receptorreactivity nullable: true description: > Keywords describing properties of one or more data sets in a study. 
"contains_schema" keywords indicate that @@ -1791,7 +1975,7 @@ Study: set: 1 subset: study name: Keywords for study - format: controlled vocabulary + format: controlled_vocabulary adc_publish_date: type: string format: date-time @@ -1824,9 +2008,7 @@ Subject: - synthetic - species - sex - - age_min - - age_max - - age_unit + - age - age_event - ancestry_population - ethnicity @@ -1874,7 +2056,7 @@ Subject: adc-query-support: true set: 1 subset: subject - name: Organism + name: Species format: ontology ontology: draft: false @@ -1898,8 +2080,6 @@ Subject: - pooled - hermaphrodite - intersex - - "not collected" - - "not applicable" - null nullable: true description: Biological sex of subject @@ -1911,53 +2091,31 @@ Subject: set: 1 subset: subject name: Sex - format: controlled vocabulary - age_min: - type: number - nullable: true - description: Specific age or lower boundary of age range. - title: Age minimum - example: 60 - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age minimum - age_max: - type: number + format: controlled_vocabulary + age: + $ref: '#/TimeInterval' nullable: true description: > - Upper boundary of age range or equal to age_min for specific age. - This field should only be null if age_min is null. - title: Age maximum - example: 80 + Age of subject expressed as a time interval. If singular time point then + min == max in the time interval. 
+ examples: + - min: 50 + max: 50 + unit: + id: UO:0000036 + label: year + - min: 5 + max: 10 + unit: + id: UO:0000034 + label: week x-airr: miairr: important adc-query-support: true set: 1 subset: subject - name: Age maximum - age_unit: - $ref: '#/Ontology' - nullable: true - description: Unit of age range - title: Age unit - example: - id: UO:0000036 - label: year - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + name: Age + format: time_interval age_event: type: string nullable: true @@ -1973,28 +2131,69 @@ Subject: set: 1 subset: subject name: Age event - age: - type: string + age_min: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_max: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_unit: + $ref: '#/Ontology' nullable: true x-airr: deprecated: true - deprecated-description: Split into two fields to specify as an age range. + deprecated-description: Combined into single age field. 
deprecated-replaced-by: - - age_min - - age_max - - age_unit + - age ancestry_population: - type: string + $ref: '#/Ontology' nullable: true description: Broad geographic origin of ancestry (continent) title: Ancestry population - example: list of continents, mixed or unknown + example: + id: GAZ:00000459 + label: South America x-airr: miairr: important adc-query-support: true set: 1 subset: subject name: Ancestry population + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + location_birth: + $ref: '#/Ontology' + nullable: true + description: Self-reported location of birth of the subject, preferred granularity is country-level + example: + id: GAZ:00002939 + label: Poland + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Location of birth + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location ethnicity: type: string nullable: true @@ -2093,6 +2292,25 @@ Diagnosis: set: 1 subset: diagnosis and intervention name: Study group description + diagnosis_timepoint: + $ref: '#/TimePoint' + nullable: true + description: Time point for the diagnosis + title: Diagnosis timepoint + example: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis timepoint + format: time_point + disease_diagnosis: $ref: '#/Ontology' nullable: true @@ -2114,18 +2332,22 @@ Diagnosis: id: DOID:4 label: disease disease_length: - type: string + $ref: '#/TimeQuantity' nullable: true description: Time duration between initial diagnosis and current intervention title: Length of disease - example: 23 months + example: + quantity: 23 + unit: + id: UO:0000035 + label: month x-airr: miairr: important adc-query-support: true set: 1 subset: diagnosis and intervention name: Length of disease - format: physical quantity + 
format: physical_quantity disease_stage: type: string nullable: true @@ -2198,8 +2420,6 @@ Sample: - anatomic_site - disease_state_sample - collection_time_point_relative - - collection_time_point_relative_unit - - collection_time_point_reference - biomaterial_provider properties: sample_id: @@ -2274,49 +2494,59 @@ Sample: subset: sample name: Disease state of sample collection_time_point_relative: - type: number + $ref: '#/TimePoint' nullable: true - description: Time point at which sample was taken, relative to `Collection time event` + description: Time point at which sample was taken, relative to `label` event title: Sample collection time - example: 14 + example: + label: Primary vaccination + value: 14 + unit: + id: UO:0000033 + label: day x-airr: miairr: important adc-query-support: true set: 2 subset: sample name: Sample collection time + format: time_point collection_time_point_relative_unit: $ref: '#/Ontology' nullable: true - description: Unit of Sample collection time - title: Sample collection time unit - example: - id: UO:0000033 - label: day x-airr: - miairr: important - adc-query-support: true - set: 2 - subset: sample - name: Sample collection time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. + deprecated-replaced-by: + - collection_time_point_relative collection_time_point_reference: type: string nullable: true description: Event in the study schedule to which `Sample collection time` relates to - title: Collection time event - example: Primary vaccination + x-airr: + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. 
+ deprecated-replaced-by: + - collection_time_point_relative + collection_location: + $ref: '#/Ontology' + nullable: true + description: Location where the sample was taken, preferred granularity is country-level + title: Sample collection location + example: + id: GAZ:00002939 + label: Poland x-airr: miairr: important - adc-query-support: true set: 2 subset: sample - name: Collection time event + name: Sample collection location + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location biomaterial_provider: type: string nullable: true @@ -2533,7 +2763,7 @@ PCRTarget: set: 3 subset: process (nucleic acid [pcr]) name: Target locus for PCR - format: controlled vocabulary + format: controlled_vocabulary forward_pcr_primer_target_location: type: string nullable: true @@ -2567,7 +2797,6 @@ NucleicAcidProcessing: - template_class - template_quality - template_amount - - template_amount_unit - library_generation_method - library_generation_protocol - library_generation_kit_version @@ -2590,7 +2819,7 @@ NucleicAcidProcessing: set: 3 subset: process (nucleic acid) name: Target substrate - format: controlled vocabulary + format: controlled_vocabulary template_quality: type: string nullable: true @@ -2604,37 +2833,30 @@ NucleicAcidProcessing: subset: process (nucleic acid) name: Target substrate quality template_amount: - type: number + $ref: '#/PhysicalQuantity' nullable: true description: Amount of template that went into the process title: Template amount - example: 1000 + example: + quantity: 1000 + unit: + id: UO:0000024 + label: nanogram x-airr: miairr: important adc-query-support: true set: 3 subset: process (nucleic acid) name: Template amount + format: physical_quantity template_amount_unit: $ref: '#/Ontology' nullable: true - description: Unit of template amount - title: Template amount time unit - example: - id: UO:0000024 - label: nanogram x-airr: - miairr: important - adc-query-support: true - set: 3 - subset: 
process (nucleic acid) - name: Template amount time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000002 - label: physical quantity + deprecated: true + deprecated-description: Field has been merged with template_amount. + deprecated-replaced-by: + - template_amount library_generation_method: type: string enum: @@ -2660,7 +2882,7 @@ NucleicAcidProcessing: set: 3 subset: process (nucleic acid) name: Library generation method - format: controlled vocabulary + format: controlled_vocabulary library_generation_protocol: type: string nullable: true @@ -2723,7 +2945,7 @@ NucleicAcidProcessing: set: 3 subset: process (nucleic acid) name: Complete sequences - format: controlled vocabulary + format: controlled_vocabulary physical_linkage: type: string enum: @@ -2749,7 +2971,7 @@ NucleicAcidProcessing: set: 3 subset: process (nucleic acid) name: Physical linkage of different rearrangements - format: controlled vocabulary + format: controlled_vocabulary # 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) SequencingRun: @@ -2886,7 +3108,7 @@ SequencingData: set: 4 subset: data (raw reads) name: Raw sequencing data file type - format: controlled vocabulary + format: controlled_vocabulary filename: type: string nullable: true @@ -2916,7 +3138,7 @@ SequencingData: set: 4 subset: data (raw reads) name: Read direction - format: controlled vocabulary + format: controlled_vocabulary read_length: type: integer nullable: true @@ -2958,7 +3180,7 @@ SequencingData: set: 4 subset: data (raw reads) name: Paired read direction - format: controlled vocabulary + format: controlled_vocabulary paired_read_length: type: integer nullable: true @@ -3051,7 +3273,7 @@ DataProcessing: quality_thresholds: type: string nullable: true - description: How sequences were removed from (4) based on base quality scores + description: How/if sequences were removed from (4) based on base quality scores title: Quality thresholds 
example: Average Phred score >=20 x-airr: @@ -3229,7 +3451,8 @@ Repertoire: x-airr: adc-query-support: true -# A collection of repertoires for analysis purposes, includes optional time course +# An ordered group of repertoires for analysis purposes, includes optional time course +# Can be treated as a set if all repertoire_group_id are unique RepertoireGroup: type: object required: @@ -3239,22 +3462,22 @@ RepertoireGroup: repertoire_group_id: type: string nullable: true - description: Identifier for this repertoire collection + description: Identifier for this repertoire group x-airr: identifier: true repertoire_group_name: type: string nullable: true - description: Short display name for this repertoire collection + description: Short display name for this repertoire group repertoire_group_description: type: string nullable: true - description: Repertoire collection description + description: Repertoire group description repertoires: type: array nullable: true description: > - List of repertoires in this collection with an associated description and time point designation + List of repertoires in this group with an associated description and time point designation items: type: object properties: @@ -3488,7 +3711,29 @@ Rearrangement: x-airr: adc-query-support: true name: Gene locus - format: controlled vocabulary + format: controlled_vocabulary + locus_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the locus originates. Typically, this value should be + identical to `organism`, if which case it SHOULD NOT be set explicitly. However, there are valid + experimental setups in which the two might differ, e.g. transgenic animal models. If set, this key + will overwrite the `organism` information for all lower layers of the schema. 
+ title: Locus species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + name: Locus species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata v_call: type: string nullable: true @@ -4553,9 +4798,10 @@ Cell: type: string nullable: false description: > - Identifier defining the cell of origin for the query sequence. + Identifier for the Cell object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. title: Cell index - example: W06_046_091 x-airr: identifier: true miairr: defined @@ -4565,11 +4811,11 @@ Cell: type: array nullable: true description: > - Array of sequence identifiers defined for the Rearrangement object + Array of sequence identifiers defined for the Rearrangement objects associated with this cell title: Cell-associated rearrangements items: type: string - example: [id1, id2] #empty vs NULL? + example: [id1, id2] x-airr: miairr: defined adc-query-support: true @@ -4578,11 +4824,11 @@ Cell: type: array nullable: true description: > - Array of receptor identifiers defined for the Receptor object + Array of receptor identifiers defined for the Receptor objects associated with this cell title: Cell-associated receptors items: type: string - example: [id1, id2] #empty vs NULL? + example: [id1, id2] x-airr: miairr: defined adc-query-support: true @@ -4599,7 +4845,7 @@ Cell: data_processing_id: type: string nullable: true - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. 
title: Data processing for cell x-airr: miairr: defined @@ -4608,8 +4854,8 @@ Cell: expression_study_method: type: string enum: - - "flow cytometry" - - "single-cell transcriptome" + - flow_cytometry + - single-cell_transcriptome - null nullable: true description: > @@ -4646,8 +4892,7 @@ Cell: # The CellExpression object acts as a container to hold a single expression level measurement from # an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for -# a single repertoire. +# data_processing_id. CellExpression: type: object required: @@ -4656,12 +4901,15 @@ CellExpression: - data_processing_id - cell_id - property + - property_type - value properties: expression_id: type: string description: > - Identifier of this expression property measurement. + Identifier for the CellExpression object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. title: Expression property measurement identifier nullable: false x-airr: @@ -4691,19 +4939,33 @@ CellExpression: name: Parental repertoire of cell data_processing_id: type: string - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. title: Data processing for cell nullable: true x-airr: miairr: defined adc-query-support: true name: Data processing for cell + property_type: + type: string + description: > + Keyword describing the property type and detection method used to measure the property value. 
+ The following keywords are recommended, but custom property types are also valid: + "mrna_expression_by_read_count", + "protein_expression_by_fluorescence_intensity", "antigen_bait_binding_by_fluorescence_intensity", + "protein_expression_by_dna_barcode_count" and "antigen_bait_binding_by_dna_barcode_count". + nullable: false + title: Property type and detection method + x-airr: + miairr: defined + adc-query-support: true + name: Property type and detection method property: $ref: '#/Ontology' nullable: true title: Property information description: > - Name of the property observed, typically a gene or antibody idenifier (and its label) from a + Name of the property observed, typically a gene or antibody identifier (and label) from a canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). example: @@ -4726,7 +4988,7 @@ CellExpression: name: Property value -# The Receptor object hold information about a receptor and its reactivity. +# The Receptor object holds information about a receptor (immunoglobulin or TCR) # Receptor: type: object @@ -4742,7 +5004,10 @@ Receptor: receptor_id: type: string nullable: false - description: ID of the current Receptor object, unique within the local repository. + description: > + Identifier for the Receptor object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. title: Receptor ID example: TCR-MM-012345 x-airr: @@ -4766,6 +5031,7 @@ Receptor: - TCR description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). 
x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_1_aa: type: string @@ -4788,6 +5054,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_1_aa originates example: IGH x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_2_aa: type: string @@ -4812,6 +5079,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_2_aa originates example: IGL x-airr: + format: controlled_vocabulary adc-query-support: true receptor_ref: type: array @@ -4823,17 +5091,12 @@ Receptor: example: ["IEDB_RECEPTOR:10"] x-airr: adc-query-support: true - reactivity_measurements: - type: array - nullable: true - description: Records of reactivity measurement - items: - $ref: '#/ReceptorReactivity' - -ReceptorReactivity: +CellReactivity: type: object required: + - cell_reactivity_id + - cell_id - ligand_type - antigen_type - antigen @@ -4842,17 +5105,57 @@ ReceptorReactivity: - reactivity_value - reactivity_unit properties: + cell_reactivity_id: + type: string + nullable: false + description: > + Identifier for the CellReactivity object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. + title: CellReactivity ID + x-airr: + identifier: true + adc-query-support: true + cell_id: + type: string + nullable: false + description: > + Identifier of the Cell in the context of which the reactivity measurement was conducted. + title: Cell ID + x-airr: + adc-query-support: true + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. 
+ title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this cell. + title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell ligand_type: type: string nullable: false enum: - - MHC:peptide - - MHC:non-peptide + - "MHC:peptide" + - "MHC:non-peptide" - protein - peptide - non-peptidic - description: Classification of ligand binding to receptor + description: Classification of ligand binding to the cell example: non-peptide + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen_type: type: string nullable: false @@ -4863,6 +5166,9 @@ ReceptorReactivity: description: > The type of antigen before processing by the immune system. example: protein + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen: $ref: '#/Ontology' nullable: false @@ -4886,6 +5192,7 @@ ReceptorReactivity: id: NCBITAXON:5843 label: Plasmodium falciparum NF54 x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -4900,6 +5207,14 @@ ReceptorReactivity: type: integer nullable: true description: End position of the peptide within the reference protein sequence + peptide_sequence_aa: + type: string + nullable: true + description: > + The actual peptide sequence against which the receptor reactivity was measured. This field should be + used as a convenience for antigens of antigen_type `protein` or `peptide`. 
+ x-airr: + adc-query-support: true mhc_class: type: string nullable: true @@ -4910,6 +5225,9 @@ ReceptorReactivity: - null description: Class of MHC molecule, only present for MHC:x ligand types example: MHC-II + x-airr: + format: controlled_vocabulary + adc-query-support: true mhc_gene_1: $ref: '#/Ontology' nullable: true @@ -4919,6 +5237,7 @@ ReceptorReactivity: id: MRO:0000055 label: HLA-DRA x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -4929,7 +5248,9 @@ ReceptorReactivity: type: string nullable: true description: Allele designation of the MHC alpha chain - example: HLA-DRA + example: HLA-DRA*01:01 + x-airr: + adc-query-support: true mhc_gene_2: $ref: '#/Ontology' nullable: true @@ -4939,46 +5260,53 @@ ReceptorReactivity: id: MRO:0000057 label: HLA-DRB1 x-airr: + adc-query-support: true format: ontology ontology: draft: true top_node: id: MRO:0000004 + label: MHC gene mhc_allele_2: type: string nullable: true description: > Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain example: HLA-DRB1*04:01 + x-airr: + adc-query-support: true reactivity_method: type: string nullable: false enum: - - SPR - - ITC - - ELISA - - cytometry - - biological activity + - native_protein + - MHC_peptide_multimer description: The methodology used to assess expression (assay implemented in experiment) + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_readout: type: string nullable: false enum: - - binding strength - - cytokine release - - dissociation constant KD - - on rate - - off rate - - pathogen inhibition + - fluorescence_intensity + - barcode_count description: Reactivity measurement read-out - example: cytokine release + example: barcode_count + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_value: type: number nullable: false description: The absolute (processed) value of the measurement - example: 162.26 + example: 162 + x-airr: + 
adc-query-support: true reactivity_unit: type: string nullable: false description: The unit of the measurement - example: pg/ml + example: read count + x-airr: + adc-query-support: true From 1d252a857c678f4f4ca0ffb52447839c6faf41ec Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 25 Apr 2024 12:33:31 -0500 Subject: [PATCH 36/59] enable enum default --- lang/js/schema.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 8d75b75a3..0a4b0168d 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -228,7 +228,7 @@ module.exports = function(airr, schema) { var _default = function(spec) { if (spec['default']) return spec['default']; if (spec['nullable']) return null; - //if (spec['enum']) return spec['enum'][0]; + if (spec['enum']) return spec['enum'][0]; return type_default[spec['type']]; }; From b3e4a69169200b2644cb7e0e534f1781b68829f1 Mon Sep 17 00:00:00 2001 From: jday1 <45389553+jday1@users.noreply.github.com> Date: Tue, 30 Apr 2024 18:25:17 +0100 Subject: [PATCH 37/59] MiAIRR Compliance Flag (#750) Added `check_nullable` flag to python validation --------- Co-authored-by: jday1 --- lang/python/airr/interface.py | 14 ++++++++------ lang/python/airr/schema.py | 18 ++++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/lang/python/airr/interface.py b/lang/python/airr/interface.py index 590c3a762..a8f60298f 100644 --- a/lang/python/airr/interface.py +++ b/lang/python/airr/interface.py @@ -224,7 +224,7 @@ def validate_rearrangement(filename, debug=False): #### AIRR Data Model #### -def read_airr(filename, format=None, validate=False, model=True, debug=False): +def read_airr(filename, format=None, validate=False, model=True, debug=False, check_nullable=True): """ Load an AIRR Data file @@ -238,6 +238,7 @@ def read_airr(filename, format=None, validate=False, model=True, debug=False): If False, attempt validation of all top-level objects. Ignored if validate=False. 
debug (bool): debug flag. If True print debugging information to standard error. + check_nullable (bool): whether to check for nullable fields if validating the data. Returns: dict: dictionary of AIRR Data objects. @@ -260,7 +261,7 @@ def read_airr(filename, format=None, validate=False, model=True, debug=False): if validate: if debug: sys.stderr.write('Validating: %s\n' % filename) try: - valid = validate_airr(data, model=model, debug=debug) + valid = validate_airr(data, model=model, debug=debug, check_nullable=check_nullable) except ValidationError as e: if debug: sys.stderr.write('%s failed validation\n' % filename) raise ValidationError(e) @@ -269,7 +270,7 @@ def read_airr(filename, format=None, validate=False, model=True, debug=False): return data -def validate_airr(data, model=True, debug=False): +def validate_airr(data, model=True, debug=False, check_nullable=True): """ Validates an AIRR Data file @@ -319,7 +320,7 @@ def validate_airr(data, model=True, debug=False): # Validate each record in array for i, record in obj_iter: try: - schema.validate_object(record) + schema.validate_object(record, check_nullable=check_nullable) except ValidationError as e: valid = False if debug: sys.stderr.write('%s at array position %s with validation error: %s\n' % (k, i, e)) @@ -330,7 +331,7 @@ def validate_airr(data, model=True, debug=False): return valid -def write_airr(filename, data, format=None, info=None, validate=False, model=True, debug=False): +def write_airr(filename, data, format=None, info=None, validate=False, model=True, debug=False, check_nullable=True): """ Write an AIRR Data file @@ -345,6 +346,7 @@ def write_airr(filename, data, format=None, info=None, validate=False, model=Tru model (bool): If True only validate and write objects defined in the AIRR DataFile schema. If False, attempt validation and write of all top-level objects debug (bool): debug flag. If True print debugging information to standard error. 
+ check_nullable (bool): whether to check for nullable fields if validating the data. Returns: bool: True if the file is written without error. @@ -358,7 +360,7 @@ def write_airr(filename, data, format=None, info=None, validate=False, model=Tru if validate: if debug: sys.stderr.write('Validating: %s\n' % filename) try: - valid = validate_airr(data, model=model, debug=debug) + valid = validate_airr(data, model=model, debug=debug, check_nullable=check_nullable) except ValidationError as e: if debug: sys.stderr.write(e) raise ValidationError(e) diff --git a/lang/python/airr/schema.py b/lang/python/airr/schema.py index 28de0a859..59f8adcb0 100644 --- a/lang/python/airr/schema.py +++ b/lang/python/airr/schema.py @@ -309,7 +309,7 @@ def validate_row(self, row): return True - def validate_object(self, obj, missing=True, nonairr=True, context=None): + def validate_object(self, obj, missing=True, nonairr=True, context=None, check_nullable=True): """ Validate Repertoire object data against schema @@ -318,6 +318,7 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): missing (bool): provides warnings for missing optional fields. nonairr (bool: provides warning for non-AIRR fields that cannot be validated. context (string): used by recursion to indicate place in object hierarchy + check_nullable (bool): check if data complies with the required fields as determined by the nullable flag. Returns: bool: True if a ValidationError exception is not raised. 
@@ -360,12 +361,11 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): is_missing_key = True # check MiAIRR keys exist - if xairr and xairr.get('miairr'): - if is_missing_key: + if check_nullable and xairr and xairr.get('miairr') == "" and is_missing_key: raise ValidationError('MiAIRR field "%s" is missing' % full_field) # check if required field - if f in self.required and is_missing_key: + if check_nullable and f in self.required and is_missing_key: raise ValidationError('Required field "%s" is missing' % full_field) # check if identifier field @@ -379,6 +379,8 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): # check nullable requirements if is_null: + if not check_nullable: + continue if not xairr: # default is true continue @@ -401,7 +403,7 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): schema = AIRRSchema[schema_name] else: schema = Schema(schema_name) - schema.validate_object(obj[f], missing, nonairr, full_field) + schema.validate_object(obj[f], missing, nonairr, full_field, check_nullable) else: raise ValidationError('Internal error: field "%s" in schema not handled by validation. File a bug report.' 
% full_field) elif field_type == 'array': @@ -413,7 +415,7 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): if spec['items'].get('$ref') is not None: schema_name = spec['items']['$ref'].split('/')[-1] schema = Schema(schema_name) - schema.validate_object(row, missing, nonairr, full_field) + schema.validate_object(row, missing, nonairr, full_field, check_nullable) elif spec['items'].get('allOf') is not None: for s in spec['items']['allOf']: if s.get('$ref') is not None: @@ -422,7 +424,7 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): schema = AIRRSchema[schema_name] else: schema = Schema(schema_name) - schema.validate_object(row, missing, False, full_field) + schema.validate_object(row, missing, False, full_field, check_nullable) elif spec['items'].get('enum') is not None: if row not in spec['items']['enum']: raise ValidationError('field "%s" has value "%s" not among possible enumeration values' % (full_field, row)) @@ -440,7 +442,7 @@ def validate_object(self, obj, missing=True, nonairr=True, context=None): raise ValidationError('array field "%s" does not have number type: %s' % (full_field, row)) elif spec['items'].get('type') == 'object': sub_schema = Schema({'properties': spec['items'].get('properties')}) - sub_schema.validate_object(row, missing, nonairr, context) + sub_schema.validate_object(row, missing, nonairr, context, check_nullable) else: raise ValidationError('Internal error: array field "%s" in schema not handled by validation. File a bug report.' 
% full_field) elif field_type == 'object': From 154bb00ff1f9e14663b61774a144dd419f958da9 Mon Sep 17 00:00:00 2001 From: Jason Vander Heiden Date: Wed, 15 May 2024 10:52:16 -0700 Subject: [PATCH 38/59] Fix table names in doc config --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index d459b12a0..edfb27edc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -414,7 +414,7 @@ def parse_schema(spec, schema): fields = ['Name', 'Type', 'Attributes', 'Definition'] tables = ['Repertoire', 'Study', 'Subject', 'Diagnosis', 'Sample', 'CellProcessing', 'NucleicAcidProcessing', 'PCRTarget', 'SequencingRun', 'SequencingData', 'DataProcessing', - 'Rearrangement', 'Alignment', 'Clone', 'Tree', 'Node', 'Cell', 'CellExpression', + 'Rearrangement', 'Alignment', 'Clone', 'Tree', 'Node', 'Cell', 'Expression', 'RearrangedSequence', 'UnrearrangedSequence', 'SequenceDelineationV', 'AlleleDescription', 'GermlineSet', 'GenotypeSet', 'Genotype', 'MHCGenotypeSet', 'MHCGenotype', 'Receptor'] for spec in tables: From 0c00c3015a8d1eea9d114afa893f85487b9e81aa Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 12 Jun 2024 19:07:17 -0500 Subject: [PATCH 39/59] exclude deprecated --- lang/js/schema.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 0a4b0168d..7058c00cd 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -103,7 +103,15 @@ module.exports = function(airr, schema) { if (! this.info) throw new Error('Info object cannot be found in the specifications'); - this.properties = this.definition['properties'] + this.properties = {}; + this.deprecated = {}; + for (let prop in this.definition['properties']) { + let spec = this.definition['properties'][prop]; + if ((spec['x-airr']) && (spec['x-airr']['deprecated'])) + this.properties[prop] = spec; + else + this.properties[prop] = spec; + } this.required = this.definition['required'] if (! 
this.required) this.required = []; From 4f1529dbacbfae9b515c8c36504114390c003acd Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 12 Jun 2024 19:24:16 -0500 Subject: [PATCH 40/59] curie resolve, handle undefined, fix deprecate bug, additional vocabulary for validate --- lang/js/schema.js | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 7058c00cd..66326d5ab 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -108,15 +108,13 @@ module.exports = function(airr, schema) { for (let prop in this.definition['properties']) { let spec = this.definition['properties'][prop]; if ((spec['x-airr']) && (spec['x-airr']['deprecated'])) - this.properties[prop] = spec; + this.deprecated[prop] = spec; else this.properties[prop] = spec; } this.required = this.definition['required'] if (! this.required) this.required = []; - //this.optional = [f for f in self.properties if f not in self.required] - return this; } @@ -143,6 +141,7 @@ module.exports = function(airr, schema) { airr.SchemaDefinition.prototype.to_bool = function(value, validate) { if (value == null) return null; + if (value == undefined) return null; var bool_value = _to_bool_map(value); if (validate && (bool_value == null)) @@ -161,6 +160,7 @@ module.exports = function(airr, schema) { airr.SchemaDefinition.prototype.to_int = function(value, validate) { if (value == null) return null; + if (value == undefined) return null; if (value == '') return null; var int_value = parseInt(value); @@ -175,6 +175,7 @@ module.exports = function(airr, schema) { airr.SchemaDefinition.prototype.to_float = function(value, validate) { if (value == null) return null; + if (value == undefined) return null; if (value == '') return null; var float_value = parseFloat(value); @@ -202,10 +203,41 @@ module.exports = function(airr, schema) { case 'number': field_value = this.to_float(field_value); break; + case 'string': + if 
(field_value == '') field_value = null; + if (field_value == undefined) field_value = null; + break; } return field_value; }; + airr.SchemaDefinition.prototype.resolve_curie = function(field, curie_value) { + if (!field) return null + if (curie_value == null) return null; + if (curie_value == '') return null; + var spec = this.spec(field); + if (!spec) return null; + if (!spec['x-airr']) return null; + if (!spec['x-airr']['ontology']) return null; + + var cmap = airr.get_curie_map(); + var iprov = airr.get_iri_providers(); + var f = curie_value.split(':'); + if (cmap[f[0]]) { + var p = cmap[f[0]]['default']['provider']; + var m = cmap[f[0]]['default']['map']; + if (p && m) { + var ontology = iprov['parameter'][f[0]][p]['ontology_id']; + var iri = cmap[f[0]]['map'][m]['iri_prefix'] + f[1]; + var ols_api = iprov['provider']['OLS']['request']['url']; + ols_api = ols_api.replace('{ontology_id}', ontology); + ols_api = ols_api.replace('{iri}', iri); + return ols_api; + } + } + return null; + }; + // // Validation functions // @@ -218,13 +250,14 @@ module.exports = function(airr, schema) { return false; } - airr.SchemaDefinition.prototype.validate_object = function(object) { + airr.SchemaDefinition.prototype.validate_object = function(object, vocabulary) { const ajv = new AJV({allErrors: true}); addFormats(ajv); ajv.addVocabulary(['x-airr', 'example', 'discriminator']); + if (vocabulary) ajv.addVocabulary(vocabulary); - const validate = ajv.compile(this.definition) - const valid = validate(object) + const validate = ajv.compile(this.definition); + const valid = validate(object); if (!valid) return validate.errors; else return null; } From 04e67d7e5e5ce11f5b0d35bc47fa17925e6d4847 Mon Sep 17 00:00:00 2001 From: Christian Busse Date: Mon, 7 Oct 2024 20:55:38 +0200 Subject: [PATCH 41/59] Add MiAIRR data elements SVG --- .../images/MiAIRR_data_elements_traced.svg | 687 ++++++++++++++++++ specs/airr-schema.yaml | 2 +- 2 files changed, 688 insertions(+), 1 deletion(-) create 
mode 100644 docs/miairr/images/MiAIRR_data_elements_traced.svg diff --git a/docs/miairr/images/MiAIRR_data_elements_traced.svg b/docs/miairr/images/MiAIRR_data_elements_traced.svg new file mode 100644 index 000000000..4a3497533 --- /dev/null +++ b/docs/miairr/images/MiAIRR_data_elements_traced.svg @@ -0,0 +1,687 @@ + + + + + MiAIRR data standard elements + + + + + image/svg+xml + + MiAIRR data standard elements + + + + Christian Busse + + + 2017 + + + Corey Watson, Syed Ahmad Chan Bukhari, Christian Busse + + + + + AIRR + MiAIRR + + + en-US + This figure shows the sets, subsets and individual items of the MiAIRR (minimal information about adaptive immune receptor repertoires) standard. It further includes the mapping of the NCBI implementation of the standard, the respective target repositories for the items are depicted with a solid line. + + + Copyright 2017 Watson, Chan, Busse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index a38f3c089..345e473b6 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -4738,7 +4738,7 @@ Cell: # The Expression object acts as a container to hold a single expression level measurement from # an experiment. 
Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id +# data_processing_id Expression: type: object required: From 2458a37b9ccc68cfed2ecddd857bf36694da6669 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 16 Oct 2024 16:29:28 -0500 Subject: [PATCH 42/59] implement write_airr and test case --- lang/js/io.js | 60 ++++++++++++++++++++++++++++++++++++-- lang/js/package.json | 3 +- lang/js/tests/airr.test.js | 28 +++++++++++++++++- 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/lang/js/io.js b/lang/js/io.js index bbdc2b8a3..bbc604691 100644 --- a/lang/js/io.js +++ b/lang/js/io.js @@ -140,8 +140,64 @@ module.exports = function(airr) { return airr.read_airr(filename, true); } - airr.write_airr = function(file) { - return null; + airr.write_airr = function(filename, data, format=null, info=null, validate=false, model=true, debug=false, check_nullable=true) { + // data parameter should be an object + if (typeof data != 'object') { + let msg = 'Data parameter is not an object.'; + if (debug) console.error(msg) + throw new Error(msg); + } + + var DataFileSchema = new airr.SchemaDefinition('DataFile'); + + // Validate if requested + if (validate) { + if (debug) console.log('Validating:', filename); + try { + let schema = new airr.SchemaDefinition('DataFile'); + schema.validate_object(data); + } catch (err) { + if (debug) console.error(filename, 'failed validation.'); + throw new ValidationError(err); + } + } + + // output object + var md = {} + md['Info'] = info + if (!info) { + md['Info'] = DataFileSchema.info.copy() + md['Info']['title'] = 'AIRR Data File' + md['Info']['description'] = 'AIRR Data File written by AIRR Standards JavaScript Library' + } + + // Loop through each entry and add them to the output object + for (let k in data) { + if (k == 'Info') continue; + if (k == 'DataFile') continue; + if (!data[k]) continue; + if (model && !DataFileSchema.properties[k]) { + if (debug) 
console.error('Skipping non-DataFile object:', k); + continue + } + md[k] = data[k]; + } + + // Determine file type from extension and use appropriate format + var ext = filename.split('.').pop().toLowerCase(); + if (ext == 'yaml' || ext == 'yml') { + const yamlString = yaml.dump(data); + fs.writeFileSync(filename, yamlString); + } else if (ext == 'json') { + const jsonData = JSON.stringify(data, null, 2); + fs.writeFileSync(filename, jsonData); + } else { + let msg = 'Unknown file type:' + ext + '. Supported file extensions are "yaml", "yml" or "json"'; + if (debug) console.error(msg); + throw new Error(msg); + } + + return true; } return airr; diff --git a/lang/js/package.json b/lang/js/package.json index e80c567d4..231a903b1 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -27,7 +27,8 @@ "ajv-formats": "^2.1.1", "allof-merge": "^0.6.5", "csv-parser": "^2.3.2", - "js-yaml": "^3.10.0" + "js-yaml": "^3.10.0", + "json-diff": "^1.0.6" }, "devDependencies": { "eslint": "^7.9.0", diff --git a/lang/js/tests/airr.test.js b/lang/js/tests/airr.test.js index 3219d7f4f..5dbc78019 100644 --- a/lang/js/tests/airr.test.js +++ b/lang/js/tests/airr.test.js @@ -9,6 +9,7 @@ var path = require('path'); var airr = require("../airr") +var jsonDiff = require('json-diff'); // Paths var data_path = path.resolve(__dirname, 'data'); @@ -27,7 +28,8 @@ var combined_json = path.resolve(data_path, 'good_combined_airr.json') // Output data //var output_rep = os.path.join(data_path, 'output_rep.json') -//var output_good = os.path.join(data_path, 'output_data.json') +var output_good = path.resolve(data_path, 'output_data.json') +var output_good_yaml = path.resolve(data_path, 'output_data.yaml') //var output_blank = os.path.join(data_path, 'output_blank.json') test('load schema', async () => { @@ -44,3 +46,27 @@ test('load good rearrangement tsv', () => { const data = airr.load_rearrangement(rearrangement_good, true); expect(data).not.toBeNull(); }); + +test('write good AIRR 
DataFile', () => { + const repertoire_data = airr.read_airr(rep_good, validate=true, debug=true); + expect(repertoire_data).not.toBeNull(); + const germline_data = airr.read_airr(germline_good, validate=true, debug=true); + expect(germline_data).not.toBeNull(); + const genotype_data = airr.read_airr(genotype_good, validate=true, debug=true); + expect(genotype_data).not.toBeNull(); + + // combine together and write + let obj = {} + obj['Repertoire'] = repertoire_data['Repertoire']; + obj['GermlineSet'] = germline_data['GermlineSet']; + obj['GenotypeSet'] = genotype_data['GenotypeSet']; + airr.write_airr(output_good, obj, validate=true, debug=true); + + // verify we can read it + let data = airr.read_airr(output_good, validate=true, debug=true); + + // is the data identical? + delete data['Info']; + let d = jsonDiff.diff(obj, data); + expect(d).toBeUndefined(); +}); From 48b6ce693f2e63661b9401364446dd46387cc510 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Wed, 16 Oct 2024 16:30:18 -0500 Subject: [PATCH 43/59] add nano, fix ENV syntax --- docker/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7cdce0d54..2f62cdac0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -18,7 +18,8 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install - libfontconfig1-dev \ libssl-dev \ libharfbuzz-dev libfribidi-dev \ - libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev + libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \ + nano RUN pip3 install \ pandas \ @@ -34,7 +35,7 @@ RUN pip3 install \ RUN R -e 'install.packages(c("devtools","knitr","rmarkdown","testthat","readr"),dependencies=T)' # node -ENV NODE_VER v14.21.3 +ENV NODE_VER=v14.21.3 RUN wget https://nodejs.org/dist/$NODE_VER/node-$NODE_VER-linux-x64.tar.xz RUN tar xf node-$NODE_VER-linux-x64.tar.xz RUN cp -rf /node-$NODE_VER-linux-x64/bin/* /usr/bin From a494e51aec6225612856743a4b1caf8c03b7744c Mon Sep 17 
00:00:00 2001 From: Scott Christley Date: Wed, 16 Oct 2024 21:55:54 -0500 Subject: [PATCH 44/59] implement rearrangement write functions and tests --- lang/js/io.js | 211 +++++++++++++++++++++++++++++++++---- lang/js/schema.js | 9 +- lang/js/tests/airr.test.js | 81 ++++++++++++-- 3 files changed, 273 insertions(+), 28 deletions(-) diff --git a/lang/js/io.js b/lang/js/io.js index bbc604691..82619214f 100644 --- a/lang/js/io.js +++ b/lang/js/io.js @@ -43,11 +43,8 @@ module.exports = function(airr) { readable.pause(); if (validate) { - try { - schema.validate_header(headers); - } catch (err) { - reject(err); - } + let errors = schema.validate_header(headers); + if (errors) return reject(errors); } if (header_callback) { @@ -61,16 +58,17 @@ module.exports = function(airr) { readable.pause(); if (validate) { - try { - schema.validate_row(row); - } catch (err) { - reject(err); - } + let errors = schema.validate_object(row); + if (errors) return reject(errors); } if (row_callback) { if (isPromise(row_callback)) await row_callback(row); else row_callback(row); + } else { + // no reason to read the rows if no callback, so just end the stream + readable.destroy(); + return resolve(); } readable.resume(); @@ -81,12 +79,123 @@ module.exports = function(airr) { }); } - airr.create_rearrangement = function(file) { - return null; + airr.create_rearrangement = async function(filename, row_callback, fields=null, debug=false) { + if (!row_callback) { + let msg = 'Row callback function not provided.'; + if (debug) console.error(msg); + throw new Error(msg); + } + + var is_gz = false; + var ext = filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + var schema = new airr.SchemaDefinition('Rearrangement'); + + // order fields according to spec + var field_names = schema.required.slice(); + if (fields) { + var additional_fields = []; + for (let f in fields) { + if (schema.required.includes(fields[f])) + continue; + else if (schema.properties.includes(fields[f])) 
+ field_names.push(fields[f]); + else + additional_fields.push(fields[f]); + } + field_names = field_names.concat(additional_fields); + } + + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(filename); + if (is_gz) writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + let row = null; + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + + while (row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + } + + writable.end(); + return resolve(); + }); } - airr.derive_rearrangement = function(file) { - return null; + airr.derive_rearrangement = async function(out_filename, in_filename, row_callback, fields=null, debug=false) { + if (!row_callback) { + let msg = 'Row callback function not provided.'; + if (debug) console.error(msg); + throw new Error(msg); + } + + var is_gz = false; + var ext = out_filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + // get fields from input file + var field_names = null; + var got_headers = function(h) { field_names = h; } + await airr.read_rearrangement(in_filename, got_headers, null, false, debug) + .catch(function(error) { Promise.reject(error); }); + + // add any additional fields + if (fields) { + var additional_fields = []; + for (let f in fields) { + if (field_names.includes(fields[f])) + continue; + else + additional_fields.push(fields[f]); + } + field_names = field_names.concat(additional_fields); + } + + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(out_filename); + if (is_gz) 
writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + let row = null; + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + + while (row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + + if (isPromise(row_callback)) row = await row_callback(field_names); + else row = row_callback(field_names); + } + + writable.end(); + return resolve(); + }); } airr.load_rearrangement = async function(filename, validate=false, debug=false) { @@ -99,16 +208,78 @@ module.exports = function(airr) { return Promise.resolve(rows); } - airr.dump_rearrangement = function(file) { - return null; + airr.dump_rearrangement = async function(data, filename, fields=null, debug=false) { + var idx = 0; + var row_callback = function(field_names) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + return airr.create_rearrangement(filename, row_callback, fields, debug); } - airr.merge_rearrangement = function(file) { - return null; + airr.merge_rearrangement = async function(out_filename, in_filenames, drop=false, debug=false) { + var is_gz = false; + var ext = out_filename.split('.').pop().toLowerCase(); + if (ext == 'gz') is_gz = true; + + // gather fields from input files + var first = true; + var field_names = []; + var got_headers = function(headers) { + if (first) { + field_names = headers; + first = false; + } else { + // intersection + if (drop) field_names = field_names.filter(value => headers.includes(value)); + else { // or union + for (let h in headers) { + if (!field_names.includes(headers[h])) { + field_names.push(headers[h]); + } + } + } + } + } + for (let f in in_filenames) { + await airr.read_rearrangement(in_filenames[f], got_headers, null, false, debug) 
+ .catch(function(error) { Promise.reject(error); }); + } + + // write input files to output file sequentially + return new Promise(async function(resolve, reject) { + var writable = fs.createWriteStream(out_filename); + if (is_gz) writable.pipe(zlib.createGunzip()); + + // write header + writable.write(field_names.join('\t')); + writable.write('\n'); + + var got_row = function(row) { + let vals = []; + for (let i = 0; i < field_names.length; ++i) { + let p = field_names[i]; + if (row[p] == undefined) vals.push(''); + else vals.push(row[p]); + } + writable.write(vals.join('\t')); + writable.write('\n'); + } + + for (let f in in_filenames) { + await airr.read_rearrangement(in_filenames[f], null, got_row, false, debug) + .catch(function(error) { Promise.reject(error); }); + } + + writable.end(); + return resolve(); + }); } - airr.validate_rearrangement = function(file) { - return null; + airr.validate_rearrangement = async function(filename, debug=false) { + var got_row = function(row) { }; + return airr.read_rearrangement(filename, null, got_row, true, true, debug); } airr.read_airr = function(filename, validate=false, model=true, debug=false) { diff --git a/lang/js/schema.js b/lang/js/schema.js index 66326d5ab..541dec6fe 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -243,7 +243,14 @@ module.exports = function(airr, schema) { // airr.SchemaDefinition.prototype.validate_header = function(header) { - return false; + var missing_fields = []; + + // Check required fields + for (let f in this.required) { + if (!header.includes(this.required[f])) return missing_fields.push(this.required[f]); + } + if (missing_fields.length == 0) return null; + else return missing_fields; } airr.SchemaDefinition.prototype.validate_row = function(row) { diff --git a/lang/js/tests/airr.test.js b/lang/js/tests/airr.test.js index 5dbc78019..897e5502d 100644 --- a/lang/js/tests/airr.test.js +++ b/lang/js/tests/airr.test.js @@ -27,24 +27,91 @@ var combined_yaml = 
path.resolve(data_path, 'good_combined_airr.yaml') var combined_json = path.resolve(data_path, 'good_combined_airr.json') // Output data +var output_rearrangement_good = path.resolve(data_path, 'output_good_rearrangement.tsv') +var output_rearrangement_good_gz = path.resolve(data_path, 'output_good_rearrangement.tsv.gz') //var output_rep = os.path.join(data_path, 'output_rep.json') var output_good = path.resolve(data_path, 'output_data.json') var output_good_yaml = path.resolve(data_path, 'output_data.yaml') //var output_blank = os.path.join(data_path, 'output_blank.json') test('load schema', async () => { - const schema = await airr.load_schema(); - expect(schema).not.toBeNull(); + const schema = await airr.load_schema(); + expect(schema).not.toBeNull(); }); test('load good airr yaml', () => { - const data = airr.read_airr(rep_good, true); - expect(data).not.toBeNull(); + const data = airr.read_airr(rep_good, true); + expect(data).not.toBeNull(); }); -test('load good rearrangement tsv', () => { - const data = airr.load_rearrangement(rearrangement_good, true); - expect(data).not.toBeNull(); +test('load good rearrangement tsv', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); +}); + +test('write good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + var idx = 0; + var row_callback = function(fields) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + await airr.create_rearrangement(output_rearrangement_good, row_callback); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('dump good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + await airr.dump_rearrangement(data, output_rearrangement_good); + + const 
new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('derive good AIRR Rearrangement TSV', async () => { + const data = await airr.load_rearrangement(rearrangement_good, true); + expect(data).not.toBeNull(); + + var idx = 0; + var row_callback = function(fields) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + + await airr.derive_rearrangement(output_rearrangement_good, rearrangement_good, row_callback); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); +}); + +test('validate good rearrangement tsv', async () => { + let isValid = true; + const data = await airr.validate_rearrangement(rearrangement_good, true) + .catch(function(error) { console.error(error); isValid = false; }); + expect(data).not.toBeNull(); + expect(isValid).toBe(true); +}); + +test('validate bad rearrangement tsv', async () => { + let isValid = true; + const data = await airr.validate_rearrangement(rearrangement_bad, true) + .catch(function(error) { console.error(error); isValid = false; }); + expect(isValid).toBe(false); +}); + +test('merge good AIRR Rearrangement TSV', async () => { + var in_files = [ rearrangement_good, rearrangement_good, rearrangement_good]; + await airr.merge_rearrangement(output_rearrangement_good, in_files, false, true); + + const new_data = await airr.load_rearrangement(output_rearrangement_good, true); + expect(new_data).not.toBeNull(); }); test('write good AIRR DataFile', () => { From fe105773669658b4be691acaa04f0ba7fd59354a Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 11:08:26 -0500 Subject: [PATCH 45/59] additional doc --- lang/js/README.rst | 40 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/lang/js/README.rst b/lang/js/README.rst index 03cc88f3e..fa5ff9f9c 100644 --- a/lang/js/README.rst +++ b/lang/js/README.rst @@ -42,6 
+42,9 @@ like webpack to provide an alternative entry point, and browser code can import import { airr } from 'airr-js'; +The read and write functions for AIRR Rearrangement TSV files support gzip compressed +data. File names that end with ``.gz`` extension will automatically be uncompressed +when reading or automatically compressed when writing. Create Blank Template Schema Objects (browser, nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -105,10 +108,11 @@ there is no streaming interface:: Reading AIRR Rearrangement TSV files (nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``airr-js`` package contains functions to read and write AIRR Rearrangement +The ``airr-js`` package contains functions to read AIRR Rearrangement TSV files as either a stream or the complete file. The streaming interface requires two callback functions to be provided; one for the header and another for each -row as it is read:: +row as it is read. The callback functions can be synchronous or they can +return a Promise:: var airr = require('airr-js'); await airr.load_schema(); @@ -125,4 +129,34 @@ row as it is read:: Writing AIRR Rearrangement TSV files (nodejs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To be implemented. These write functions will been implemented in a patch release. +The ``airr-js`` package contains functions to write AIRR Rearrangement +TSV files as either a stream or the complete file. The streaming interface requires +a callback function which provides the data for each row or returns ``null`` to indicate +no more data. 
The callback function can be synchronous or it can return a Promise:: + + var airr = require('airr-js'); + await airr.load_schema(); + + // read some data + var data = await airr.load_rearrangement('input.airr.tsv'); + + // write file completely + await airr.dump_rearrangement(data, 'output.airr.tsv'); + + // for streaming, need a callback function to provide the row data + var idx = 0; + var row_callback = function(fields) { + if (idx >= data.length) return null; + else return data[idx++]; + }; + // write the file + await airr.create_rearrangement('output.airr.tsv', row_callback) + + // callback function which returns a promise + var row_callback = function(fields) { + return new Promise(async function(resolve, reject) { + // acquire some data asynchronously, e.g. from a database + var row = await read_from_database(); + return resolve(row); + }); + }; From a8bb911473cb4923a8d9028418551115119b6610 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 11:14:26 -0500 Subject: [PATCH 46/59] update version --- lang/js/NEWS.rst | 2 +- lang/js/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lang/js/NEWS.rst b/lang/js/NEWS.rst index 93791d62e..e40acf150 100644 --- a/lang/js/NEWS.rst +++ b/lang/js/NEWS.rst @@ -1,4 +1,4 @@ -Version 1.5.0: DATE +Version 2.0.0: DATE -------------------------------------------------------------------------------- Initial release. 
diff --git a/lang/js/package.json b/lang/js/package.json index 231a903b1..3f771f1d3 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -1,6 +1,6 @@ { "name": "airr-js", - "version": "1.4.1", + "version": "2.0.0", "description": "AIRR Community Data Representation Standard reference library for antibody and TCR sequencing data.", "author": [ { From 0e091db4c99c9242feca8649623253aba7a93147 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 16:08:33 -0500 Subject: [PATCH 47/59] Create node.js.yml --- .github/workflows/node.js.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/node.js.yml diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml new file mode 100644 index 000000000..198ea1695 --- /dev/null +++ b/.github/workflows/node.js.yml @@ -0,0 +1,35 @@ +# This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs + +name: Node.js CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +defaults: + run: + working-directory: lang/js + +jobs: + build: + + runs-on: ubuntu-latest + + strategy: + matrix: + node-version: [18.x, 20.x, 22.x] + # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ + + steps: + - uses: actions/checkout@v4 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + cache: 'npm' + - run: npm ci + - run: npm run build --if-present + - run: npm test From ae333a08d598b43bf8f11a91bfb580220a1c9348 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 16:42:24 -0500 Subject: [PATCH 48/59] sync with v1.5 --- lang/js/schema.js | 49 +++++++++++++++++++++++++++++++++++++++++++++-- 1 
file changed, 47 insertions(+), 2 deletions(-) diff --git a/lang/js/schema.js b/lang/js/schema.js index 541dec6fe..a995944ac 100644 --- a/lang/js/schema.js +++ b/lang/js/schema.js @@ -320,6 +320,51 @@ module.exports = function(airr, schema) { return (obj); } + // + // Utility functions for supporting ADC queries + // + + // Given a field in the query point syntax + // traverse the schema and return the field definition + airr.specForQueryField = function(schema, field) { + //console.log(JSON.stringify(schema, null, 2)); + var content_properties = null; + if (field != undefined) { + var props = schema; + + // traverse down the object schema hierarchy to find field definition + let objs = field.split('.'); + for (let i = 0; i < objs.length; ++i) { + let p = objs[i]; + if (props.type == 'array') { + if (props.items.type == 'object') { + props = props.items.properties[p]; + } else if (props.items['allOf'] != undefined) { + var new_props = undefined; + for (let j = 0; j < props.items['allOf'].length; ++j) { + if (props.items['allOf'][j].properties != undefined) + if (props.items['allOf'][j].properties[p] != undefined) { + new_props = props.items['allOf'][j].properties[p]; + break; + } + } + props = new_props; + } + } else if (props.type == 'object') { + props = props.properties[p]; + } else props = undefined; + if (props == undefined) break; + } + + // field definition must have type + if (props != undefined) { + if (props['type'] != undefined) { + content_properties = props; + } + } + } + return content_properties; + } // Given a field, check if included in field set // Field sets include: @@ -401,7 +446,7 @@ module.exports = function(airr, schema) { break; default: // unhandled schema structure - console.error('VDJServer ADC API INFO: Unhandled schema structure: ' + full_field); + console.error('airr-js internal error (airr.collectFields): Unhandled schema structure: ' + full_field); break; } } @@ -449,7 +494,7 @@ module.exports = function(airr, schema) { obj = 
obj[path[p]]; } else if (obj[path[p]] != undefined) obj = obj[path[p]]; else if (p == path.length - 1) obj[path[p]] = null; - else console.error('VDJServer ADC API ERROR: Internal error (addFields) do not know how to handle path element: ' + p); + else console.error('airr-js internal error (airr.addFields): Unhandled path element: ' + p); } } }; From 2a5446f7f1a6b3a4bea2440e068200e5705ed181 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 16:42:43 -0500 Subject: [PATCH 49/59] add lock file --- lang/js/package-lock.json | 3481 +++++++++++++++++++++++++++++++++++++ 1 file changed, 3481 insertions(+) create mode 100644 lang/js/package-lock.json diff --git a/lang/js/package-lock.json b/lang/js/package-lock.json new file mode 100644 index 000000000..88c3c24cf --- /dev/null +++ b/lang/js/package-lock.json @@ -0,0 +1,3481 @@ +{ + "name": "airr-js", + "version": "2.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@ampproject/remapping": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", + "dev": true, + "requires": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "@apidevtools/json-schema-ref-parser": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-10.1.0.tgz", + "integrity": "sha512-3e+viyMuXdrcK8v5pvP+SDoAQ77FH6OyRmuK48SZKmdHJRFm87RsSs8qm6kP39a/pOPURByJw+OXzQIqcfmKtA==", + "requires": { + "@jsdevtools/ono": "^7.1.3", + "@types/json-schema": "^7.0.11", + "@types/lodash.clonedeep": "^4.5.7", + "js-yaml": "^4.1.0", + "lodash.clonedeep": "^4.5.0" + }, + "dependencies": { + "js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": 
"sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "requires": { + "argparse": "^2.0.1" + } + } + } + }, + "@babel/code-frame": { + "version": "7.12.11", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.12.11.tgz", + "integrity": "sha512-Zt1yodBx1UcyiePMSkWnU4hPqhwq7hGi2nFL1LeA3EUl+q2LQx16MISgJ0+z7dnmgvP9QtIleuETGOiOH1RcIw==", + "dev": true, + "requires": { + "@babel/highlight": "^7.10.4" + } + }, + "@babel/compat-data": { + "version": "7.25.8", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.25.8.tgz", + "integrity": "sha512-ZsysZyXY4Tlx+Q53XdnOFmqwfB9QDTHYxaZYajWRoBLuLEAwI2UIbtxOjWh/cFaa9IKUlcB+DDuoskLuKu56JA==", + "dev": true + }, + "@babel/core": { + "version": "7.25.8", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.25.8.tgz", + "integrity": "sha512-Oixnb+DzmRT30qu9d3tJSQkxuygWm32DFykT4bRoORPa9hZ/L4KhVB/XiRm6KG+roIEM7DBQlmg27kw2HZkdZg==", + "dev": true, + "requires": { + "@ampproject/remapping": "^2.2.0", + "@babel/code-frame": "^7.25.7", + "@babel/generator": "^7.25.7", + "@babel/helper-compilation-targets": "^7.25.7", + "@babel/helper-module-transforms": "^7.25.7", + "@babel/helpers": "^7.25.7", + "@babel/parser": "^7.25.8", + "@babel/template": "^7.25.7", + "@babel/traverse": "^7.25.7", + "@babel/types": "^7.25.8", + "convert-source-map": "^2.0.0", + "debug": "^4.1.0", + "gensync": "^1.0.0-beta.2", + "json5": "^2.2.3", + "semver": "^6.3.1" + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.25.7.tgz", + "integrity": "sha512-0xZJFNE5XMpENsgfHYTw8FbX4kv53mFLn2i3XPoq69LyhYSCBJtitaHx9QnsVTrsogI4Z3+HtEfZ2/GFPOtf5g==", + "dev": true, + "requires": { + "@babel/highlight": "^7.25.7", + "picocolors": "^1.0.0" + } + }, + "semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": 
"sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true + } + } + }, + "@babel/generator": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.25.7.tgz", + "integrity": "sha512-5Dqpl5fyV9pIAD62yK9P7fcA768uVPUyrQmqpqstHWgMma4feF1x/oFysBCVZLY5wJ2GkMUCdsNDnGZrPoR6rA==", + "dev": true, + "requires": { + "@babel/types": "^7.25.7", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", + "jsesc": "^3.0.2" + } + }, + "@babel/helper-compilation-targets": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.25.7.tgz", + "integrity": "sha512-DniTEax0sv6isaw6qSQSfV4gVRNtw2rte8HHM45t9ZR0xILaufBRNkpMifCRiAPyvL4ACD6v0gfCwCmtOQaV4A==", + "dev": true, + "requires": { + "@babel/compat-data": "^7.25.7", + "@babel/helper-validator-option": "^7.25.7", + "browserslist": "^4.24.0", + "lru-cache": "^5.1.1", + "semver": "^6.3.1" + }, + "dependencies": { + "semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true + } + } + }, + "@babel/helper-module-imports": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.25.7.tgz", + "integrity": "sha512-o0xCgpNmRohmnoWKQ0Ij8IdddjyBFE4T2kagL/x6M3+4zUgc+4qTOUBoNe4XxDskt1HPKO007ZPiMgLDq2s7Kw==", + "dev": true, + "requires": { + "@babel/traverse": "^7.25.7", + "@babel/types": "^7.25.7" + } + }, + "@babel/helper-module-transforms": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.25.7.tgz", + "integrity": "sha512-k/6f8dKG3yDz/qCwSM+RKovjMix563SLxQFo0UhRNo239SP6n9u5/eLtKD6EAjwta2JHJ49CsD8pms2HdNiMMQ==", + "dev": true, + "requires": { 
+ "@babel/helper-module-imports": "^7.25.7", + "@babel/helper-simple-access": "^7.25.7", + "@babel/helper-validator-identifier": "^7.25.7", + "@babel/traverse": "^7.25.7" + } + }, + "@babel/helper-plugin-utils": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.25.7.tgz", + "integrity": "sha512-eaPZai0PiqCi09pPs3pAFfl/zYgGaE6IdXtYvmf0qlcDTd3WCtO7JWCcRd64e0EQrcYgiHibEZnOGsSY4QSgaw==", + "dev": true + }, + "@babel/helper-simple-access": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.25.7.tgz", + "integrity": "sha512-FPGAkJmyoChQeM+ruBGIDyrT2tKfZJO8NcxdC+CWNJi7N8/rZpSxK7yvBJ5O/nF1gfu5KzN7VKG3YVSLFfRSxQ==", + "dev": true, + "requires": { + "@babel/traverse": "^7.25.7", + "@babel/types": "^7.25.7" + } + }, + "@babel/helper-string-parser": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.7.tgz", + "integrity": "sha512-CbkjYdsJNHFk8uqpEkpCvRs3YRp9tY6FmFY7wLMSYuGYkrdUi7r2lc4/wqsvlHoMznX3WJ9IP8giGPq68T/Y6g==", + "dev": true + }, + "@babel/helper-validator-identifier": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.7.tgz", + "integrity": "sha512-AM6TzwYqGChO45oiuPqwL2t20/HdMC1rTPAesnBCgPCSF1x3oN9MVUwQV2iyz4xqWrctwK5RNC8LV22kaQCNYg==", + "dev": true + }, + "@babel/helper-validator-option": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.25.7.tgz", + "integrity": "sha512-ytbPLsm+GjArDYXJ8Ydr1c/KJuutjF2besPNbIZnZ6MKUxi/uTA22t2ymmA4WFjZFpjiAMO0xuuJPqK2nvDVfQ==", + "dev": true + }, + "@babel/helpers": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.25.7.tgz", + "integrity": 
"sha512-Sv6pASx7Esm38KQpF/U/OXLwPPrdGHNKoeblRxgZRLXnAtnkEe4ptJPDtAZM7fBLadbc1Q07kQpSiGQ0Jg6tRA==", + "dev": true, + "requires": { + "@babel/template": "^7.25.7", + "@babel/types": "^7.25.7" + } + }, + "@babel/highlight": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.25.7.tgz", + "integrity": "sha512-iYyACpW3iW8Fw+ZybQK+drQre+ns/tKpXbNESfrhNnPLIklLbXr7MYJ6gPEd0iETGLOK+SxMjVvKb/ffmk+FEw==", + "dev": true, + "requires": { + "@babel/helper-validator-identifier": "^7.25.7", + "chalk": "^2.4.2", + "js-tokens": "^4.0.0", + "picocolors": "^1.0.0" + }, + "dependencies": { + "chalk": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", + "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", + "dev": true, + "requires": { + "ansi-styles": "^3.2.1", + "escape-string-regexp": "^1.0.5", + "supports-color": "^5.3.0" + } + }, + "escape-string-regexp": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", + "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", + "dev": true + } + } + }, + "@babel/parser": { + "version": "7.25.8", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.25.8.tgz", + "integrity": "sha512-HcttkxzdPucv3nNFmfOOMfFf64KgdJVqm1KaCm25dPGMLElo9nsLvXeJECQg8UzPuBGLyTSA0ZzqCtDSzKTEoQ==", + "dev": true, + "requires": { + "@babel/types": "^7.25.8" + } + }, + "@babel/plugin-syntax-async-generators": { + "version": "7.8.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz", + "integrity": "sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-bigint": { + "version": 
"7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-bigint/-/plugin-syntax-bigint-7.8.3.tgz", + "integrity": "sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-class-properties": { + "version": "7.12.13", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-properties/-/plugin-syntax-class-properties-7.12.13.tgz", + "integrity": "sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.12.13" + } + }, + "@babel/plugin-syntax-class-static-block": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-class-static-block/-/plugin-syntax-class-static-block-7.14.5.tgz", + "integrity": "sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.14.5" + } + }, + "@babel/plugin-syntax-import-attributes": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-attributes/-/plugin-syntax-import-attributes-7.25.7.tgz", + "integrity": "sha512-AqVo+dguCgmpi/3mYBdu9lkngOBlQ2w2vnNpa6gfiCxQZLzV4ZbhsXitJ2Yblkoe1VQwtHSaNmIaGll/26YWRw==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.25.7" + } + }, + "@babel/plugin-syntax-import-meta": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-import-meta/-/plugin-syntax-import-meta-7.10.4.tgz", + "integrity": "sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.10.4" + } + }, + "@babel/plugin-syntax-json-strings": { + "version": "7.8.3", + "resolved": 
"https://registry.npmjs.org/@babel/plugin-syntax-json-strings/-/plugin-syntax-json-strings-7.8.3.tgz", + "integrity": "sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-jsx": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.25.7.tgz", + "integrity": "sha512-ruZOnKO+ajVL/MVx+PwNBPOkrnXTXoWMtte1MBpegfCArhqOe3Bj52avVj1huLLxNKYKXYaSxZ2F+woK1ekXfw==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.25.7" + } + }, + "@babel/plugin-syntax-logical-assignment-operators": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-logical-assignment-operators/-/plugin-syntax-logical-assignment-operators-7.10.4.tgz", + "integrity": "sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.10.4" + } + }, + "@babel/plugin-syntax-nullish-coalescing-operator": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-nullish-coalescing-operator/-/plugin-syntax-nullish-coalescing-operator-7.8.3.tgz", + "integrity": "sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-numeric-separator": { + "version": "7.10.4", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-numeric-separator/-/plugin-syntax-numeric-separator-7.10.4.tgz", + "integrity": "sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.10.4" + } + }, + "@babel/plugin-syntax-object-rest-spread": { + "version": "7.8.3", + "resolved": 
"https://registry.npmjs.org/@babel/plugin-syntax-object-rest-spread/-/plugin-syntax-object-rest-spread-7.8.3.tgz", + "integrity": "sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-optional-catch-binding": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-catch-binding/-/plugin-syntax-optional-catch-binding-7.8.3.tgz", + "integrity": "sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-optional-chaining": { + "version": "7.8.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-optional-chaining/-/plugin-syntax-optional-chaining-7.8.3.tgz", + "integrity": "sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.8.0" + } + }, + "@babel/plugin-syntax-private-property-in-object": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-private-property-in-object/-/plugin-syntax-private-property-in-object-7.14.5.tgz", + "integrity": "sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.14.5" + } + }, + "@babel/plugin-syntax-top-level-await": { + "version": "7.14.5", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-top-level-await/-/plugin-syntax-top-level-await-7.14.5.tgz", + "integrity": "sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.14.5" + } + }, + "@babel/plugin-syntax-typescript": { + "version": "7.25.7", + "resolved": 
"https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.25.7.tgz", + "integrity": "sha512-rR+5FDjpCHqqZN2bzZm18bVYGaejGq5ZkpVCJLXor/+zlSrSoc4KWcHI0URVWjl/68Dyr1uwZUz/1njycEAv9g==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.25.7" + } + }, + "@babel/template": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.25.7.tgz", + "integrity": "sha512-wRwtAgI3bAS+JGU2upWNL9lSlDcRCqD05BZ1n3X2ONLH1WilFP6O1otQjeMK/1g0pvYcXC7b/qVUB1keofjtZA==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.25.7", + "@babel/parser": "^7.25.7", + "@babel/types": "^7.25.7" + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.25.7.tgz", + "integrity": "sha512-0xZJFNE5XMpENsgfHYTw8FbX4kv53mFLn2i3XPoq69LyhYSCBJtitaHx9QnsVTrsogI4Z3+HtEfZ2/GFPOtf5g==", + "dev": true, + "requires": { + "@babel/highlight": "^7.25.7", + "picocolors": "^1.0.0" + } + } + } + }, + "@babel/traverse": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.25.7.tgz", + "integrity": "sha512-jatJPT1Zjqvh/1FyJs6qAHL+Dzb7sTb+xr7Q+gM1b+1oBsMsQQ4FkVKb6dFlJvLlVssqkRzV05Jzervt9yhnzg==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.25.7", + "@babel/generator": "^7.25.7", + "@babel/parser": "^7.25.7", + "@babel/template": "^7.25.7", + "@babel/types": "^7.25.7", + "debug": "^4.3.1", + "globals": "^11.1.0" + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.25.7.tgz", + "integrity": "sha512-0xZJFNE5XMpENsgfHYTw8FbX4kv53mFLn2i3XPoq69LyhYSCBJtitaHx9QnsVTrsogI4Z3+HtEfZ2/GFPOtf5g==", + "dev": true, + "requires": { + "@babel/highlight": "^7.25.7", + "picocolors": "^1.0.0" + } + }, + "globals": { + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz", 
+ "integrity": "sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==", + "dev": true + } + } + }, + "@babel/types": { + "version": "7.25.8", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.25.8.tgz", + "integrity": "sha512-JWtuCu8VQsMladxVz/P4HzHUGCAwpuqacmowgXFs5XjxIgKuNjnLokQzuVjlTvIzODaDmpjT3oxcC48vyk9EWg==", + "dev": true, + "requires": { + "@babel/helper-string-parser": "^7.25.7", + "@babel/helper-validator-identifier": "^7.25.7", + "to-fast-properties": "^2.0.0" + } + }, + "@bcoe/v8-coverage": { + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/@bcoe/v8-coverage/-/v8-coverage-0.2.3.tgz", + "integrity": "sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==", + "dev": true + }, + "@eslint/eslintrc": { + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.4.3.tgz", + "integrity": "sha512-J6KFFz5QCYUJq3pf0mjEcCJVERbzv71PUIDczuh9JkwGEzced6CO5ADLHB1rbf/+oPBtoPfMYNOpGDzCANlbXw==", + "dev": true, + "requires": { + "ajv": "^6.12.4", + "debug": "^4.1.1", + "espree": "^7.3.0", + "globals": "^13.9.0", + "ignore": "^4.0.6", + "import-fresh": "^3.2.1", + "js-yaml": "^3.13.1", + "minimatch": "^3.0.4", + "strip-json-comments": "^3.1.1" + }, + "dependencies": { + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + } + } + }, + 
"@ewoudenberg/difflib": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@ewoudenberg/difflib/-/difflib-0.1.0.tgz", + "integrity": "sha512-OU5P5mJyD3OoWYMWY+yIgwvgNS9cFAU10f+DDuvtogcWQOoJIsQ4Hy2McSfUfhKjq8L0FuWVb4Rt7kgA+XK86A==", + "requires": { + "heap": ">= 0.2.0" + } + }, + "@humanwhocodes/config-array": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.5.0.tgz", + "integrity": "sha512-FagtKFz74XrTl7y6HCzQpwDfXP0yhxe9lHLD1UZxjvZIcbyRz8zTFF/yYNfSfzU414eDwZ1SrO0Qvtyf+wFMQg==", + "dev": true, + "requires": { + "@humanwhocodes/object-schema": "^1.2.0", + "debug": "^4.1.1", + "minimatch": "^3.0.4" + } + }, + "@humanwhocodes/object-schema": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz", + "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA==", + "dev": true + }, + "@istanbuljs/load-nyc-config": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", + "integrity": "sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==", + "dev": true, + "requires": { + "camelcase": "^5.3.1", + "find-up": "^4.1.0", + "get-package-type": "^0.1.0", + "js-yaml": "^3.13.1", + "resolve-from": "^5.0.0" + }, + "dependencies": { + "resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true + } + } + }, + "@istanbuljs/schema": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz", + "integrity": "sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==", + "dev": true + }, + "@jest/console": { + "version": 
"29.7.0", + "resolved": "https://registry.npmjs.org/@jest/console/-/console-29.7.0.tgz", + "integrity": "sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0" + } + }, + "@jest/core": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/core/-/core-29.7.0.tgz", + "integrity": "sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==", + "dev": true, + "requires": { + "@jest/console": "^29.7.0", + "@jest/reporters": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-changed-files": "^29.7.0", + "jest-config": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-resolve-dependencies": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "jest-watcher": "^29.7.0", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-ansi": "^6.0.0" + } + }, + "@jest/environment": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/environment/-/environment-29.7.0.tgz", + "integrity": "sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==", + "dev": true, + "requires": { + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0" + } + }, + "@jest/expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect/-/expect-29.7.0.tgz", + "integrity": 
"sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==", + "dev": true, + "requires": { + "expect": "^29.7.0", + "jest-snapshot": "^29.7.0" + } + }, + "@jest/expect-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/expect-utils/-/expect-utils-29.7.0.tgz", + "integrity": "sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==", + "dev": true, + "requires": { + "jest-get-type": "^29.6.3" + } + }, + "@jest/fake-timers": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-29.7.0.tgz", + "integrity": "sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "@sinonjs/fake-timers": "^10.0.2", + "@types/node": "*", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + } + }, + "@jest/globals": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/globals/-/globals-29.7.0.tgz", + "integrity": "sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==", + "dev": true, + "requires": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/types": "^29.6.3", + "jest-mock": "^29.7.0" + } + }, + "@jest/reporters": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/reporters/-/reporters-29.7.0.tgz", + "integrity": "sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==", + "dev": true, + "requires": { + "@bcoe/v8-coverage": "^0.2.3", + "@jest/console": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "@types/node": "*", + "chalk": "^4.0.0", + "collect-v8-coverage": "^1.0.0", + "exit": "^0.1.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + 
"istanbul-lib-coverage": "^3.0.0", + "istanbul-lib-instrument": "^6.0.0", + "istanbul-lib-report": "^3.0.0", + "istanbul-lib-source-maps": "^4.0.0", + "istanbul-reports": "^3.1.3", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "slash": "^3.0.0", + "string-length": "^4.0.1", + "strip-ansi": "^6.0.0", + "v8-to-istanbul": "^9.0.1" + } + }, + "@jest/schemas": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/schemas/-/schemas-29.6.3.tgz", + "integrity": "sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==", + "dev": true, + "requires": { + "@sinclair/typebox": "^0.27.8" + } + }, + "@jest/source-map": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/source-map/-/source-map-29.6.3.tgz", + "integrity": "sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==", + "dev": true, + "requires": { + "@jridgewell/trace-mapping": "^0.3.18", + "callsites": "^3.0.0", + "graceful-fs": "^4.2.9" + } + }, + "@jest/test-result": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-result/-/test-result-29.7.0.tgz", + "integrity": "sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==", + "dev": true, + "requires": { + "@jest/console": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "collect-v8-coverage": "^1.0.0" + } + }, + "@jest/test-sequencer": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-29.7.0.tgz", + "integrity": "sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==", + "dev": true, + "requires": { + "@jest/test-result": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "slash": "^3.0.0" + } + }, + "@jest/transform": { + "version": "29.7.0", + "resolved": 
"https://registry.npmjs.org/@jest/transform/-/transform-29.7.0.tgz", + "integrity": "sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==", + "dev": true, + "requires": { + "@babel/core": "^7.11.6", + "@jest/types": "^29.6.3", + "@jridgewell/trace-mapping": "^0.3.18", + "babel-plugin-istanbul": "^6.1.1", + "chalk": "^4.0.0", + "convert-source-map": "^2.0.0", + "fast-json-stable-stringify": "^2.1.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "micromatch": "^4.0.4", + "pirates": "^4.0.4", + "slash": "^3.0.0", + "write-file-atomic": "^4.0.2" + } + }, + "@jest/types": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", + "integrity": "sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==", + "dev": true, + "requires": { + "@jest/schemas": "^29.6.3", + "@types/istanbul-lib-coverage": "^2.0.0", + "@types/istanbul-reports": "^3.0.0", + "@types/node": "*", + "@types/yargs": "^17.0.8", + "chalk": "^4.0.0" + } + }, + "@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dev": true, + "requires": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true + }, + "@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": 
"sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "dev": true + }, + "@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true + }, + "@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dev": true, + "requires": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "@jsdevtools/ono": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz", + "integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg==" + }, + "@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==", + "dev": true + }, + "@sinonjs/commons": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-3.0.1.tgz", + "integrity": "sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==", + "dev": true, + "requires": { + "type-detect": "4.0.8" + } + }, + "@sinonjs/fake-timers": { + "version": "10.3.0", + "resolved": "https://registry.npmjs.org/@sinonjs/fake-timers/-/fake-timers-10.3.0.tgz", + "integrity": "sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==", + "dev": true, + "requires": { + "@sinonjs/commons": "^3.0.0" + } + }, + "@types/babel__core": { + "version": "7.20.5", + "resolved": 
"https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", + "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", + "dev": true, + "requires": { + "@babel/parser": "^7.20.7", + "@babel/types": "^7.20.7", + "@types/babel__generator": "*", + "@types/babel__template": "*", + "@types/babel__traverse": "*" + } + }, + "@types/babel__generator": { + "version": "7.6.8", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.8.tgz", + "integrity": "sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==", + "dev": true, + "requires": { + "@babel/types": "^7.0.0" + } + }, + "@types/babel__template": { + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", + "dev": true, + "requires": { + "@babel/parser": "^7.1.0", + "@babel/types": "^7.0.0" + } + }, + "@types/babel__traverse": { + "version": "7.20.6", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.6.tgz", + "integrity": "sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==", + "dev": true, + "requires": { + "@babel/types": "^7.20.7" + } + }, + "@types/graceful-fs": { + "version": "4.1.9", + "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", + "integrity": "sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==", + "dev": true, + "requires": { + "@types/node": "*" + } + }, + "@types/istanbul-lib-coverage": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", + 
"dev": true + }, + "@types/istanbul-lib-report": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", + "dev": true, + "requires": { + "@types/istanbul-lib-coverage": "*" + } + }, + "@types/istanbul-reports": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", + "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", + "dev": true, + "requires": { + "@types/istanbul-lib-report": "*" + } + }, + "@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==" + }, + "@types/lodash": { + "version": "4.17.10", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.10.tgz", + "integrity": "sha512-YpS0zzoduEhuOWjAotS6A5AVCva7X4lVlYLF0FYHAY9sdraBfnatttHItlWeZdGhuEkf+OzMNg2ZYAx8t+52uQ==" + }, + "@types/lodash.clonedeep": { + "version": "4.5.9", + "resolved": "https://registry.npmjs.org/@types/lodash.clonedeep/-/lodash.clonedeep-4.5.9.tgz", + "integrity": "sha512-19429mWC+FyaAhOLzsS8kZUsI+/GmBAQ0HFiCPsKGU+7pBXOQWhyrY6xNNDwUSX8SMZMJvuFVMF9O5dQOlQK9Q==", + "requires": { + "@types/lodash": "*" + } + }, + "@types/node": { + "version": "22.7.6", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.7.6.tgz", + "integrity": "sha512-/d7Rnj0/ExXDMcioS78/kf1lMzYk4BZV8MZGTBKzTGZ6/406ukkbYlIsZmMPhcR5KlkunDHQLrtAVmSq7r+mSw==", + "dev": true, + "requires": { + "undici-types": "~6.19.2" + } + }, + "@types/stack-utils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": 
"sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", + "dev": true + }, + "@types/yargs": { + "version": "17.0.33", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.33.tgz", + "integrity": "sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==", + "dev": true, + "requires": { + "@types/yargs-parser": "*" + } + }, + "@types/yargs-parser": { + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", + "dev": true + }, + "acorn": { + "version": "7.4.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.4.1.tgz", + "integrity": "sha512-nQyp0o1/mNdbTO1PO6kHkwSrmgZ0MT/jCCpNiwbUjGoRN4dlBhqJtoQuCnEOKzgTVwg0ZWiCoQy6SxMebQVh8A==", + "dev": true + }, + "acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true + }, + "ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "requires": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + } + }, + "ajv-formats": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz", + "integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==", + "requires": { + "ajv": "^8.0.0" + } + }, + "allof-merge": { + "version": "0.6.6", + "resolved": "https://registry.npmjs.org/allof-merge/-/allof-merge-0.6.6.tgz", + "integrity": 
"sha512-116eZBf2he0/J4Tl7EYMz96I5Anaeio+VL0j/H2yxW9CoYQAMMv8gYcwkVRoO7XfIOv/qzSTfVzDVGAYxKFi3g==", + "requires": { + "json-crawl": "^0.5.3" + } + }, + "ansi-colors": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.3.tgz", + "integrity": "sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw==", + "dev": true + }, + "ansi-escapes": { + "version": "4.3.2", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz", + "integrity": "sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==", + "dev": true, + "requires": { + "type-fest": "^0.21.3" + }, + "dependencies": { + "type-fest": { + "version": "0.21.3", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "dev": true + } + } + }, + "ansi-regex": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", + "dev": true + }, + "ansi-styles": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", + "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", + "dev": true, + "requires": { + "color-convert": "^1.9.0" + } + }, + "anymatch": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/anymatch/-/anymatch-3.1.3.tgz", + "integrity": "sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==", + "dev": true, + "requires": { + "normalize-path": "^3.0.0", + "picomatch": "^2.0.4" + } + }, + "argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": 
"sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, + "astral-regex": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-2.0.0.tgz", + "integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==", + "dev": true + }, + "babel-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.7.0.tgz", + "integrity": "sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==", + "dev": true, + "requires": { + "@jest/transform": "^29.7.0", + "@types/babel__core": "^7.1.14", + "babel-plugin-istanbul": "^6.1.1", + "babel-preset-jest": "^29.6.3", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "slash": "^3.0.0" + } + }, + "babel-plugin-istanbul": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/babel-plugin-istanbul/-/babel-plugin-istanbul-6.1.1.tgz", + "integrity": "sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==", + "dev": true, + "requires": { + "@babel/helper-plugin-utils": "^7.0.0", + "@istanbuljs/load-nyc-config": "^1.0.0", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-instrument": "^5.0.4", + "test-exclude": "^6.0.0" + }, + "dependencies": { + "istanbul-lib-instrument": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-5.2.1.tgz", + "integrity": "sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==", + "dev": true, + "requires": { + "@babel/core": "^7.12.3", + "@babel/parser": "^7.14.7", + "@istanbuljs/schema": "^0.1.2", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^6.3.0" + } + }, + "semver": { + "version": "6.3.1", + "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", + "integrity": 
"sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", + "dev": true + } + } + }, + "babel-plugin-jest-hoist": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.6.3.tgz", + "integrity": "sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==", + "dev": true, + "requires": { + "@babel/template": "^7.3.3", + "@babel/types": "^7.3.3", + "@types/babel__core": "^7.1.14", + "@types/babel__traverse": "^7.0.6" + } + }, + "babel-preset-current-node-syntax": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/babel-preset-current-node-syntax/-/babel-preset-current-node-syntax-1.1.0.tgz", + "integrity": "sha512-ldYss8SbBlWva1bs28q78Ju5Zq1F+8BrqBZZ0VFhLBvhh6lCpC2o3gDJi/5DRLs9FgYZCnmPYIVFU4lRXCkyUw==", + "dev": true, + "requires": { + "@babel/plugin-syntax-async-generators": "^7.8.4", + "@babel/plugin-syntax-bigint": "^7.8.3", + "@babel/plugin-syntax-class-properties": "^7.12.13", + "@babel/plugin-syntax-class-static-block": "^7.14.5", + "@babel/plugin-syntax-import-attributes": "^7.24.7", + "@babel/plugin-syntax-import-meta": "^7.10.4", + "@babel/plugin-syntax-json-strings": "^7.8.3", + "@babel/plugin-syntax-logical-assignment-operators": "^7.10.4", + "@babel/plugin-syntax-nullish-coalescing-operator": "^7.8.3", + "@babel/plugin-syntax-numeric-separator": "^7.10.4", + "@babel/plugin-syntax-object-rest-spread": "^7.8.3", + "@babel/plugin-syntax-optional-catch-binding": "^7.8.3", + "@babel/plugin-syntax-optional-chaining": "^7.8.3", + "@babel/plugin-syntax-private-property-in-object": "^7.14.5", + "@babel/plugin-syntax-top-level-await": "^7.14.5" + } + }, + "babel-preset-jest": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-29.6.3.tgz", + "integrity": "sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==", + 
"dev": true, + "requires": { + "babel-plugin-jest-hoist": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0" + } + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "dev": true + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", + "dev": true, + "requires": { + "fill-range": "^7.1.1" + } + }, + "browserslist": { + "version": "4.24.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.0.tgz", + "integrity": "sha512-Rmb62sR1Zpjql25eSanFGEhAxcFwfA1K0GuQcLoaJBAcENegrQut3hYdhXFF1obQfiDyqIW/cLM5HSJ/9k884A==", + "dev": true, + "requires": { + "caniuse-lite": "^1.0.30001663", + "electron-to-chromium": "^1.5.28", + "node-releases": "^2.0.18", + "update-browserslist-db": "^1.1.0" + } + }, + "bser": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz", + "integrity": "sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==", + "dev": true, + "requires": { + "node-int64": "^0.4.0" + } + }, + "buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "dev": true + }, + "callsites": { + "version": 
"3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true + }, + "camelcase": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-5.3.1.tgz", + "integrity": "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==", + "dev": true + }, + "caniuse-lite": { + "version": "1.0.30001669", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001669.tgz", + "integrity": "sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==", + "dev": true + }, + "chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "requires": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + }, + "has-flag": { + "version": "4.0.0", + "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "char-regex": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz", + "integrity": "sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==", + "dev": true + }, + "ci-info": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", + "integrity": "sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==", + "dev": true + }, + "cjs-module-lexer": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.4.1.tgz", + "integrity": "sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==", + "dev": true + }, + "cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dev": true, + "requires": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + } + }, + "co": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz", + "integrity": "sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==", + "dev": true + }, + "collect-v8-coverage": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/collect-v8-coverage/-/collect-v8-coverage-1.0.2.tgz", + "integrity": 
"sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==", + "dev": true + }, + "color-convert": { + "version": "1.9.3", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", + "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", + "dev": true, + "requires": { + "color-name": "1.1.3" + } + }, + "color-name": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", + "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==", + "dev": true + }, + "colors": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/colors/-/colors-1.4.0.tgz", + "integrity": "sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA==" + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, + "convert-source-map": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", + "dev": true + }, + "create-jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/create-jest/-/create-jest-29.7.0.tgz", + "integrity": "sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "exit": "^0.1.2", + "graceful-fs": "^4.2.9", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "prompts": "^2.0.1" + } + }, + "cross-spawn": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + 
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "requires": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + } + }, + "csv-parser": { + "version": "2.3.5", + "resolved": "https://registry.npmjs.org/csv-parser/-/csv-parser-2.3.5.tgz", + "integrity": "sha512-LCHolC4AlNwL+5EuD5LH2VVNKpD8QixZW2zzK1XmrVYUaslFY4c5BooERHOCIubG9iv/DAyFjs4x0HvWNZuyWg==", + "requires": { + "minimist": "^1.2.0", + "through2": "^3.0.1" + } + }, + "debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "dev": true, + "requires": { + "ms": "^2.1.3" + } + }, + "dedent": { + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/dedent/-/dedent-1.5.3.tgz", + "integrity": "sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==", + "dev": true + }, + "deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true + }, + "deepmerge": { + "version": "4.3.1", + "resolved": "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.1.tgz", + "integrity": "sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==", + "dev": true + }, + "detect-newline": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz", + "integrity": "sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==", + "dev": true + }, + "diff-sequences": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-29.6.3.tgz", + "integrity": 
"sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==", + "dev": true + }, + "doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "requires": { + "esutils": "^2.0.2" + } + }, + "dreamopt": { + "version": "0.8.0", + "resolved": "https://registry.npmjs.org/dreamopt/-/dreamopt-0.8.0.tgz", + "integrity": "sha512-vyJTp8+mC+G+5dfgsY+r3ckxlz+QMX40VjPQsZc5gxVAxLmi64TBoVkP54A/pRAXMXsbu2GMMBrZPxNv23waMg==", + "requires": { + "wordwrap": ">=0.0.2" + } + }, + "electron-to-chromium": { + "version": "1.5.41", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.41.tgz", + "integrity": "sha512-dfdv/2xNjX0P8Vzme4cfzHqnPm5xsZXwsolTYr0eyW18IUmNyG08vL+fttvinTfhKfIKdRoqkDIC9e9iWQCNYQ==", + "dev": true + }, + "emittery": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/emittery/-/emittery-0.13.1.tgz", + "integrity": "sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==", + "dev": true + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "enquirer": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/enquirer/-/enquirer-2.4.1.tgz", + "integrity": "sha512-rRqJg/6gd538VHvR3PSrdRBb/1Vy2YfzHqzvbhGIQpDRKIa4FgV/54b5Q1xYSxOOwKvjXweS26E0Q+nAMwp2pQ==", + "dev": true, + "requires": { + "ansi-colors": "^4.1.1", + "strip-ansi": "^6.0.1" + } + }, + "error-ex": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", + "integrity": 
"sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "dev": true, + "requires": { + "is-arrayish": "^0.2.1" + } + }, + "escalade": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", + "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", + "dev": true + }, + "escape-string-regexp": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", + "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==", + "dev": true + }, + "eslint": { + "version": "7.32.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-7.32.0.tgz", + "integrity": "sha512-VHZ8gX+EDfz+97jGcgyGCyRia/dPOd6Xh9yPv8Bl1+SoaIwD+a/vlrOmGRUyOYu7MwUhc7CxqeaDZU13S4+EpA==", + "dev": true, + "requires": { + "@babel/code-frame": "7.12.11", + "@eslint/eslintrc": "^0.4.3", + "@humanwhocodes/config-array": "^0.5.0", + "ajv": "^6.10.0", + "chalk": "^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.0.1", + "doctrine": "^3.0.0", + "enquirer": "^2.3.5", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^5.1.1", + "eslint-utils": "^2.1.0", + "eslint-visitor-keys": "^2.0.0", + "espree": "^7.3.1", + "esquery": "^1.4.0", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^6.0.1", + "functional-red-black-tree": "^1.0.1", + "glob-parent": "^5.1.2", + "globals": "^13.6.0", + "ignore": "^4.0.6", + "import-fresh": "^3.0.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "js-yaml": "^3.13.1", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.0.4", + "natural-compare": "^1.4.0", + "optionator": "^0.9.1", + "progress": "^2.0.0", + "regexpp": "^3.1.0", + "semver": "^7.2.1", + "strip-ansi": "^6.0.0", + "strip-json-comments": "^3.1.0", + "table": "^6.0.9", + 
"text-table": "^0.2.0", + "v8-compile-cache": "^2.0.3" + }, + "dependencies": { + "ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "requires": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + } + }, + "json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + } + } + }, + "eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "dev": true, + "requires": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + } + }, + "eslint-utils": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-2.1.0.tgz", + "integrity": "sha512-w94dQYoauyvlDc43XnGB8lU3Zt713vNChgt4EWwhXAP2XkBvndfxF0AgIqKOOasjPIPzj9JqgwkwbCYD0/V3Zg==", + "dev": true, + "requires": { + "eslint-visitor-keys": "^1.1.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true + }, + "espree": { + "version": 
"7.3.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz", + "integrity": "sha512-v3JCNCE64umkFpmkFGqzVKsOT0tN1Zr+ueqLZfpV1Ob8e+CEgPWa+OxCoGH3tnhimMKIaBm4m/vaRpJ/krRz2g==", + "dev": true, + "requires": { + "acorn": "^7.4.0", + "acorn-jsx": "^5.3.1", + "eslint-visitor-keys": "^1.3.0" + }, + "dependencies": { + "eslint-visitor-keys": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-1.3.0.tgz", + "integrity": "sha512-6J72N8UNa462wa/KFODt/PJ3IU60SDpC3QXC1Hjc1BXXpfL2C9R5+AU7jhe0F6GREqVMh4Juu+NY7xn+6dipUQ==", + "dev": true + } + } + }, + "esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==" + }, + "esquery": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.6.0.tgz", + "integrity": "sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==", + "dev": true, + "requires": { + "estraverse": "^5.1.0" + }, + "dependencies": { + "estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true + } + } + }, + "esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "requires": { + "estraverse": "^5.2.0" + }, + "dependencies": { + "estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true + } + } + }, + "estraverse": { + 
"version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "dev": true + }, + "esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true + }, + "execa": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "dev": true, + "requires": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + } + }, + "exit": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz", + "integrity": "sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==", + "dev": true + }, + "expect": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/expect/-/expect-29.7.0.tgz", + "integrity": "sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==", + "dev": true, + "requires": { + "@jest/expect-utils": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0" + } + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": 
"https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true + }, + "fast-uri": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.0.3.tgz", + "integrity": "sha512-aLrHthzCjH5He4Z2H9YZ+v6Ujb9ocRuW6ZzkJQOrTxleEijANq4v1TsaPaVG1PZcuurEzrLcWRyYBYXD5cEiaw==" + }, + "fb-watchman": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/fb-watchman/-/fb-watchman-2.0.2.tgz", + "integrity": "sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==", + "dev": true, + "requires": { + "bser": "2.1.1" + } + }, + "file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "requires": { + "flat-cache": "^3.0.4" + } + }, + "fill-range": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", + "dev": true, + "requires": { + "to-regex-range": "^5.0.1" + } + }, + "find-up": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/find-up/-/find-up-4.1.0.tgz", + "integrity": "sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==", + "dev": true, + "requires": { + "locate-path": "^5.0.0", + "path-exists": "^4.0.0" + } + }, + "flat-cache": { + "version": "3.2.0", + 
"resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.2.0.tgz", + "integrity": "sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw==", + "dev": true, + "requires": { + "flatted": "^3.2.9", + "keyv": "^4.5.3", + "rimraf": "^3.0.2" + } + }, + "flatted": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz", + "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", + "dev": true + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true + }, + "fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "optional": true + }, + "function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "dev": true + }, + "functional-red-black-tree": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/functional-red-black-tree/-/functional-red-black-tree-1.0.1.tgz", + "integrity": "sha512-dsKNQNdj6xA3T+QlADDA7mOSlX0qiMINjn0cgr+eGHGsbSHzTabcIogz2+p/iqP1Xs6EP/sS2SbqH+brGTbq0g==", + "dev": true + }, + "gensync": { + "version": "1.0.0-beta.2", + "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", + "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", + "dev": true + }, + "get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", 
+ "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "dev": true + }, + "get-package-type": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/get-package-type/-/get-package-type-0.1.0.tgz", + "integrity": "sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==", + "dev": true + }, + "get-stream": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==", + "dev": true + }, + "glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "glob-parent": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", + "integrity": "sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==", + "dev": true, + "requires": { + "is-glob": "^4.0.1" + } + }, + "globals": { + "version": "13.24.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz", + "integrity": "sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==", + "dev": true, + "requires": { + "type-fest": "^0.20.2" + } + }, + "graceful-fs": { + "version": "4.2.11", + "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", + "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==", + "dev": true + }, + "has-flag": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "dev": true + }, + "hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dev": true, + "requires": { + "function-bind": "^1.1.2" + } + }, + "heap": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.7.tgz", + "integrity": "sha512-2bsegYkkHO+h/9MGbn6KWcE45cHZgPANo5LXF7EvWdT0yT2EguSVO1nDgU5c8+ZOPwp2vMNa7YFsJhVcDR9Sdg==" + }, + "html-escaper": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/html-escaper/-/html-escaper-2.0.2.tgz", + "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", + "dev": true + }, + "human-signals": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==", + "dev": true + }, + "ignore": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-4.0.6.tgz", + "integrity": "sha512-cyFDKrqc/YdcWFniJhzI42+AzS+gNwmUzOSFcRCQYwySuBBBy/KjuxWLZ/FHEH6Moq1NizMOBWyTcv8O4OZIMg==", + "dev": true + }, + "import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "requires": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + } + }, + "import-local": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.2.0.tgz", + "integrity": 
"sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==", + "dev": true, + "requires": { + "pkg-dir": "^4.2.0", + "resolve-cwd": "^3.0.0" + } + }, + "imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dev": true, + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "is-arrayish": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==", + "dev": true + }, + "is-core-module": { + "version": "2.15.1", + "resolved": "https://registry.npmjs.org/is-core-module/-/is-core-module-2.15.1.tgz", + "integrity": "sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==", + "dev": true, + "requires": { + "hasown": "^2.0.2" + } + }, + "is-extglob": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==", + "dev": true + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": 
"sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "dev": true + }, + "is-generator-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/is-generator-fn/-/is-generator-fn-2.1.0.tgz", + "integrity": "sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==", + "dev": true + }, + "is-glob": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "dev": true, + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-number": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==", + "dev": true + }, + "is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "dev": true + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, + "istanbul-lib-coverage": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", + "dev": true + }, + "istanbul-lib-instrument": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/istanbul-lib-instrument/-/istanbul-lib-instrument-6.0.3.tgz", + "integrity": "sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==", + "dev": true, + "requires": { + 
"@babel/core": "^7.23.9", + "@babel/parser": "^7.23.9", + "@istanbuljs/schema": "^0.1.3", + "istanbul-lib-coverage": "^3.2.0", + "semver": "^7.5.4" + } + }, + "istanbul-lib-report": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.1.tgz", + "integrity": "sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==", + "dev": true, + "requires": { + "istanbul-lib-coverage": "^3.0.0", + "make-dir": "^4.0.0", + "supports-color": "^7.1.0" + }, + "dependencies": { + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "supports-color": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", + "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "istanbul-lib-source-maps": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/istanbul-lib-source-maps/-/istanbul-lib-source-maps-4.0.1.tgz", + "integrity": "sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==", + "dev": true, + "requires": { + "debug": "^4.1.1", + "istanbul-lib-coverage": "^3.0.0", + "source-map": "^0.6.1" + } + }, + "istanbul-reports": { + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/istanbul-reports/-/istanbul-reports-3.1.7.tgz", + "integrity": "sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==", + "dev": true, + "requires": { + "html-escaper": "^2.0.0", + "istanbul-lib-report": "^3.0.0" + } + }, + "jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", + "integrity": 
"sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", + "dev": true, + "requires": { + "@jest/core": "^29.7.0", + "@jest/types": "^29.6.3", + "import-local": "^3.0.2", + "jest-cli": "^29.7.0" + } + }, + "jest-changed-files": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-29.7.0.tgz", + "integrity": "sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==", + "dev": true, + "requires": { + "execa": "^5.0.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0" + }, + "dependencies": { + "p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "requires": { + "yocto-queue": "^0.1.0" + } + } + } + }, + "jest-circus": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-circus/-/jest-circus-29.7.0.tgz", + "integrity": "sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==", + "dev": true, + "requires": { + "@jest/environment": "^29.7.0", + "@jest/expect": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "co": "^4.6.0", + "dedent": "^1.0.0", + "is-generator-fn": "^2.0.0", + "jest-each": "^29.7.0", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "p-limit": "^3.1.0", + "pretty-format": "^29.7.0", + "pure-rand": "^6.0.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "dependencies": { + "p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": 
true, + "requires": { + "yocto-queue": "^0.1.0" + } + } + } + }, + "jest-cli": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-cli/-/jest-cli-29.7.0.tgz", + "integrity": "sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==", + "dev": true, + "requires": { + "@jest/core": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "create-jest": "^29.7.0", + "exit": "^0.1.2", + "import-local": "^3.0.2", + "jest-config": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "yargs": "^17.3.1" + } + }, + "jest-config": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-config/-/jest-config-29.7.0.tgz", + "integrity": "sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==", + "dev": true, + "requires": { + "@babel/core": "^7.11.6", + "@jest/test-sequencer": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-jest": "^29.7.0", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "deepmerge": "^4.2.2", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-circus": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-runner": "^29.7.0", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "micromatch": "^4.0.4", + "parse-json": "^5.2.0", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "strip-json-comments": "^3.1.1" + } + }, + "jest-diff": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-diff/-/jest-diff-29.7.0.tgz", + "integrity": "sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==", + "dev": true, + "requires": { + "chalk": "^4.0.0", + "diff-sequences": "^29.6.3", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + } + }, + "jest-docblock": { + "version": "29.7.0", + "resolved": 
"https://registry.npmjs.org/jest-docblock/-/jest-docblock-29.7.0.tgz", + "integrity": "sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==", + "dev": true, + "requires": { + "detect-newline": "^3.0.0" + } + }, + "jest-each": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-each/-/jest-each-29.7.0.tgz", + "integrity": "sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "jest-util": "^29.7.0", + "pretty-format": "^29.7.0" + } + }, + "jest-environment-node": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-29.7.0.tgz", + "integrity": "sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==", + "dev": true, + "requires": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "jest-mock": "^29.7.0", + "jest-util": "^29.7.0" + } + }, + "jest-get-type": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.6.3.tgz", + "integrity": "sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==", + "dev": true + }, + "jest-haste-map": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-29.7.0.tgz", + "integrity": "sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "@types/graceful-fs": "^4.1.3", + "@types/node": "*", + "anymatch": "^3.0.3", + "fb-watchman": "^2.0.0", + "fsevents": "^2.3.2", + "graceful-fs": "^4.2.9", + "jest-regex-util": "^29.6.3", + "jest-util": "^29.7.0", + "jest-worker": "^29.7.0", + "micromatch": "^4.0.4", + "walker": "^1.0.8" + } + 
}, + "jest-leak-detector": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-29.7.0.tgz", + "integrity": "sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==", + "dev": true, + "requires": { + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + } + }, + "jest-matcher-utils": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-matcher-utils/-/jest-matcher-utils-29.7.0.tgz", + "integrity": "sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==", + "dev": true, + "requires": { + "chalk": "^4.0.0", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "pretty-format": "^29.7.0" + } + }, + "jest-message-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.7.0.tgz", + "integrity": "sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.12.13", + "@jest/types": "^29.6.3", + "@types/stack-utils": "^2.0.0", + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "micromatch": "^4.0.4", + "pretty-format": "^29.7.0", + "slash": "^3.0.0", + "stack-utils": "^2.0.3" + }, + "dependencies": { + "@babel/code-frame": { + "version": "7.25.7", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.25.7.tgz", + "integrity": "sha512-0xZJFNE5XMpENsgfHYTw8FbX4kv53mFLn2i3XPoq69LyhYSCBJtitaHx9QnsVTrsogI4Z3+HtEfZ2/GFPOtf5g==", + "dev": true, + "requires": { + "@babel/highlight": "^7.25.7", + "picocolors": "^1.0.0" + } + } + } + }, + "jest-mock": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-mock/-/jest-mock-29.7.0.tgz", + "integrity": "sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "@types/node": "*", + 
"jest-util": "^29.7.0" + } + }, + "jest-pnp-resolver": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/jest-pnp-resolver/-/jest-pnp-resolver-1.2.3.tgz", + "integrity": "sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==", + "dev": true + }, + "jest-regex-util": { + "version": "29.6.3", + "resolved": "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.6.3.tgz", + "integrity": "sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==", + "dev": true + }, + "jest-resolve": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve/-/jest-resolve-29.7.0.tgz", + "integrity": "sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==", + "dev": true, + "requires": { + "chalk": "^4.0.0", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-pnp-resolver": "^1.2.2", + "jest-util": "^29.7.0", + "jest-validate": "^29.7.0", + "resolve": "^1.20.0", + "resolve.exports": "^2.0.0", + "slash": "^3.0.0" + } + }, + "jest-resolve-dependencies": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-29.7.0.tgz", + "integrity": "sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==", + "dev": true, + "requires": { + "jest-regex-util": "^29.6.3", + "jest-snapshot": "^29.7.0" + } + }, + "jest-runner": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runner/-/jest-runner-29.7.0.tgz", + "integrity": "sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==", + "dev": true, + "requires": { + "@jest/console": "^29.7.0", + "@jest/environment": "^29.7.0", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "graceful-fs": "^4.2.9", + 
"jest-docblock": "^29.7.0", + "jest-environment-node": "^29.7.0", + "jest-haste-map": "^29.7.0", + "jest-leak-detector": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-resolve": "^29.7.0", + "jest-runtime": "^29.7.0", + "jest-util": "^29.7.0", + "jest-watcher": "^29.7.0", + "jest-worker": "^29.7.0", + "p-limit": "^3.1.0", + "source-map-support": "0.5.13" + }, + "dependencies": { + "p-limit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", + "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", + "dev": true, + "requires": { + "yocto-queue": "^0.1.0" + } + } + } + }, + "jest-runtime": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-runtime/-/jest-runtime-29.7.0.tgz", + "integrity": "sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==", + "dev": true, + "requires": { + "@jest/environment": "^29.7.0", + "@jest/fake-timers": "^29.7.0", + "@jest/globals": "^29.7.0", + "@jest/source-map": "^29.6.3", + "@jest/test-result": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "cjs-module-lexer": "^1.0.0", + "collect-v8-coverage": "^1.0.0", + "glob": "^7.1.3", + "graceful-fs": "^4.2.9", + "jest-haste-map": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-mock": "^29.7.0", + "jest-regex-util": "^29.6.3", + "jest-resolve": "^29.7.0", + "jest-snapshot": "^29.7.0", + "jest-util": "^29.7.0", + "slash": "^3.0.0", + "strip-bom": "^4.0.0" + } + }, + "jest-snapshot": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.7.0.tgz", + "integrity": "sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==", + "dev": true, + "requires": { + "@babel/core": "^7.11.6", + "@babel/generator": "^7.7.2", + "@babel/plugin-syntax-jsx": "^7.7.2", + 
"@babel/plugin-syntax-typescript": "^7.7.2", + "@babel/types": "^7.3.3", + "@jest/expect-utils": "^29.7.0", + "@jest/transform": "^29.7.0", + "@jest/types": "^29.6.3", + "babel-preset-current-node-syntax": "^1.0.0", + "chalk": "^4.0.0", + "expect": "^29.7.0", + "graceful-fs": "^4.2.9", + "jest-diff": "^29.7.0", + "jest-get-type": "^29.6.3", + "jest-matcher-utils": "^29.7.0", + "jest-message-util": "^29.7.0", + "jest-util": "^29.7.0", + "natural-compare": "^1.4.0", + "pretty-format": "^29.7.0", + "semver": "^7.5.3" + } + }, + "jest-util": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-util/-/jest-util-29.7.0.tgz", + "integrity": "sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "@types/node": "*", + "chalk": "^4.0.0", + "ci-info": "^3.2.0", + "graceful-fs": "^4.2.9", + "picomatch": "^2.2.3" + } + }, + "jest-validate": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-validate/-/jest-validate-29.7.0.tgz", + "integrity": "sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==", + "dev": true, + "requires": { + "@jest/types": "^29.6.3", + "camelcase": "^6.2.0", + "chalk": "^4.0.0", + "jest-get-type": "^29.6.3", + "leven": "^3.1.0", + "pretty-format": "^29.7.0" + }, + "dependencies": { + "camelcase": { + "version": "6.3.0", + "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", + "integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==", + "dev": true + } + } + }, + "jest-watcher": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-watcher/-/jest-watcher-29.7.0.tgz", + "integrity": "sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==", + "dev": true, + "requires": { + "@jest/test-result": "^29.7.0", + "@jest/types": "^29.6.3", + 
"@types/node": "*", + "ansi-escapes": "^4.2.1", + "chalk": "^4.0.0", + "emittery": "^0.13.1", + "jest-util": "^29.7.0", + "string-length": "^4.0.1" + } + }, + "jest-worker": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-29.7.0.tgz", + "integrity": "sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==", + "dev": true, + "requires": { + "@types/node": "*", + "jest-util": "^29.7.0", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "dependencies": { + "has-flag": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", + "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", + "dev": true + }, + "supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "dev": true, + "requires": { + "has-flag": "^4.0.0" + } + } + } + }, + "js-tokens": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", + "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", + "dev": true + }, + "js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "requires": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "dependencies": { + "argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "requires": { + "sprintf-js": "~1.0.2" + } + } + } + }, + "jsesc": { + "version": "3.0.2", + 
"resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.0.2.tgz", + "integrity": "sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==", + "dev": true + }, + "json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true + }, + "json-crawl": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/json-crawl/-/json-crawl-0.5.3.tgz", + "integrity": "sha512-BEjjCw8c7SxzNK4orhlWD5cXQh8vCk2LqDr4WgQq4CV+5dvopeYwt1Tskg67SuSLKvoFH5g0yuYtg7rcfKV6YA==" + }, + "json-diff": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/json-diff/-/json-diff-1.0.6.tgz", + "integrity": "sha512-tcFIPRdlc35YkYdGxcamJjllUhXWv4n2rK9oJ2RsAzV4FBkuV4ojKEDgcZ+kpKxDmJKv+PFK65+1tVVOnSeEqA==", + "requires": { + "@ewoudenberg/difflib": "0.1.0", + "colors": "^1.4.0", + "dreamopt": "~0.8.0" + } + }, + "json-parse-even-better-errors": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", + "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", + "dev": true + }, + "json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, + "json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true + }, + "json5": { + "version": "2.2.3", + "resolved": 
"https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", + "dev": true + }, + "keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": "sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, + "requires": { + "json-buffer": "3.0.1" + } + }, + "kleur": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", + "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==", + "dev": true + }, + "leven": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/leven/-/leven-3.1.0.tgz", + "integrity": "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==", + "dev": true + }, + "levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + } + }, + "lines-and-columns": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", + "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==", + "dev": true + }, + "locate-path": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-5.0.0.tgz", + "integrity": "sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==", + "dev": true, + "requires": { + "p-locate": "^4.1.0" + } + }, + "lodash.clonedeep": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.clonedeep/-/lodash.clonedeep-4.5.0.tgz", + "integrity": 
"sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==" + }, + "lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true + }, + "lodash.truncate": { + "version": "4.4.2", + "resolved": "https://registry.npmjs.org/lodash.truncate/-/lodash.truncate-4.4.2.tgz", + "integrity": "sha512-jttmRe7bRse52OsWIMDLaXxWqRAmtIUccAQ3garviCqJjafXOfNMO0yMfNpdD6zbGaTU0P5Nz7e7gAT6cKmJRw==", + "dev": true + }, + "lru-cache": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", + "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", + "dev": true, + "requires": { + "yallist": "^3.0.2" + } + }, + "make-dir": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-4.0.0.tgz", + "integrity": "sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==", + "dev": true, + "requires": { + "semver": "^7.5.3" + } + }, + "makeerror": { + "version": "1.0.12", + "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz", + "integrity": "sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==", + "dev": true, + "requires": { + "tmpl": "1.0.5" + } + }, + "merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "dev": true + }, + "micromatch": { + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", + "dev": true, + 
"requires": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + } + }, + "mimic-fn": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==", + "dev": true + }, + "minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==" + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, + "natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, + "node-int64": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz", + "integrity": "sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==", + "dev": true + }, + "node-releases": { + "version": "2.0.18", + "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz", + "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==", + "dev": true + }, + "normalize-path": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz", + "integrity": 
"sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==", + "dev": true + }, + "npm-run-path": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "dev": true, + "requires": { + "path-key": "^3.0.0" + } + }, + "once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, + "requires": { + "wrappy": "1" + } + }, + "onetime": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "dev": true, + "requires": { + "mimic-fn": "^2.1.0" + } + }, + "optionator": { + "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "requires": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + } + }, + "p-limit": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-2.3.0.tgz", + "integrity": "sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==", + "dev": true, + "requires": { + "p-try": "^2.0.0" + } + }, + "p-locate": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-4.1.0.tgz", + "integrity": "sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==", + "dev": true, + "requires": { + "p-limit": "^2.2.0" + } + }, + "p-try": { + "version": 
"2.2.0", + "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz", + "integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==", + "dev": true + }, + "parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": "sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "requires": { + "callsites": "^3.0.0" + } + }, + "parse-json": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/parse-json/-/parse-json-5.2.0.tgz", + "integrity": "sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==", + "dev": true, + "requires": { + "@babel/code-frame": "^7.0.0", + "error-ex": "^1.3.1", + "json-parse-even-better-errors": "^2.3.0", + "lines-and-columns": "^1.1.6" + } + }, + "path-exists": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", + "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", + "dev": true + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true + }, + "path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true + }, + "path-parse": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==", + "dev": true + }, + "picocolors": { + "version": "1.1.1", + "resolved": 
"https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", + "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", + "dev": true + }, + "picomatch": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==", + "dev": true + }, + "pirates": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/pirates/-/pirates-4.0.6.tgz", + "integrity": "sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==", + "dev": true + }, + "pkg-dir": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/pkg-dir/-/pkg-dir-4.2.0.tgz", + "integrity": "sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==", + "dev": true, + "requires": { + "find-up": "^4.0.0" + } + }, + "prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true + }, + "pretty-format": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.7.0.tgz", + "integrity": "sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==", + "dev": true, + "requires": { + "@jest/schemas": "^29.6.3", + "ansi-styles": "^5.0.0", + "react-is": "^18.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", + "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", + "dev": true + } + } + }, + "progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": 
"sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "dev": true + }, + "prompts": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz", + "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==", + "dev": true, + "requires": { + "kleur": "^3.0.3", + "sisteransi": "^1.0.5" + } + }, + "punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true + }, + "pure-rand": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz", + "integrity": "sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==", + "dev": true + }, + "react-is": { + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==", + "dev": true + }, + "readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "requires": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + } + }, + "regexpp": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.2.0.tgz", + "integrity": "sha512-pq2bWo9mVD43nbts2wGv17XLiNLya+GklZ8kaDLV2Z08gDCsGpnKn9BFMepvWuHCbyVvY7J5o5+BVvoQbmlJLg==", + "dev": true + }, + "require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": 
"sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", + "dev": true + }, + "require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==" + }, + "resolve": { + "version": "1.22.8", + "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", + "integrity": "sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==", + "dev": true, + "requires": { + "is-core-module": "^2.13.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + } + }, + "resolve-cwd": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz", + "integrity": "sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==", + "dev": true, + "requires": { + "resolve-from": "^5.0.0" + }, + "dependencies": { + "resolve-from": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-5.0.0.tgz", + "integrity": "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==", + "dev": true + } + } + }, + "resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true + }, + "resolve.exports": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/resolve.exports/-/resolve.exports-2.0.2.tgz", + "integrity": "sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==", + "dev": true + }, + "rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": 
"sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "dev": true, + "requires": { + "glob": "^7.1.3" + } + }, + "safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + }, + "semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "dev": true + }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true + }, + "signal-exit": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==", + "dev": true + }, + "sisteransi": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz", + "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==", + "dev": true + }, + "slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true + }, + "slice-ansi": { + "version": "4.0.0", + 
"resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-4.0.0.tgz", + "integrity": "sha512-qMCMfhY040cVHT43K9BFygqYbUPFZKHOg7K73mtTWJRb8pyP3fzf4Ixd5SzdEJQ6MRUg/WBnOLxghZtKKurENQ==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "astral-regex": "^2.0.0", + "is-fullwidth-code-point": "^3.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + } + } + }, + "source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true + }, + "source-map-support": { + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz", + "integrity": "sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==", + "dev": true, + "requires": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": 
"sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==" + }, + "stack-utils": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/stack-utils/-/stack-utils-2.0.6.tgz", + "integrity": "sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==", + "dev": true, + "requires": { + "escape-string-regexp": "^2.0.0" + }, + "dependencies": { + "escape-string-regexp": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-2.0.0.tgz", + "integrity": "sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==", + "dev": true + } + } + }, + "string-length": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz", + "integrity": "sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==", + "dev": true, + "requires": { + "char-regex": "^1.0.2", + "strip-ansi": "^6.0.0" + } + }, + "string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dev": true, + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + } + }, + "string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "requires": { + "safe-buffer": "~5.2.0" + } + }, + "strip-ansi": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", + "dev": true, + "requires": { + "ansi-regex": 
"^5.0.1" + } + }, + "strip-bom": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-4.0.0.tgz", + "integrity": "sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==", + "dev": true + }, + "strip-final-newline": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==", + "dev": true + }, + "strip-json-comments": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-3.1.1.tgz", + "integrity": "sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==", + "dev": true + }, + "supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "dev": true, + "requires": { + "has-flag": "^3.0.0" + } + }, + "supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==", + "dev": true + }, + "table": { + "version": "6.8.2", + "resolved": "https://registry.npmjs.org/table/-/table-6.8.2.tgz", + "integrity": "sha512-w2sfv80nrAh2VCbqR5AK27wswXhqcck2AhfnNW76beQXskGZ1V12GwS//yYVa3d3fcvAip2OUnbDAjW2k3v9fA==", + "dev": true, + "requires": { + "ajv": "^8.0.1", + "lodash.truncate": "^4.4.2", + "slice-ansi": "^4.0.0", + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + } + }, + "test-exclude": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", + "integrity": 
"sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==", + "dev": true, + "requires": { + "@istanbuljs/schema": "^0.1.2", + "glob": "^7.1.4", + "minimatch": "^3.0.4" + } + }, + "text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true + }, + "through2": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/through2/-/through2-3.0.2.tgz", + "integrity": "sha512-enaDQ4MUyP2W6ZyT6EsMzqBPZaM/avg8iuo+l2d3QCs0J+6RaqkHV/2/lOwDTueBHeJ/2LG9lrLW3d5rWPucuQ==", + "requires": { + "inherits": "^2.0.4", + "readable-stream": "2 || 3" + } + }, + "tmpl": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", + "dev": true + }, + "to-fast-properties": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", + "integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==", + "dev": true + }, + "to-regex-range": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "dev": true, + "requires": { + "is-number": "^7.0.0" + } + }, + "type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "requires": { + "prelude-ls": "^1.2.1" + } + }, + "type-detect": { + "version": "4.0.8", + "resolved": 
"https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", + "integrity": "sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==", + "dev": true + }, + "type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true + }, + "undici-types": { + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "dev": true + }, + "update-browserslist-db": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.1.tgz", + "integrity": "sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==", + "dev": true, + "requires": { + "escalade": "^3.2.0", + "picocolors": "^1.1.0" + } + }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "requires": { + "punycode": "^2.1.0" + } + }, + "util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" + }, + "v8-compile-cache": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.4.0.tgz", + "integrity": "sha512-ocyWc3bAHBB/guyqJQVI5o4BZkPhznPYUG2ea80Gond/BgNWpap8TOmLSeeQG7bnh2KMISxskdADG59j7zruhw==", + "dev": true + }, + "v8-to-istanbul": { + "version": "9.3.0", + "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz", + 
"integrity": "sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==", + "dev": true, + "requires": { + "@jridgewell/trace-mapping": "^0.3.12", + "@types/istanbul-lib-coverage": "^2.0.1", + "convert-source-map": "^2.0.0" + } + }, + "walker": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/walker/-/walker-1.0.8.tgz", + "integrity": "sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==", + "dev": true, + "requires": { + "makeerror": "1.0.12" + } + }, + "which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "requires": { + "isexe": "^2.0.0" + } + }, + "word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true + }, + "wordwrap": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-1.0.0.tgz", + "integrity": "sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==" + }, + "wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dev": true, + "requires": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "dependencies": { + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dev": true, + "requires": { + "color-convert": "^2.0.1" + } + }, + "color-convert": { 
+ "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dev": true, + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "dev": true + } + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "write-file-atomic": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-4.0.2.tgz", + "integrity": "sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==", + "dev": true, + "requires": { + "imurmurhash": "^0.1.4", + "signal-exit": "^3.0.7" + } + }, + "y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "dev": true + }, + "yallist": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "dev": true + }, + "yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dev": true, + "requires": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + 
"y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + } + }, + "yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "dev": true + }, + "yocto-queue": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", + "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", + "dev": true + } + } +} From 659c81e1912988f2b26fe17c286d827fd3f5a971 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 16:51:32 -0500 Subject: [PATCH 50/59] specify working directory --- .github/workflows/node.js.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 198ea1695..29aac259f 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -18,6 +18,10 @@ jobs: runs-on: ubuntu-latest + defaults: + run: + working-directory: lang/js + strategy: matrix: node-version: [18.x, 20.x, 22.x] From ad5519e44b7f27190094eb5fcb3148298598b066 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 16:57:39 -0500 Subject: [PATCH 51/59] is it the cache? 
--- .github/workflows/node.js.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 29aac259f..59fb87396 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -33,7 +33,7 @@ jobs: uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} - cache: 'npm' +# cache: 'npm' - run: npm ci - run: npm run build --if-present - run: npm test From 094d9cf10f86df30a2a6dbe12fdab5a6aea0be63 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 17:01:32 -0500 Subject: [PATCH 52/59] cleanup --- .github/workflows/node.js.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 59fb87396..169b4f95c 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -18,10 +18,6 @@ jobs: runs-on: ubuntu-latest - defaults: - run: - working-directory: lang/js - strategy: matrix: node-version: [18.x, 20.x, 22.x] @@ -33,7 +29,6 @@ jobs: uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} -# cache: 'npm' - run: npm ci - run: npm run build --if-present - run: npm test From a87eac9f1b187456706eb71e2808a22f4cc45c9f Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Thu, 17 Oct 2024 17:05:12 -0500 Subject: [PATCH 53/59] try again --- .github/workflows/node.js.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/node.js.yml b/.github/workflows/node.js.yml index 169b4f95c..31f309b36 100644 --- a/.github/workflows/node.js.yml +++ b/.github/workflows/node.js.yml @@ -9,15 +9,15 @@ on: pull_request: branches: [ "master" ] -defaults: - run: - working-directory: lang/js - jobs: build: runs-on: ubuntu-latest + defaults: + run: + working-directory: lang/js + strategy: matrix: node-version: [18.x, 20.x, 22.x] @@ -29,6 +29,7 @@ jobs: uses: actions/setup-node@v4 with: node-version: ${{ matrix.node-version }} +# 
cache: 'npm' - run: npm ci - run: npm run build --if-present - run: npm test From 871cf2e1234f0b1337161d078ab899ac279f48a2 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Fri, 18 Oct 2024 11:45:16 -0500 Subject: [PATCH 54/59] do not resolve until writable is finished --- lang/js/io.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lang/js/io.js b/lang/js/io.js index 82619214f..4e8350deb 100644 --- a/lang/js/io.js +++ b/lang/js/io.js @@ -111,6 +111,10 @@ module.exports = function(airr) { var writable = fs.createWriteStream(filename); if (is_gz) writable.pipe(zlib.createGunzip()); + writable.on('finish', function() { + return resolve(); + }); + // write header writable.write(field_names.join('\t')); writable.write('\n'); @@ -134,7 +138,7 @@ module.exports = function(airr) { } writable.end(); - return resolve(); +// return resolve(); }); } From 875f20ec92314b3bf97a459b1af8e93a364979fc Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Fri, 18 Oct 2024 12:12:08 -0500 Subject: [PATCH 55/59] fix read gzip, add test case --- lang/python/airr/interface.py | 2 +- .../tests/data/good_rearrangement.tsv.gz | Bin 0 -> 1106 bytes lang/python/tests/test_interface.py | 19 ++++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 lang/python/tests/data/good_rearrangement.tsv.gz diff --git a/lang/python/airr/interface.py b/lang/python/airr/interface.py index a8f60298f..b58811dc7 100644 --- a/lang/python/airr/interface.py +++ b/lang/python/airr/interface.py @@ -41,7 +41,7 @@ def read_rearrangement(filename, validate=False, debug=False): airr.io.RearrangementReader: iterable reader class. 
""" if filename.endswith(".gz"): - handle = gzip.open(filename, 'r') + handle = gzip.open(filename, 'rt') else: handle = open(filename, 'r') diff --git a/lang/python/tests/data/good_rearrangement.tsv.gz b/lang/python/tests/data/good_rearrangement.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c838c584d5e10ba715dd1ebb7fbe1925d847ae23 GIT binary patch literal 1106 zcmV-Y1g-lYiwFpPH)>`817~kil^8C_U(5LsjD3)P1|fK78v5?Paa;Z9}W*y zW=UBl#WK6iitWuRC1p>z&#ZW#{k6-AMb<4t&xf1E-R*{K%DZ&8*sdP3wqvqhEejli zEVJ@7m`r2CukkVainz=8?B9wBB&b^N1#Q}Vzj{&>QIa$TQ{z&hC&=c6X@D0#y}4{ z5{Q^W4;|YcZWbILXS6+R2S_xSKu;5)xs|oEaHB%%$6yC+ zfcDs$*c}@^w_e(TW0GG_$%{b zxXvDd>+6U*n2YeBMrwLC`1tYK`YGo1ndzK|^6`kX2|OGgR+Uk+hagpswLny?DcPKT zk|`vNQg`;rE^snzfa3rht$|ZM;VkHcR-Lo&)6}2#&OR%=JFXWn7d;=VKX&weCv(w0 zbA#4#$zT*d<+yB^#puz1-DkpT{)+&Qe;)QT@g@_e>GFVs0k=GCf5OLoT1*c!{x_2(bya$vOUOmA2nNa#c zAEtbON4{EsB@aGK3EzfM%!X0C3aJ=+CsD;%_L2T!eAW0c8)Nx4#)9C`#aNXe^J_mQ Y152I%RG=m8H>?l;0K5(Ux-J(00KYL1`v3p{ literal 0 HcmV?d00001 diff --git a/lang/python/tests/test_interface.py b/lang/python/tests/test_interface.py index 5fc54060c..0585cac0d 100644 --- a/lang/python/tests/test_interface.py +++ b/lang/python/tests/test_interface.py @@ -23,6 +23,7 @@ def setUp(self): # Test data self.rearrangement_good = os.path.join(data_path, 'good_rearrangement.tsv') + self.rearrangement_good_gz = os.path.join(data_path, 'good_rearrangement.tsv.gz') self.rearrangement_bad = os.path.join(data_path, 'bad_rearrangement.tsv') self.rep_good = os.path.join(data_path, 'good_repertoire.yaml') self.rep_bad = os.path.join(data_path, 'bad_repertoire.yaml') @@ -59,6 +60,24 @@ def test_load_rearrangement(self): result = airr.load_rearrangement(self.rearrangement_bad) self.assertTupleEqual(result.shape, self.shape_bad, 'load(): bad data failed') + # @unittest.skip('-> read_rearrangement(): skipped\n') + def test_read_rearrangement(self): + # Good data + result = [] + reader = airr.read_rearrangement(self.rearrangement_good) + for row 
in reader: + result.append(row) + reader.close() + self.assertTrue(len(result) == self.shape_good[0], 'read_rearrangement(): good data failed') + + # Good data, gzip + result = [] + reader = airr.read_rearrangement(self.rearrangement_good_gz) + for row in reader: + result.append(row) + reader.close() + self.assertTrue(len(result) == self.shape_good[0], 'read_rearrangement(): good data (gzip) failed') + # @unittest.skip('-> repertoire_template(): skipped\n') def test_repertoire_template(self): try: From a1a0a9fe7498913c61f7cc2f24bb78ccd1fc6505 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 26 Feb 2024 16:28:34 -0600 Subject: [PATCH 56/59] make targets to copy specs and test data, centralize test data --- .github/workflows/py-unittest.yaml | 5 + .github/workflows/r-check.yaml | 4 + Makefile | 55 + lang/R/inst/extdata/airr-schema-openapi3.yaml | 5156 +++++++++++++++++ .../airr/specs/airr-schema-openapi3.yaml | 5156 +++++++++++++++++ tests/check-consistency-formats.py | 41 +- tests/data/bad_genotype_set.json | 44 + tests/data/bad_germline_set.json | 417 ++ tests/data/bad_rearrangement.tsv | 10 + tests/data/bad_repertoire.yaml | 202 + tests/data/extra_rearrangement.tsv | 2 + tests/data/good_combined_airr.json | 1124 ++++ tests/data/good_combined_airr.yaml | 934 +++ tests/data/good_genotype_set.json | 38 + tests/data/good_germline_set.json | 432 ++ tests/data/good_rearrangement.tsv | 10 + tests/data/good_repertoire.yaml | 469 ++ 17 files changed, 14097 insertions(+), 2 deletions(-) create mode 100644 Makefile create mode 100644 lang/R/inst/extdata/airr-schema-openapi3.yaml create mode 100644 lang/python/airr/specs/airr-schema-openapi3.yaml create mode 100644 tests/data/bad_genotype_set.json create mode 100644 tests/data/bad_germline_set.json create mode 100644 tests/data/bad_rearrangement.tsv create mode 100644 tests/data/bad_repertoire.yaml create mode 100644 tests/data/extra_rearrangement.tsv create mode 100644 tests/data/good_combined_airr.json create mode 
100644 tests/data/good_combined_airr.yaml create mode 100644 tests/data/good_genotype_set.json create mode 100644 tests/data/good_germline_set.json create mode 100644 tests/data/good_rearrangement.tsv create mode 100644 tests/data/good_repertoire.yaml diff --git a/.github/workflows/py-unittest.yaml b/.github/workflows/py-unittest.yaml index f531fd12f..4d24c2020 100644 --- a/.github/workflows/py-unittest.yaml +++ b/.github/workflows/py-unittest.yaml @@ -26,6 +26,11 @@ jobs: python-version: [ '3.8' ] steps: - uses: actions/checkout@v2 + + - name: Check test data matches the global test data files + run: diff -rc tests/data ../../tests/data + shell: bash + - name: Set up Python uses: actions/setup-python@v2 with: diff --git a/.github/workflows/r-check.yaml b/.github/workflows/r-check.yaml index 8732673a4..1127abf90 100644 --- a/.github/workflows/r-check.yaml +++ b/.github/workflows/r-check.yaml @@ -26,6 +26,10 @@ jobs: steps: - uses: actions/checkout@v2 + - name: Check test data matches the global test data files + run: diff -rc tests/data-tests ../../tests/data + shell: bash + - name: Install dependencies run: | install.packages(c("remotes", "testthat", "roxygen2", "devtools", "rcmdcheck")) diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..a18207a95 --- /dev/null +++ b/Makefile @@ -0,0 +1,55 @@ +# helper commands for keeping the language directories in sync + +# note: "help" MUST be the first target in the file, +# when the user types "make" they should get help info +help: + @echo "" + @echo "Helper commands for AIRR Standards repository" + @echo "" + @echo "make gen-v2 -- Generate OpenAPI V2 spec from the V3 spec" + @echo "make build-docs -- Build documentation" + @echo "make spec-copy -- Copy spec files to language directories" + @echo "make data-copy -- Copy test data files to language directories" + @echo "make checks -- Run consistency checks on spec files" + @echo "make tests -- Run all language test suites" + @echo "make python-tests -- 
Run Python test suite" + @echo "make r-tests -- Run R test suite" + @echo "make js-tests -- Run Javascript test suite" + @echo "" + +gen-v2: + @echo "Not implemented" + +build-docs: + sphinx-build -a -E -b html docs docs/_build/html + +spec-copy: + @echo "Copying specs to language directories" + cp specs/airr-schema.yaml lang/python/airr/specs + cp specs/airr-schema-openapi3.yaml lang/python/airr/specs + cp specs/airr-schema.yaml lang/R/inst/extdata + cp specs/airr-schema-openapi3.yaml lang/R/inst/extdata +# cp specs/airr-schema.yaml lang/js/ +# cp specs/airr-schema-openapi3.yaml lang/js/ + +data-copy: + @echo "Copying test data to language directories" + cp tests/data/* lang/python/tests/data + cp tests/data/* lang/R/tests/data-tests + +checks: + @echo "Running consistency checks on spec files" + python3 tests/check-consistency-formats.py + +tests: python-tests r-tests js-tests + +python-tests: + @echo "Running Python test suite" + cd lang/python; python3 -m unittest discover + +r-tests: + @echo "Running R test suite" + cd lang/R; R -e "library(devtools); test()" + +js-tests: + @echo "Running Javascript test suite" diff --git a/lang/R/inst/extdata/airr-schema-openapi3.yaml b/lang/R/inst/extdata/airr-schema-openapi3.yaml new file mode 100644 index 000000000..d6c6d48e2 --- /dev/null +++ b/lang/R/inst/extdata/airr-schema-openapi3.yaml @@ -0,0 +1,5156 @@ +# +# Schema definitions for AIRR standards objects +# +Info: + title: AIRR Schema + description: Schema definitions for AIRR standards objects + version: 1.4 + contact: + name: AIRR Community + url: https://github.com/airr-community + license: + name: Creative Commons Attribution 4.0 International + url: https://creativecommons.org/licenses/by/4.0/ + + +# Properties that are based upon an ontology use this +# standard schema definition +Ontology: + type: object + properties: + id: + type: string + nullable: true + description: CURIE of the concept, encoding the ontology and the local ID + label: + type: string + 
nullable: true + description: Label of the concept in the respective ontology + +# Map to expand CURIE prefixes to full IRIs +CURIEMap: + ABREG: + type: identifier + default: + map: ABREG + map: + ABREG: + iri_prefix: "http://antibodyregistry.org/AB_" + CHEBI: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + CL: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CL_" + DOI: + type: identifier + default: + map: DOI + map: + DOI: + iri_prefix: "https://doi.org/" + DOID: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/DOID_" + ENA: + type: identifier + default: + map: ENA + map: + ENA: + iri_prefix: "https://www.ebi.ac.uk/ena/browser/view/" + ENSG: + type: identifier + default: + map: ENSG + map: + ENSG: + iri_prefix: "https://www.ensembl.org/Multi/Search/Results?q=" + GAZ: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/GAZ_" + IEDB_RECEPTOR: + type: identifier + default: + map: IEDB + provider: IEDB + map: + IEDB: + iri_prefix: "https://www.iedb.org/receptor/" + MRO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/MRO_" + NCBITAXON: + type: taxonomy + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCBITaxon_" + BioPortal: + iri_prefix: "http://purl.bioontology.org/ontology/NCBITAXON/" + NCIT: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCIT_" + ORCID: + type: catalog + default: + map: ORCID + provider: ORCID + map: + ORCID: + iri_prefix: "https://orcid.org/" + ROR: + type: catalog + default: + map: ROR + provider: ROR + map: + ROR: + iri_prefix: "https://ror.org/" + SRA: + type: identifier + default: + map: SRA + 
map: + SRA: + iri_prefix: "https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=" + UBERON: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UBERON_" + UNIPROT: + type: identifier + default: + map: UNIPROT + map: + UniProt: + iri_prefix: "http://purl.uniprot.org/uniprot/" + UO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UO_" + +InformationProvider: + provider: + ENA: + request: + url: "{iri}" + response: text/html + IEDB: + request: + url: "https://query-api.iedb.org/tcr_search?receptor_group_id=eq.{local_id}" + response: application/json + OLS: + request: + url: "https://www.ebi.ac.uk/ols/api/ontologies/{ontology_id}/terms?iri={iri}" + response: application/json + Ontobee: + request: + url: "http://www.ontobee.org/ontology/rdf/{ontology_id}?iri={iri}" + response: application/rdf+xml + ORCID: + request: + url: "https://pub.orcid.org/v2.1/{local_id}" + header: + Accept: application/json + response: application/json + ROR: + request: + url: "https://api.ror.org/organizations/{iri}" + response: application/json + SRA: + request: + url: "{iri}" + response: text/html + parameter: + CHEBI: + Ontobee: + ontology_id: CHEBI + OLS: + ontology_id: chebi + CL: + Ontobee: + ontology_id: CL + OLS: + ontology_id: cl + DOID: + Ontobee: + ontology_id: DOID + OLS: + ontology_id: doid + GAZ: + Ontobee: + ontology_id: GAZ + OLS: + ontology_id: gaz + MRO: + Ontobee: + ontology_id: MRO + OLS: + ontology_id: mro + NCBITAXON: + Ontobee: + ontology_id: NCBITaxon + OLS: + ontology_id: ncbitaxon + BioPortal: + ontology_id: NCBITAXON + NCIT: + Ontobee: + ontology_id: NCIT + OLS: + ontology_id: ncit + UBERON: + Ontobee: + ontology_id: UBERON + OLS: + ontology_id: uberon + UO: + Ontobee: + ontology_id: UO + OLS: + ontology_id: uo + +# AIRR specification extensions +# +# The schema definitions for AIRR standards objects is extended to +# provide a number of AIRR 
specific attributes. This schema definition +# specifies the structure, property names and data types. These +# attributes are attached to an AIRR field with the x-airr property. + +Attributes: + type: object + properties: + miairr: + type: string + description: MiAIRR requirement level. + enum: + - essential + - important + - defined + default: defined + identifier: + type: boolean + description: > + True if the field is an identifier required to link metadata and/or individual + sequence records across objects in the complete AIRR Data Model and ADC API. + default: false + adc-query-support: + type: boolean + description: > + True if an ADC API implementation must support queries on the field. + If false, query support for the field in ADC API implementations is optional. + default: false + adc-api-optional: + type: boolean + description: > + If false, repositories must implement these fields both for queries and query response. + Only applies to fields in the ADC API spec that are extensions to the AIRR Standard, + targeted at "convenience query fields" that make queries against repositories more + efficient than if queries were limited to AIRR fields only. + If true, repositories can choose to support the field or not. + default: false + deprecated: + type: boolean + description: True if the field has been deprecated from the schema. + default: false + deprecated-description: + type: string + description: Information regarding the deprecation of the field. + deprecated-replaced-by: + type: array + items: + type: string + description: The deprecated field is replaced by this list of fields. + set: + type: integer + description: MiAIRR set + subset: + type: string + description: MiAIRR subset + name: + type: string + description: MiAIRR name + format: + type: string + description: Field format. 
If null then assume the full range of the field data type + enum: + - ontology + - controlled_vocabulary + - physical_quantity + - CURIE + ontology: + type: object + description: Ontology definition for field + properties: + draft: + type: boolean + description: Indicates if ontology definition is a draft + top_node: + type: object + description: > + Concept to use as top node for ontology. Note that this must have the same CURIE namespace + as the actually annotated concept. + properties: + id: + type: string + description: CURIE for the top node term + label: + type: string + description: Ontology name for the top node term + +# AIRR Data File +# +# A JSON data file that holds Repertoire metadata, data processing +# analysis objects, or any object in the AIRR Data Model. +# +# It is presumed that the objects gathered together in an AIRR Data File are related +# or relevant to each other, e.g. part of the same study; thus, the ID fields can be +# internally resolved unless the ID contains an external PID. This implies that AIRR +# Data Files cannot be merged simply by concatenating arrays; any merge program +# would need to manage duplicate or conflicting ID values. +# +# While the properties in an AIRR Data File are not required, if one is provided then +# the value should not be null. 
+ +DataFile: + type: object + properties: + Info: + nullable: false + $ref: '#/InfoObject' + Repertoire: + type: array + nullable: false + description: List of repertoires + items: + $ref: '#/Repertoire' + RepertoireGroup: + type: array + nullable: false + description: List of repertoire groups + items: + $ref: '#/RepertoireGroup' + Rearrangement: + type: array + nullable: false + description: List of rearrangement records + items: + $ref: '#/Rearrangement' + Cell: + type: array + nullable: false + description: List of cells + items: + $ref: '#/Cell' + Clone: + type: array + nullable: false + description: List of clones + items: + $ref: '#/Clone' + GermlineSet: + type: array + nullable: false + description: List of germline sets + items: + $ref: '#/GermlineSet' + GenotypeSet: + type: array + nullable: false + description: List of genotype sets + items: + $ref: '#/GenotypeSet' + +# AIRR Info object, should be similar to openapi +# should we point to an openapi schema? +InfoObject: + type: object + description: Provides information about data and API responses. + required: + - title + - version + properties: + title: + type: string + nullable: false + version: + type: string + nullable: false + description: + type: string + nullable: true + contact: + type: object + nullable: true + properties: + name: + type: string + nullable: true + url: + type: string + nullable: true + email: + type: string + nullable: true + license: + type: object + nullable: true + required: + - name + properties: + name: + type: string + nullable: false + url: + type: string + nullable: true + +# A time point +TimePoint: + description: Time point at which an observation or other action was performed. 
+ type: object + properties: + label: + type: string + nullable: true + description: Informative label for the time point + example: Pre-operative sampling of cancer tissue + x-airr: + adc-query-support: true + value: + type: number + nullable: true + description: Value of the time point + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: true + description: Unit of the time point + title: Unit of immunization schedule + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# +# General objects +# + +# Contributor record to describe individuals and their contribution to a data set +# +Contributor: + description: Individual whose contribution to this work should be acknowledged + type: object + required: + - contributor_id + - name + properties: + contributor_id: + type: string + nullable: true + description: Unique identifier of this contributor within the file + x-airr: + identifier: true + miairr: important + name: + type: string + nullable: false + description: Full name of contributor + orcid_id: + $ref: '#/Ontology' + nullable: true + description: > + ORCID identifier of the contributor. Note that if present, the label of the ORCID record should take + precedence over the name reported in the `name` property. + title: ORCID iD + example: + id: ORCID:0000-0002-1825-0097 + label: Josiah Carberry + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation: + $ref: '#/Ontology' + nullable: true + description: > + ROR of the contributor's primary affiliation. Note that ROR are only minted for institutions, not + for individual institutes, divisions or departments. 
+ title: ROR + example: + id: ROR:05h7xva58 + label: Wesleyan University + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation_department: + type: string + nullable: true + description: > + Additional information regarding the contributor's primary affiliation. Can be used to specify + individual institutes, divisions or departments. + example: Department for Psychoceramics + contributions: + type: array + nullable: true + description: List of all roles the contributor had in a project + items: + $ref: '#/ContributorContribution' + +ContributorContribution: + type: object + required: + - role + properties: + role: + type: string + nullable: false + description: Role according to CRediT taxonomy + enum: + - conceptualization + - data curation + - formal analysis + - funding acquisition + - investigation + - methodology + - project administration + - resources + - software + - supervision + - validation + - visualization + - writing - original draft + - writing - review & editing + degree: + type: string + nullable: true + description: > + Optional specification of the degree of contribution, should be used if multiple individuals serve + the same role. + enum: + - lead + - equal + - supporting + + +# +# Germline gene schema +# + +# Rearranged and genomic germline sequences +RearrangedSequence: + type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. + required: + - sequence_id + - sequence + - derivation + - observation_type + - repository_name + - repository_id + - deposited_version + - seq_start + - seq_end + properties: + sequence_id: + type: string + nullable: true + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. 
+ x-airr: + identifier: true + miairr: important + sequence: + type: string + nullable: false + x-airr: + miairr: essential + description: nucleotide sequence + derivation: + type: string + nullable: true + enum: + - DNA + - RNA + - null + description: The class of nucleic acid that was used as primary starting material + x-airr: + miairr: important + observation_type: + type: string + nullable: false + enum: + - direct_sequencing + - inference_from_repertoire + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. + x-airr: + miairr: essential + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: true + x-airr: + miairr: defined + description: Name of the repository in which the sequence has been deposited + repository_ref: + type: string + nullable: true + x-airr: + miairr: defined + description: Queryable id or accession number of the sequence published by the repository + deposited_version: + type: string + nullable: true + x-airr: + miairr: defined + description: Version number of the sequence within the repository + sequence_start: + type: integer + nullable: false + x-airr: + miairr: essential + description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited + sequence_end: + type: integer + nullable: false + x-airr: + miairr: essential + description: End co-ordinate of the sequence detailed in this record, within the sequence deposited + +UnrearrangedSequence: + description: Details of an unrearranged sequence contributing support for a gene or allele + type: object + required: + - sequence_id + - sequence + - repository_name + - assembly_id + - gff_seqid + - gff_start + - gff_end + - strand + properties: + sequence_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: unique identifier of this 
UnrearrangedSequence within the file + sequence: + type: string + nullable: false + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. + x-airr: + miairr: essential + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: true + x-airr: + miairr: defined + description: Name of the repository in which the assembly or contig is deposited + repository_ref: + type: string + nullable: true + x-airr: + miairr: defined + description: Queryable id or accession number of the sequence published by the repository + patch_no: + type: string + nullable: true + description: Genome assembly patch number in which this gene was determined + gff_seqid: + type: string + nullable: true + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. + gff_start: + type: integer + nullable: true + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. + gff_end: + type: integer + nullable: true + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. + strand: + type: string + nullable: true + enum: + - + + - "-" + - null + description: sense (+ or -) + +# V gene delineation +SequenceDelineationV: + description: Delineation of a V-gene in a particular system + type: object + required: + - sequence_delineation_id + - delineation_scheme + - fwr1_start + - fwr1_end + - cdr1_start + - cdr1_end + - fwr2_start + - fwr2_end + - cdr2_start + - cdr2_end + - fwr3_start + - fwr3_end + - cdr3_start + properties: + sequence_delineation_id: + type: string + nullable: true + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. 
+ x-airr: + identifier: true + miairr: important + + delineation_scheme: + type: string + nullable: true + x-airr: + miairr: important + description: Name of the delineation scheme + example: Chothia + unaligned_sequence: + type: string + nullable: true + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + nullable: true + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. + fwr1_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR1 start co-ordinate in the 'unaligned sequence' field + fwr1_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR1 end co-ordinate in the 'unaligned sequence' field + cdr1_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR1 start co-ordinate in the 'unaligned sequence' field + cdr1_end: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR1 end co-ordinate in the 'unaligned sequence' field + fwr2_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR2 start co-ordinate in the 'unaligned sequence' field + fwr2_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR2 end co-ordinate in the 'unaligned sequence' field + cdr2_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR2 start co-ordinate in the 'unaligned sequence' field + cdr2_end: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR2 end co-ordinate in the 'unaligned sequence' field + fwr3_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR3 start co-ordinate in the 'unaligned sequence' field + fwr3_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR3 end co-ordinate in the 
'unaligned sequence' field + cdr3_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR3 start co-ordinate in the 'unaligned sequence' field + alignment_labels: + type: array + nullable: true + items: + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. + +# Description of a putative or confirmed Ig receptor gene/allele +AlleleDescription: + description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations + type: object + required: + - allele_description_id + - acknowledgements + - release_version + - release_date + - release_description + - sequence + - coding_sequence + - locus + - sequence_type + - functional + - inference_type + - species + properties: + allele_description_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. + allele_description_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Unique reference to the allele description, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:IGHV1-69*01.001 + acknowledgements: + type: array + nullable: true + description: > + List of individuals whose contribution to the gene description should be acknowledged. Note that these + are not necessarily identical with the authors on an associated manuscript or other scholarly + communication. Further note that typically at least the three CRediT contributor roles "supervision", + "investigation" and "data curation" should be assigned. The current maintainer should be listed first. 
+ items: + $ref: '#/Contributor' + release_version: + type: integer + nullable: true + x-airr: + miairr: important + description: Version number of this record, updated whenever a revised version is published or released + release_date: + type: string + nullable: true + format: date-time + x-airr: + miairr: important + description: Date of this release + title: Release Date + example: "2021-02-02" + release_description: + type: string + nullable: true + x-airr: + miairr: important + description: Brief descriptive notes of the reason for this release and the changes embodied + label: + type: string + nullable: true + x-airr: + miairr: important + description: > + The accepted name for this gene or allele following the relevant nomenclature. + The value in this field should correspond to values in acceptable name fields of other schemas, + such as v_call, d_call, and j_call fields. + example: IGHV1-69*01 + sequence: + type: string + nullable: false + x-airr: + miairr: essential + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. + coding_sequence: + type: string + nullable: true + x-airr: + miairr: important + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
+ aliases: + type: array + nullable: true + items: + type: string + description: Alternative names for this sequence + locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + x-airr: + miairr: essential + chromosome: + type: integer + nullable: true + description: chromosome on which the gene is located + sequence_type: + type: string + nullable: false + enum: + - V + - D + - J + - C + description: Sequence type (V, D, J, C) + x-airr: + miairr: essential + functional: + type: boolean + nullable: true + x-airr: + miairr: important + description: True if the gene is functional, false if it is a pseudogene + inference_type: + type: string + nullable: true + enum: + - genomic_and_rearranged + - genomic_only + - rearranged_only + - null + description: Type of inference(s) from which this gene sequence was inferred + x-airr: + miairr: important + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: essential + species_subgroup: + type: string + nullable: true + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + - null + status: + type: string + nullable: true + enum: + - active + - draft + - retired + - withdrawn + - null + description: Status of record, assumed active if the field is not present + subgroup_designation: + type: string + nullable: true + description: Identifier of the gene subgroup or clade, as (and if) defined + gene_designation: + type: string + nullable: true + description: Gene number or other identifier, as (and if) defined + allele_designation: + type: string + nullable: true + description: Allele number or other identifier, as (and if) defined + 
allele_similarity_cluster_designation: + type: string + nullable: true + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + nullable: true + description: Membership ID of the allele within the similarity cluster, if a cluster is designated + j_codon_frame: + type: integer + nullable: true + enum: + - 1 + - 2 + - 3 + - null + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. + gene_start: + type: integer + nullable: true + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. + x-airr: + miairr: important + gene_end: + type: integer + nullable: true + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. + x-airr: + miairr: important + utr_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). + utr_5_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). + leader_1_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). + leader_1_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). + leader_2_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). + leader_2_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). 
+ v_rs_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). + v_rs_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). + d_rs_3_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). + d_rs_3_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). + d_rs_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). + d_rs_5_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). + j_cdr3_end: + type: integer + nullable: true + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucleotide of the + conserved PHE or TRP (IMGT codon position 118). + j_rs_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the J recombination site (J-genes only). + j_rs_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the J recombination site (J-genes only). + j_donor_splice: + type: integer + nullable: true + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only).
+ v_gene_delineations: + type: array + nullable: true + items: + $ref: '#/SequenceDelineationV' + unrearranged_support: + type: array + nullable: true + items: + $ref: '#/UnrearrangedSequence' + rearranged_support: + type: array + nullable: true + items: + $ref: '#/RearrangedSequence' + paralogs: + type: array + nullable: true + items: + type: string + description: Gene symbols of any paralogs + curation: + type: string + nullable: true + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. + curational_tags: + type: array + nullable: true + items: + type: string + enum: + - likely_truncated + - likely_full_length + description: Controlled-vocabulary tags applied to this description + +# Collection of gene descriptions into a germline set +GermlineSet: + type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. + required: + - germline_set_id + - acknowledgements + - release_version + - release_description + - release_date + - germline_set_name + - germline_set_ref + - species + - locus + - allele_descriptions + properties: + germline_set_id: + type: string + nullable: true + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. + x-airr: + identifier: true + miairr: important + acknowledgements: + type: array + nullable: true + description: > + List of individuals whose contribution to the germline set should be acknowledged. Note that these are + not necessarily identical with the authors on an associated manuscript or other scholarly communication. + Further note that typically at least the three CRediT contributor roles "supervision", "investigation" + and "data curation" should be assigned. 
The corresponding author should be listed last. + items: + $ref: '#/Contributor' + release_version: + type: number + nullable: true + x-airr: + miairr: important + description: Version number of this record, allocated automatically + release_description: + type: string + nullable: true + x-airr: + miairr: important + description: Brief descriptive notes of the reason for this release and the changes embodied + release_date: + type: string + nullable: true + format: date-time + x-airr: + miairr: important + description: Date of this release + title: Release Date + example: "2021-02-02" + germline_set_name: + type: string + nullable: true + x-airr: + miairr: important + description: descriptive name of this germline set + germline_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + pub_ids: + type: array + items: + type: string + nullable: true + description: Publications describing the germline set + example: ["PMID:35720344"] + species: + $ref: '#/Ontology' + nullable: false + x-airr: + miairr: essential + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + species_subgroup: + type: string + nullable: true + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + - null + locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + x-airr: + miairr: essential + allele_descriptions: + type: array + nullable: true + items: + $ref: '#/AlleleDescription' + description: list of allele_descriptions in the germline set + x-airr: + miairr: important + curation: + type: string + nullable: true
+ description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. + +# +# Genotype schema +# + +# GenotypeSet lists the Genotypes (describing different loci) inferred for this subject + +GenotypeSet: + type: object + required: + - receptor_genotype_set_id + properties: + receptor_genotype_set_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. + genotype_class_list: + description: List of Genotypes included in this Receptor Genotype Set. + type: array + nullable: true + items: + $ref: '#/Genotype' + +# Genotype of adaptive immune receptors +# This enumerates the alleles and gene deletions inferred in a single subject. +# Included alleles may either be listed by reference to a GermlineSet, or +# listed as 'undocumented', in which case the inferred sequence is provided + +Genotype: + type: object + required: + - receptor_genotype_id + - locus + properties: + receptor_genotype_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. 
+ locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + description: Gene locus + example: IGH + x-airr: + adc-query-support: true + format: controlled_vocabulary + miairr: essential + documented_alleles: + type: array + nullable: true + description: List of alleles documented in reference set(s) + items: + $ref: '#/DocumentedAllele' + x-airr: + miairr: important + undocumented_alleles: + type: array + nullable: true + description: List of alleles inferred to be present and not documented in an identified GermlineSet + items: + $ref: '#/UndocumentedAllele' + x-airr: + adc-query-support: true + deleted_genes: + type: array + nullable: true + description: Array of genes identified as being deleted in this genotype + items: + $ref: '#/DeletedGene' + x-airr: + adc-query-support: true + inference_process: + type: string + nullable: true + enum: + - genomic_sequencing + - repertoire_sequencing + - null + description: Information on how the genotype was acquired. Controlled vocabulary. + title: Genotype acquisition process + example: repertoire_sequencing + x-airr: + adc-query-support: true + format: controlled_vocabulary + +# Documented Allele +# This describes a 'known' allele found in a genotype +# It is 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + type: object + required: + - label + - germline_set_ref + properties: + label: + type: string + nullable: true + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome.
+ +# Undocumented Allele +# This describes an 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + nullable: true + description: Allele name as allocated by the inference pipeline + x-airr: + miairr: important + sequence: + type: string + nullable: false + description: nt sequence of the allele, as provided by the inference pipeline + x-airr: + miairr: essential + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + nullable: false + description: The accepted name for this gene, taken from the GermlineSet + x-airr: + miairr: essential + germline_set_ref: + type: string + nullable: true + description: GermlineSet from which it was taken (issuer/name/version) + x-airr: + miairr: important + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome.
+ + +# List of MHCGenotypes describing a subject's genotype +MHCGenotypeSet: + type: object + required: + - mhc_genotype_set_id + - mhc_genotype_list + properties: + mhc_genotype_set_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: A unique identifier for this MHCGenotypeSet + mhc_genotype_list: + description: List of MHCGenotypes included in this set + type: array + nullable: true + x-airr: + miairr: important + items: + $ref: '#/MHCGenotype' + +# Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci +MHCGenotype: + type: object + required: + - mhc_genotype_id + - mhc_class + - mhc_alleles + properties: + mhc_genotype_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study + mhc_class: + type: string + nullable: false + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype + example: MHC-I + x-airr: + miairr: essential + adc-query-support: true + format: controlled_vocabulary + mhc_alleles: + type: array + nullable: true + description: List of MHC alleles of the indicated mhc_class identified in an individual + items: + $ref: '#/MHCAllele' + x-airr: + miairr: important + adc-query-support: true + mhc_genotyping_method: + type: string + nullable: true + description: > + Information on how the genotype was determined. The content of this field should come from a list of + recommended terms provided in the AIRR Schema documentation. 
+ title: MHC genotyping method + example: pcr_low_resolution + x-airr: + adc-query-support: true + miairr: important + + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + nullable: true + x-airr: + miairr: important + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + gene: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + miairr: important + reference_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Repository and list from which it was taken (issuer/name/version) + + +SubjectGenotype: + type: object + properties: + receptor_genotype_set: + nullable: true + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + nullable: true + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. + +# +# Repertoire metadata schema +# + +# The overall study with a globally unique study_id +Study: + type: object + required: + - study_id + - study_title + - study_type + - inclusion_exclusion_criteria + - grants + - contributors + - pub_ids + - keywords_study + properties: + study_id: + type: string + nullable: true + description: > + Unique ID assigned by study registry such as one of the International Nucleotide Sequence Database + Collaboration (INSDC) repositories. 
+ title: Study ID + example: PRJNA001 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study ID + study_title: + type: string + nullable: true + description: Descriptive study title + title: Study title + example: Effects of sun light exposure of the Treg repertoire + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study title + study_type: + $ref: '#/Ontology' + nullable: true + description: Type of study design + title: Study type + example: + id: NCIT:C15197 + label: Case-Control Study + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study type + format: ontology + ontology: + draft: false + top_node: + id: NCIT:C63536 + label: Study + study_description: + type: string + nullable: true + description: Generic study description + title: Study description + example: Longer description + x-airr: + name: Study description + adc-query-support: true + inclusion_exclusion_criteria: + type: string + nullable: true + description: List of criteria for inclusion/exclusion for the study + title: Study inclusion/exclusion criteria + example: "Include: Clinical P. falciparum infection; Exclude: Seropositive for HIV" + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study inclusion/exclusion criteria + grants: + type: string + nullable: true + description: Funding agencies and grant numbers + title: Grant funding agency + example: NIH, award number R01GM987654 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Grant funding agency + contributors: + type: array + nullable: false + description: > + List of individuals who contributed to the study. Note that these are not necessarily identical with + the authors on an associated manuscript or other scholarly communication. 
Further note that typically + at least the three CRediT contributor roles "supervision", "investigation" and "data curation" should + be assigned. The corresponding author should be listed last. + title: Contributors + items: + $ref: '#/Contributor' + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: study + name: Contributors + study_contact: + type: string + nullable: true + description: > + Full contact information of the contact persons for this study. This should include an e-mail address + and a persistent identifier such as an ORCID ID. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + collected_by: + type: string + nullable: true + description: > + Full contact information of the data collector, i.e. the person who is legally responsible for data + collection and release. This should include an e-mail address and a persistent identifier such as an + ORCID ID. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + lab_name: + type: string + nullable: true + description: Department of data collector + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + lab_address: + type: string + nullable: true + description: Institution and institutional address of data collector + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects.
+ deprecated-replaced-by: + - contributors + submitted_by: + type: string + nullable: true + description: > + Full contact information of the data depositor, i.e., the person submitting the data to a repository. + This should include an e-mail address and a persistent identifier such as an ORCID ID. This is + supposed to be a short-lived and technical role until the submission is released. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + pub_ids: + type: array + items: + type: string + nullable: true + description: > + Array of publications describing the rationale and/or outcome of the study as an array of CURIE objects such as + a DOI or PubMed ID. Where more than one publication is given, if there is a primary publication for the study it + should come first. + title: Relevant publications + example: ["PMID:29144493", "DOI:10.1038/ni.3873"] + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Relevant publications + keywords_study: + type: array + items: + type: string + enum: + - contains_ig + - contains_tr + - contains_paired_chain + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + - contains_schema_receptor + - contains_schema_cellexpression + - contains_schema_receptorreactivity + nullable: true + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell").
+ title: Keywords for study + example: + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Keywords for study + format: controlled_vocabulary + adc_publish_date: + type: string + format: date-time + nullable: true + description: > + Date the study was first published in the AIRR Data Commons. + title: ADC Publish Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Publish Date + adc_update_date: + type: string + format: date-time + nullable: true + description: > + Date the study data was updated in the AIRR Data Commons. + title: ADC Update Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Update Date + +# 1-to-n relationship between a study and its subjects +# subject_id is unique within a study +Subject: + type: object + required: + - subject_id + - synthetic + - species + - sex + - age_min + - age_max + - age_unit + - age_event + - ancestry_population + - ethnicity + - race + - strain_name + - linked_subjects + - link_type + properties: + subject_id: + type: string + nullable: true + description: > + Subject ID assigned by submitter, unique within study. If possible, a persistent subject ID linked to + an INSDC or similar repository study should be used. + title: Subject ID + example: SUB856413 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Subject ID + synthetic: + type: boolean + nullable: false + description: TRUE for libraries in which the diversity has been synthetically generated (e.g. 
phage display) + title: Synthetic library + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Synthetic library + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + organism: + $ref: '#/Ontology' + nullable: true + description: Binomial designation of subject's species + x-airr: + deprecated: true + deprecated-description: Field was renamed to species for clarity. + deprecated-replaced-by: + - species + sex: + type: string + enum: + - male + - female + - pooled + - hermaphrodite + - intersex + - null + nullable: true + description: Biological sex of subject + title: Sex + example: female + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Sex + format: controlled_vocabulary + age_min: + type: number + nullable: true + description: Specific age or lower boundary of age range. + title: Age minimum + example: 60 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age minimum + age_max: + type: number + nullable: true + description: > + Upper boundary of age range or equal to age_min for specific age. + This field should only be null if age_min is null. 
+ title: Age maximum + example: 80 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age maximum + age_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of age range + title: Age unit + example: + id: UO:0000036 + label: year + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + age_event: + type: string + nullable: true + description: > + Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other + implementations submitters need to be aware that there is currently no mechanism to encode the potential + delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. + title: Age event + example: enrollment + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age event + age: + type: string + nullable: true + x-airr: + deprecated: true + deprecated-description: Split into two fields to specify as an age range.
+ deprecated-replaced-by: + - age_min + - age_max + - age_unit + ancestry_population: + $ref: '#/Ontology' + nullable: true + description: Broad geographic origin of ancestry (continent) + title: Ancestry population + example: + id: GAZ:00000459 + label: South America + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ancestry population + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + location_birth: + $ref: '#/Ontology' + nullable: true + description: Self-reported location of birth of the subject, preferred granularity is country-level + example: + id: GAZ:00002939 + label: Poland + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Location of birth + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + ethnicity: + type: string + nullable: true + description: Ethnic group of subject (defined as cultural/language-based membership) + title: Ethnicity + example: English, Kurds, Manchu, Yakuts (and other fields from Wikipedia) + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ethnicity + race: + type: string + nullable: true + description: Racial group of subject (as defined by NIH) + title: Race + example: White, American Indian or Alaska Native, Black, Asian, Native Hawaiian or Other Pacific Islander, Other + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Race + strain_name: + type: string + nullable: true + description: Non-human designation of the strain or breed of animal used + title: Strain name + example: C57BL/6J + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Strain name + linked_subjects: + type: string + nullable: true + description: Subject ID to which `Relation type` refers + title: Relation to other subjects + example: SUB1355648 + x-airr: + 
miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation to other subjects + link_type: + type: string + nullable: true + description: Relation between subject and `linked_subjects`, can be genetic or environmental (e.g. exposure) + title: Relation type + example: father, daughter, household + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation type + diagnosis: + type: array + nullable: false + description: Diagnosis information for subject + items: + $ref: '#/Diagnosis' + x-airr: + adc-query-support: true + genotype: + nullable: true + $ref: '#/SubjectGenotype' + title: SubjectGenotype + +# 1-to-n relationship between a subject and its diagnoses +Diagnosis: + type: object + required: + - study_group_description + - disease_diagnosis + - disease_length + - disease_stage + - prior_therapies + - immunogen + - intervention + - medical_history + properties: + study_group_description: + type: string + nullable: true + description: Designation of study arm to which the subject is assigned + title: Study group description + example: control + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Study group description + disease_diagnosis: + $ref: '#/Ontology' + nullable: true + description: Diagnosis of subject + title: Diagnosis + example: + id: DOID:9538 + label: multiple myeloma + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis + format: ontology + ontology: + draft: false + top_node: + id: DOID:4 + label: disease + disease_length: + type: string + nullable: true + description: Time duration between initial diagnosis and current intervention + title: Length of disease + example: 23 months + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Length of disease + format: physical_quantity + disease_stage: + type: 
string + nullable: true + description: Stage of disease at current intervention + title: Disease stage + example: Stage II + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Disease stage + prior_therapies: + type: string + nullable: true + description: List of all relevant previous therapies applied to subject for treatment of `Diagnosis` + title: Prior therapies for primary disease under study + example: melphalan/prednisone + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Prior therapies for primary disease under study + immunogen: + type: string + nullable: true + description: Antigen, vaccine or drug applied to subject at this intervention + title: Immunogen/agent + example: bortezomib + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Immunogen/agent + intervention: + type: string + nullable: true + description: Description of intervention + title: Intervention definition + example: systemic chemotherapy, 6 cycles, 1.25 mg/m2 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Intervention definition + medical_history: + type: string + nullable: true + description: Medical history of subject that is relevant to assess the course of disease and/or treatment + title: Other relevant medical history + example: MGUS, first diagnosed 5 years prior + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Other relevant medical history + +# 1-to-n relationship between a subject and its samples +# sample_id is unique within a study +Sample: + type: object + required: + - sample_id + - sample_type + - tissue + - anatomic_site + - disease_state_sample + - collection_time_point_relative + - collection_time_point_relative_unit + - collection_time_point_reference + - biomaterial_provider 
+ properties: + sample_id: + type: string + nullable: true + description: > + Sample ID assigned by submitter, unique within study. If possible, a persistent sample ID linked to + INSDC or similar repository study should be used. + title: Biological sample ID + example: SUP52415 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biological sample ID + sample_type: + type: string + nullable: true + description: The way the sample was obtained, e.g. fine-needle aspirate, organ harvest, peripheral venous puncture + title: Sample type + example: Biopsy + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample type + tissue: + $ref: '#/Ontology' + nullable: true + description: The actual tissue sampled, e.g. lymph node, liver, peripheral blood + title: Tissue + example: + id: UBERON:0002371 + label: bone marrow + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Tissue + format: ontology + ontology: + draft: false + top_node: + id: UBERON:0010000 + label: multicellular anatomical structure + anatomic_site: + type: string + nullable: true + description: The anatomic location of the tissue, e.g. 
Inguinal, femur + title: Anatomic site + example: Iliac crest + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Anatomic site + disease_state_sample: + type: string + nullable: true + description: Histopathologic evaluation of the sample + title: Disease state of sample + example: Tumor infiltration + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Disease state of sample + collection_time_point_relative: + type: number + nullable: true + description: Time point at which sample was taken, relative to `Collection time event` + title: Sample collection time + example: 14 + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample collection time + collection_time_point_relative_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of Sample collection time + title: Sample collection time unit + example: + id: UO:0000033 + label: day + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample collection time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + collection_time_point_reference: + type: string + nullable: true + description: Event in the study schedule to which `Sample collection time` relates to + title: Collection time event + example: Primary vaccination + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Collection time event + collection_location: + $ref: '#/Ontology' + nullable: true + description: Location where the sample was taken, preferred granularity is country-level + title: Sample collection location + example: + id: GAZ:00002939 + label: Poland + x-airr: + miairr: important + set: 2 + subset: sample + name: Sample collection location + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + biomaterial_provider: + type: string + nullable: true + 
description: Name and address of the entity providing the sample + title: Biomaterial provider + example: Tissues-R-Us, Tampa, FL, USA + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biomaterial provider + +# 1-to-n relationship between a sample and processing of its cells +CellProcessing: + type: object + required: + - tissue_processing + - cell_subset + - cell_phenotype + - single_cell + - cell_number + - cells_per_reaction + - cell_storage + - cell_quality + - cell_isolation + - cell_processing_protocol + properties: + tissue_processing: + type: string + nullable: true + description: Enzymatic digestion and/or physical methods used to isolate cells from sample + title: Tissue processing + example: Collagenase A/Dnase I digested, followed by Percoll gradient + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Tissue processing + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population + title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: + type: string + nullable: true + description: List of cellular markers and their expression levels used to isolate the cell population + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset phenotype + cell_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the analyzed cells originate. Typically, this value + should be identical to `species`, in which case it SHOULD NOT be set explicitly. 
However, there are + valid experimental setups in which the two might differ, e.g., chimeric animal models. If set, this + key will overwrite the `species` information for all lower layers of the schema. + title: Cell species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + single_cell: + type: boolean + nullable: true + description: TRUE if single cells were isolated into separate compartments + title: Single-cell sort + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Single-cell sort + cell_number: + type: integer + nullable: true + description: Total number of cells that went into the experiment + title: Number of cells in experiment + example: 1000000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells in experiment + cells_per_reaction: + type: integer + nullable: true + description: Number of cells for each biological replicate + title: Number of cells per sequencing reaction + example: 50000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells per sequencing reaction + cell_storage: + type: boolean + nullable: true + description: TRUE if cells were cryo-preserved between isolation and further processing + title: Cell storage + example: TRUE + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell storage + cell_quality: + type: string + nullable: true + description: Relative amount of viable cells after preparation and (if applicable) thawing + title: Cell quality + example: 90% viability as determined by 7-AAD + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell quality + 
cell_isolation: + type: string + nullable: true + description: Description of the procedure used for marker-based isolation or enrichment of cells + title: Cell isolation / enrichment procedure + example: > + Cells were stained with fluorochrome labeled antibodies and then sorted on a FlowMerlin (CE) cytometer. + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell isolation / enrichment procedure + cell_processing_protocol: + type: string + nullable: true + description: > + Description of the methods applied to the sample including cell preparation/ isolation/enrichment and + nucleic acid extraction. This should closely mirror the Materials and methods section in the manuscript. + title: Processing protocol + example: Stimulated with anti-CD3/anti-CD28 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Processing protocol + +# object for PCR primer targets +PCRTarget: + type: object + required: + - pcr_target_locus + - forward_pcr_primer_target_location + - reverse_pcr_primer_target_location + properties: + pcr_target_locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + - null + nullable: true + description: > + Designation of the target locus. Note that this field uses a controlled vocabulary that is meant to + provide a generic classification of the locus, not necessarily the correct designation according to + a specific nomenclature. 
+ title: Target locus for PCR + example: IGK + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Target locus for PCR + format: controlled_vocabulary + forward_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most distal nucleotide templated by the forward primer or primer mix + title: Forward PCR primer target location + example: IGHV, +23 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Forward PCR primer target location + reverse_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most proximal nucleotide templated by the reverse primer or primer mix + title: Reverse PCR primer target location + example: IGHG, +57 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Reverse PCR primer target location + +# generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid +# but may be 1-to-n for technical replicates. 
+NucleicAcidProcessing: + type: object + required: + - template_class + - template_quality + - template_amount + - template_amount_unit + - library_generation_method + - library_generation_protocol + - library_generation_kit_version + - complete_sequences + - physical_linkage + properties: + template_class: + type: string + enum: + - DNA + - RNA + nullable: false + description: > + The class of nucleic acid that was used as primary starting material for the following procedures + title: Target substrate + example: RNA + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate + format: controlled_vocabulary + template_quality: + type: string + nullable: true + description: Description and results of the quality control performed on the template material + title: Target substrate quality + example: RIN 9.2 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate quality + template_amount: + type: number + nullable: true + description: Amount of template that went into the process + title: Template amount + example: 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount + template_amount_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of template amount + title: Template amount time unit + example: + id: UO:0000024 + label: nanogram + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + library_generation_method: + type: string + enum: + - "PCR" + - "RT(RHP)+PCR" + - "RT(oligo-dT)+PCR" + - "RT(oligo-dT)+TS+PCR" + - "RT(oligo-dT)+TS(UMI)+PCR" + - "RT(specific)+PCR" + - "RT(specific)+TS+PCR" + - "RT(specific)+TS(UMI)+PCR" + - "RT(specific+UMI)+PCR" + - "RT(specific+UMI)+TS+PCR" 
+ - "RT(specific)+TS" + - "other" + nullable: false + description: Generic type of library generation + title: Library generation method + example: RT(oligo-dT)+TS(UMI)+PCR + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation method + format: controlled_vocabulary + library_generation_protocol: + type: string + nullable: true + description: Description of processes applied to substrate to obtain a library that is ready for sequencing + title: Library generation protocol + example: cDNA was generated using + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation protocol + library_generation_kit_version: + type: string + nullable: true + description: When using a library generation protocol from a commercial provider, provide the protocol version number + title: Protocol IDs + example: v2.1 (2016-09-15) + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Protocol IDs + pcr_target: + type: array + nullable: false + description: > + If a PCR step was performed that specifically targets the IG/TR loci, the target and primer locations + need to be provided here. This field holds an array of PCRTarget objects, so that multiplex PCR setups + amplifying multiple loci at the same time can be annotated using one record per locus. PCR setups not + targeting any specific locus must not annotate this field but select the appropriate + library_generation_method instead. + items: + $ref: '#/PCRTarget' + x-airr: + adc-query-support: true + complete_sequences: + type: string + enum: + - partial + - complete + - "complete+untemplated" + - mixed + nullable: false + description: > + To be considered `complete`, the procedure used for library construction MUST generate sequences that + 1) include the first V gene codon that encodes the mature polypeptide chain (i.e. 
after the + leader sequence) and 2) include the last complete codon of the J gene (i.e. 1 bp 5' of the J->C + splice site) and 3) provide sequence information for all positions between 1) and 2). To be considered + `complete+untemplated`, the sections of the sequences defined in points 1) to 3) of the previous + sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation. + `mixed` should only be used if the procedure used for library construction will likely produce multiple + categories of sequences in the given experiment. It SHOULD NOT be used as a replacement of a NULL value. + title: Complete sequences + example: partial + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Complete sequences + format: controlled_vocabulary + physical_linkage: + type: string + enum: + - none + - "hetero_head-head" + - "hetero_tail-head" + - "hetero_prelinked" + nullable: false + description: > + In case an experimental setup is used that physically links nucleic acids derived from distinct + `Rearrangements` before library preparation, this field describes the mode of that linkage. All + `hetero_*` terms indicate that in case of paired-read sequencing, the two reads should be expected + to map to distinct IG/TR loci. `*_head-head` refers to techniques that link the 5' ends of transcripts + in a single-cell context. `*_tail-head` refers to techniques that link the 3' end of one transcript to + the 5' end of another one in a single-cell context. This term does not provide any information whether + a continuous reading-frame between the two is generated. `*_prelinked` refers to constructs in which + the linkage was already present on the DNA level (e.g. scFv). 
+ title: Physical linkage of different rearrangements + example: hetero_head-head + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Physical linkage of different rearrangements + format: controlled_vocabulary + +# 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) +SequencingRun: + type: object + required: + - sequencing_run_id + - total_reads_passing_qc_filter + - sequencing_platform + - sequencing_facility + - sequencing_run_date + - sequencing_kit + properties: + sequencing_run_id: + type: string + nullable: true + description: ID of sequencing run assigned by the sequencing facility + title: Batch number + example: 160101_M01234 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Batch number + total_reads_passing_qc_filter: + type: integer + nullable: true + description: Number of usable reads for analysis + title: Total reads passing QC filter + example: 10365118 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Total reads passing QC filter + sequencing_platform: + type: string + nullable: true + description: Designation of sequencing instrument used + title: Sequencing platform + example: Alumina LoSeq 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing platform + sequencing_facility: + type: string + nullable: true + description: Name and address of sequencing facility + title: Sequencing facility + example: Seqs-R-Us, Vancouver, BC, Canada + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing facility + sequencing_run_date: + type: string + nullable: true + description: Date of sequencing run + title: Date of sequencing run + format: date + example: 2016-12-16 + x-airr: + miairr: important + 
adc-query-support: true + set: 3 + subset: process (sequencing) + name: Date of sequencing run + sequencing_kit: + type: string + nullable: true + description: Name, manufacturer, order and lot numbers of sequencing kit + title: Sequencing kit + example: "FullSeq 600, Alumina, #M123456C0, 789G1HK" + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing kit + sequencing_files: + $ref: '#/SequencingData' + nullable: false + description: Set of sequencing files produced by the sequencing run + x-airr: + adc-query-support: true + +# Resultant raw sequencing files from a SequencingRun +SequencingData: + type: object + required: + - sequencing_data_id + - file_type + - filename + - read_direction + - read_length + - paired_filename + - paired_read_direction + - paired_read_length + properties: + sequencing_data_id: + type: string + nullable: true + description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. + title: Raw sequencing data persistent identifier + example: "SRA:SRR11610494" + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + format: CURIE + file_type: + type: string + nullable: true + description: File format for the raw reads or sequences + title: Raw sequencing data file type + enum: + - fasta + - fastq + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file type + format: controlled_vocabulary + filename: + type: string + nullable: true + description: File name for the raw reads or sequences. The first file in paired-read sequencing. 
+ title: Raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R1_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file name + read_direction: + type: string + nullable: true + description: Read direction for the raw reads or sequences. The first file in paired-read sequencing. + title: Read direction + example: forward + enum: + - forward + - reverse + - mixed + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Read direction + format: controlled_vocabulary + read_length: + type: integer + nullable: true + description: Read length in bases for the first file in paired-read sequencing + title: Forward read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Forward read length + paired_filename: + type: string + nullable: true + description: File name for the second file in paired-read sequencing + title: Paired raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R2_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired raw sequencing data file name + paired_read_direction: + type: string + nullable: true + description: Read direction for the second file in paired-read sequencing + title: Paired read direction + example: reverse + enum: + - forward + - reverse + - mixed + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read direction + format: controlled_vocabulary + paired_read_length: + type: integer + nullable: true + description: Read length in bases for the second file in paired-read sequencing + title: Paired read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read length + index_filename: + type: string + nullable: true + 
description: File name for the index file + title: Sequencing index file name + example: MS10R-NMonson-C7JR9_S1_R3_001.fastq + x-airr: + adc-query-support: true + index_length: + type: integer + nullable: true + description: Read length in bases for the index file + title: Index read length + example: 8 + x-airr: + adc-query-support: true + +# 1-to-n relationship between a repertoire and data processing +# +# Set of annotated rearrangement sequences produced by +# data processing upon the raw sequence data for a repertoire. +DataProcessing: + type: object + required: + - software_versions + - paired_reads_assembly + - quality_thresholds + - primer_match_cutoffs + - collapsing_method + - data_processing_protocols + - germline_database + properties: + data_processing_id: + type: string + nullable: true + description: Identifier for the data processing object. + title: Data processing ID + x-airr: + name: Data processing ID + adc-query-support: true + identifier: true + primary_annotation: + type: boolean + default: false + nullable: false + description: > + If true, indicates this is the primary or default data processing for + the repertoire and its rearrangements. If false, indicates this is a secondary + or additional data processing. 
+ title: Primary annotation + x-airr: + adc-query-support: true + identifier: true + software_versions: + type: string + nullable: true + description: Version number and / or date, include company pipelines + title: Software tools and version numbers + example: IgBLAST 1.6 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Software tools and version numbers + paired_reads_assembly: + type: string + nullable: true + description: How paired end reads were assembled into a single receptor sequence + title: Paired read assembly + example: PandaSeq (minimal overlap 50, threshold 0.8) + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Paired read assembly + quality_thresholds: + type: string + nullable: true + description: How/if sequences were removed from (4) based on base quality scores + title: Quality thresholds + example: Average Phred score >=20 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Quality thresholds + primer_match_cutoffs: + type: string + nullable: true + description: How primers were identified in the sequences, were they removed/masked/etc? + title: Primer match cutoffs + example: Hamming distance <= 2 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Primer match cutoffs + collapsing_method: + type: string + nullable: true + description: The method used for combining multiple sequences from (4) into a single sequence in (5) + title: Collapsing method + example: MUSCLE 3.8.31 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Collapsing method + data_processing_protocols: + type: string + nullable: true + description: General description of how QC is performed + title: Data processing protocols + example: Data was processed using [...] 
+ x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Data processing protocols + data_processing_files: + type: array + items: + type: string + nullable: true + description: Array of file names for data produced by this data processing. + title: Processed data file names + example: + - 'ERR1278153_aa.txz' + - 'ERR1278153_ab.txz' + - 'ERR1278153_ac.txz' + x-airr: + adc-query-support: true + name: Processed data file names + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + title: V(D)J germline reference database + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: data (processed sequence) + name: V(D)J germline reference database + germline_set_ref: + type: string + nullable: true + description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + x-airr: + adc-query-support: true + analysis_provenance_id: + type: string + nullable: true + description: Identifier for machine-readable PROV model of analysis provenance + title: Analysis provenance ID + x-airr: + adc-query-support: true + +SampleProcessing: + allOf: + - type: object + properties: + sample_processing_id: + type: string + nullable: true + description: > + Identifier for the sample processing object. This field should be unique within the repertoire. + This field can be used to uniquely identify the combination of sample, cell processing, + nucleic acid processing and sequencing run information for the repertoire. 
+ title: Sample processing ID + x-airr: + name: Sample processing ID + adc-query-support: true + identifier: true + - $ref: '#/Sample' + - $ref: '#/CellProcessing' + - $ref: '#/NucleicAcidProcessing' + - $ref: '#/SequencingRun' + + +# The composite schema for the repertoire object +# +# This represents a sample repertoire as defined by the study +# and experimentally observed by raw sequence data. A repertoire +# can only be for one subject but may include multiple samples. +Repertoire: + type: object + required: + - study + - subject + - sample + - data_processing + properties: + repertoire_id: + type: string + nullable: true + description: > + Identifier for the repertoire object. This identifier should be globally unique so that repertoires + from multiple studies can be combined together without conflict. The repertoire_id is used to link + other AIRR data to a Repertoire. Specifically, the Rearrangements Schema includes repertoire_id for + referencing the specific Repertoire for that Rearrangement. 
+ title: Repertoire ID + x-airr: + adc-query-support: true + identifier: true + repertoire_name: + type: string + nullable: true + description: Short generic display name for the repertoire + title: Repertoire name + x-airr: + name: Repertoire name + adc-query-support: true + repertoire_description: + type: string + nullable: true + description: Generic repertoire description + title: Repertoire description + x-airr: + name: Repertoire description + adc-query-support: true + study: + $ref: '#/Study' + nullable: false + description: Study object + x-airr: + adc-query-support: true + subject: + $ref: '#/Subject' + nullable: false + description: Subject object + x-airr: + adc-query-support: true + sample: + type: array + nullable: false + description: List of Sample Processing objects + items: + $ref: '#/SampleProcessing' + x-airr: + adc-query-support: true + data_processing: + type: array + nullable: false + description: List of Data Processing objects + items: + $ref: '#/DataProcessing' + x-airr: + adc-query-support: true + +# An ordered group of repertoires for analysis purposes, includes optional time course +# Can be treated as a set if all repertoire_group_id are unique +RepertoireGroup: + type: object + required: + - repertoire_group_id + - repertoires + properties: + repertoire_group_id: + type: string + nullable: true + description: Identifier for this repertoire group + x-airr: + identifier: true + repertoire_group_name: + type: string + nullable: true + description: Short display name for this repertoire group + repertoire_group_description: + type: string + nullable: true + description: Repertoire group description + repertoires: + type: array + nullable: true + description: > + List of repertoires in this group with an associated description and time point designation + items: + type: object + properties: + repertoire_id: + type: string + nullable: false + description: Identifier to the repertoire + x-airr: + adc-query-support: true + 
repertoire_description: + type: string + nullable: true + description: Description of this repertoire within the group + x-airr: + adc-query-support: true + time_point: + $ref: '#/TimePoint' + nullable: true + description: Time point designation for this repertoire within the group + x-airr: + adc-query-support: true + +Alignment: + type: object + required: + - sequence_id + - segment + - call + - score + - cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier within the file. Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true + segment: + type: string + nullable: true + description: > + The segment for this alignment. One of V, D, J or C. + rev_comp: + type: boolean + nullable: true + description: > + Alignment result is from the reverse complement of the query sequence. + call: + type: string + nullable: true + description: > + Gene assignment with allele. + score: + type: number + nullable: true + description: > + Alignment score. + identity: + type: number + nullable: true + description: > + Alignment fractional identity. + support: + type: number + nullable: true + description: > + Alignment E-value, p-value, likelihood, probability or other similar measure of + support for the gene assignment as defined by the alignment tool. + cigar: + type: string + nullable: true + description: > + Alignment CIGAR string. + sequence_start: + type: integer + nullable: true + description: > + Start position of the segment in the query sequence (1-based closed interval). + sequence_end: + type: integer + nullable: true + description: > + End position of the segment in the query sequence (1-based closed interval). 
+ germline_start: + type: integer + nullable: true + description: > + Alignment start position in the reference sequence (1-based closed interval). + germline_end: + type: integer + nullable: true + description: > + Alignment end position in the reference sequence (1-based closed interval). + rank: + type: integer + nullable: true + description: > + Alignment rank. + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + + +# The extended rearrangement object +Rearrangement: + type: object + required: + - sequence_id + - sequence + - rev_comp + - productive + - v_call + - d_call + - j_call + - sequence_alignment + - germline_alignment + - junction + - junction_aa + - v_cigar + - d_cigar + - j_cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier for the Rearrangement. 
Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. When + downloaded from an AIRR Data Commons repository, this will usually be a universally unique + record locator for linking with other objects in the AIRR Data Model. + x-airr: + adc-query-support: true + identifier: true + sequence: + type: string + nullable: true + description: > + The query nucleotide sequence. Usually, this is the unmodified input sequence, which may be + reverse complemented if necessary. In some cases, this field may contain consensus sequences or + other types of collapsed input sequences if these steps are performed prior to alignment. + quality: + type: string + nullable: true + description: > + The Sanger/Phred quality scores for assessment of sequence quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_aa: + type: string + nullable: true + description: > + Amino acid translation of the query nucleotide sequence. + rev_comp: + type: boolean + nullable: true + description: > + True if the alignment is on the opposite strand (reverse complemented) with respect to the + query sequence. If True then all output data, such as alignment coordinates and sequences, + are based on the reverse complement of 'sequence'. + productive: + type: boolean + nullable: true + description: > + True if the V(D)J sequence is predicted to be productive. + x-airr: + adc-query-support: true + vj_in_frame: + type: boolean + nullable: true + description: True if the V and J gene alignments are in-frame. + stop_codon: + type: boolean + nullable: true + description: True if the aligned sequence contains a stop codon. + complete_vdj: + type: boolean + nullable: true + description: > + True if the sequence alignment spans the entire V(D)J region. 
Meaning, + sequence_alignment includes both the first V gene codon that encodes the + mature polypeptide chain (i.e., after the leader sequence) and the last + complete codon of the J gene (i.e., before the J-C splice site). + This does not require an absence of deletions within the internal + FWR and CDR regions of the alignment. + locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + - null + nullable: true + description: > + Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a + generic classification of the locus, not necessarily the correct designation according to a specific + nomenclature. + title: Gene locus + example: IGH + x-airr: + adc-query-support: true + name: Gene locus + format: controlled_vocabulary + locus_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the locus originates. Typically, this value should be + identical to `organism`, in which case it SHOULD NOT be set explicitly. However, there are valid + experimental setups in which the two might differ, e.g. transgenic animal models. If set, this key + will overwrite the `organism` information for all lower layers of the schema. + title: Locus species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + name: Locus species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + v_call: + type: string + nullable: true + description: > + V gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHV4-59*01 if using IMGT/GENE-DB). 
+ title: V gene with allele + example: IGHV4-59*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: V gene with allele + d_call: + type: string + nullable: true + description: > + First or only D gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + title: D gene with allele + example: IGHD3-10*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: D gene with allele + d2_call: + type: string + nullable: true + description: > + Second D gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHJ4*02 if using IMGT/GENE-DB). + title: J gene with allele + example: IGHJ4*02 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: J gene with allele + c_call: + type: string + nullable: true + description: > + Constant region gene with allele. If referring to a known reference sequence in a database the + relevant gene/allele nomenclature should be followed (e.g., IGHG1*01 if using IMGT/GENE-DB). + title: C region + example: IGHG1*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: C region + sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence, including any indel corrections or numbering spacers, + such as IMGT-gaps. Typically, this will include only the V(D)J region, but that is not + a requirement. 
+ quality_alignment: + type: string + nullable: true + description: > + Sanger/Phred quality scores for assessment of sequence_alignment quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the aligned query sequence. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred germline sequence spanning the same region + as the sequence_alignment field (typically the V(D)J region) and including the same set + of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the assembled germline sequence. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence, where the junction is defined as + the CDR3 plus the two flanking conserved codons. + title: IMGT-JUNCTION nucleotide sequence + example: TGTGCAAGAGCGGGAGTTTACGACGGATATACTATGGACTACTGG + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION nucleotide sequence + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + title: IMGT-JUNCTION amino acid sequence + example: CARAGVYDGYTMDYW + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION amino acid sequence + np1: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the V gene and + first D gene alignment or between the V gene and J gene alignments. + np1_aa: + type: string + nullable: true + description: > + Amino acid translation of the np1 field. 
+ np2: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between either the first D gene and J gene + alignments or the first D gene and second D gene alignments. + np2_aa: + type: string + nullable: true + description: > + Amino acid translation of the np2 field. + np3: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the second D gene + and J gene alignments. + np3_aa: + type: string + nullable: true + description: > + Amino acid translation of the np3 field. + cdr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR1 region. + cdr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr1 field. + cdr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR2 region. + cdr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr2 field. + cdr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR3 region. + cdr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr3 field. + fwr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR1 region. + fwr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr1 field. + fwr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR2 region. + fwr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr2 field. + fwr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR3 region. + fwr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr3 field. + fwr4: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR4 region. 
+ fwr4_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr4 field. + v_score: + type: number + nullable: true + description: Alignment score for the V gene. + v_identity: + type: number + nullable: true + description: Fractional identity for the V gene alignment. + v_support: + type: number + nullable: true + description: > + V gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the V gene assignment as defined by the alignment tool. + v_cigar: + type: string + nullable: true + description: CIGAR string for the V gene alignment. + d_score: + type: number + nullable: true + description: Alignment score for the first or only D gene alignment. + d_identity: + type: number + nullable: true + description: Fractional identity for the first or only D gene alignment. + d_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the first or only D gene as defined by the alignment tool. + d_cigar: + type: string + nullable: true + description: CIGAR string for the first or only D gene alignment. + d2_score: + type: number + nullable: true + description: Alignment score for the second D gene alignment. + d2_identity: + type: number + nullable: true + description: Fractional identity for the second D gene alignment. + d2_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the second D gene as defined by the alignment tool. + d2_cigar: + type: string + nullable: true + description: CIGAR string for the second D gene alignment. + j_score: + type: number + nullable: true + description: Alignment score for the J gene alignment. + j_identity: + type: number + nullable: true + description: Fractional identity for the J gene alignment. 
+ j_support: + type: number + nullable: true + description: > + J gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the J gene assignment as defined by the alignment tool. + j_cigar: + type: string + nullable: true + description: CIGAR string for the J gene alignment. + c_score: + type: number + nullable: true + description: Alignment score for the C gene alignment. + c_identity: + type: number + nullable: true + description: Fractional identity for the C gene alignment. + c_support: + type: number + nullable: true + description: > + C gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the C gene assignment as defined by the alignment tool. + c_cigar: + type: string + nullable: true + description: CIGAR string for the C gene alignment. + v_sequence_start: + type: integer + nullable: true + description: > + Start position of the V gene in the query sequence (1-based closed interval). + v_sequence_end: + type: integer + nullable: true + description: > + End position of the V gene in the query sequence (1-based closed interval). + v_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the V gene reference sequence (1-based closed interval). + v_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the V gene reference sequence (1-based closed interval). + v_alignment_start: + type: integer + nullable: true + description: > + Start position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + v_alignment_end: + type: integer + nullable: true + description: > + End position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_sequence_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in the query sequence. 
+ (1-based closed interval). + d_sequence_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in the query sequence. + (1-based closed interval). + d_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). + d2_sequence_start: + type: integer + nullable: true + description: > + Start position of the second D gene in the query sequence (1-based closed interval). + d2_sequence_end: + type: integer + nullable: true + description: > + End position of the second D gene in the query sequence (1-based closed interval). + d2_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the second D gene reference sequence (1-based closed interval). + d2_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the second D gene reference sequence (1-based closed interval). + d2_alignment_start: + type: integer + nullable: true + description: > + Start position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). 
+ d2_alignment_end: + type: integer + nullable: true + description: > + End position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). + j_sequence_start: + type: integer + nullable: true + description: > + Start position of the J gene in the query sequence (1-based closed interval). + j_sequence_end: + type: integer + nullable: true + description: > + End position of the J gene in the query sequence (1-based closed interval). + j_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the J gene reference sequence (1-based closed interval). + j_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the J gene reference sequence (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_sequence_start: + type: integer + nullable: true + description: > + Start position of the C gene in the query sequence (1-based closed interval). + c_sequence_end: + type: integer + nullable: true + description: > + End position of the C gene in the query sequence (1-based closed interval). + c_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the C gene reference sequence (1-based closed interval). + c_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the C gene reference sequence (1-based closed interval). 
+ c_alignment_start: + type: integer + nullable: true + description: > + Start position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_alignment_end: + type: integer + nullable: true + description: > + End position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + cdr1_start: + type: integer + nullable: true + description: CDR1 start position in the query sequence (1-based closed interval). + cdr1_end: + type: integer + nullable: true + description: CDR1 end position in the query sequence (1-based closed interval). + cdr2_start: + type: integer + nullable: true + description: CDR2 start position in the query sequence (1-based closed interval). + cdr2_end: + type: integer + nullable: true + description: CDR2 end position in the query sequence (1-based closed interval). + cdr3_start: + type: integer + nullable: true + description: CDR3 start position in the query sequence (1-based closed interval). + cdr3_end: + type: integer + nullable: true + description: CDR3 end position in the query sequence (1-based closed interval). + fwr1_start: + type: integer + nullable: true + description: FWR1 start position in the query sequence (1-based closed interval). + fwr1_end: + type: integer + nullable: true + description: FWR1 end position in the query sequence (1-based closed interval). + fwr2_start: + type: integer + nullable: true + description: FWR2 start position in the query sequence (1-based closed interval). + fwr2_end: + type: integer + nullable: true + description: FWR2 end position in the query sequence (1-based closed interval). + fwr3_start: + type: integer + nullable: true + description: FWR3 start position in the query sequence (1-based closed interval). + fwr3_end: + type: integer + nullable: true + description: FWR3 end position in the query sequence (1-based closed interval). 
+ fwr4_start: + type: integer + nullable: true + description: FWR4 start position in the query sequence (1-based closed interval). + fwr4_end: + type: integer + nullable: true + description: FWR4 end position in the query sequence (1-based closed interval). + v_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the V gene, including any + indel corrections or numbering spacers. + v_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_sequence_alignment field. + d_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the first or only D gene, including any + indel corrections or numbering spacers. + d_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_sequence_alignment field. + d2_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the second D gene, including any + indel corrections or numbering spacers. + d2_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_sequence_alignment field. + j_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the J gene, including any + indel corrections or numbering spacers. + j_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_sequence_alignment field. + c_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the constant region, including + any indel corrections or numbering spacers. + c_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_sequence_alignment field. 
+ v_germline_alignment: + type: string + nullable: true + description: > + Aligned V gene germline sequence spanning the same region + as the v_sequence_alignment field and including the same set + of corrections and spacers (if any). + v_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_germline_alignment field. + d_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d_sequence_alignment field and including the same set + of corrections and spacers (if any). + d_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_germline_alignment field. + d2_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d2_sequence_alignment field and including the same set + of corrections and spacers (if any). + d2_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_germline_alignment field. + j_germline_alignment: + type: string + nullable: true + description: > + Aligned J gene germline sequence spanning the same region + as the j_sequence_alignment field and including the same set + of corrections and spacers (if any). + j_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_germline_alignment field. + c_germline_alignment: + type: string + nullable: true + description: > + Aligned constant region germline sequence spanning the same region + as the c_sequence_alignment field and including the same set + of corrections and spacers (if any). + c_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_germline_alignment field. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction sequence. 
+ junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in the junction sequence. + x-airr: + adc-query-support: true + np1_length: + type: integer + nullable: true + description: > + Number of nucleotides between the V gene and first D gene alignments or + between the V gene and J gene alignments. + np2_length: + type: integer + nullable: true + description: > + Number of nucleotides between either the first D gene and J gene alignments + or the first D gene and second D gene alignments. + np3_length: + type: integer + nullable: true + description: > + Number of nucleotides between the second D gene and J gene alignments. + n1_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 5' of the first or only D gene alignment. + n2_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the first or only D gene alignment. + n3_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the second D gene alignment. + p3v_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the V gene alignment. + p5d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the first or only D gene alignment. + p3d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the first or only D gene alignment. + p5d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the second D gene alignment. + p3d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the second D gene alignment. + p5j_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the J gene alignment. 
+ v_frameshift: + type: boolean + nullable: true + description: > + True if the V gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the V gene reference sequence. + j_frameshift: + type: boolean + nullable: true + description: > + True if the J gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the J gene reference sequence. + d_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the first or only D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + d2_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the second D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + consensus_count: + type: integer + nullable: true + description: > + Number of reads contributing to the UMI consensus or contig assembly for this sequence. + For example, the sum of the number of reads for all UMIs that contribute to + the query sequence. + duplicate_count: + type: integer + nullable: true + description: > + Copy number or number of duplicate observations for the query sequence. + For example, the number of identical reads observed for this sequence. + title: Read count + example: 123 + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: Read count + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. + cell_id: + type: string + nullable: true + description: > + Identifier defining the cell of origin for the query sequence. 
+ title: Cell index + example: W06_046_091 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: Cell index + clone_id: + type: string + nullable: true + description: Clonal cluster assignment for the query sequence. + x-airr: + adc-query-support: true + identifier: true + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + identifier: true + sample_processing_id: + type: string + nullable: true + description: > + Identifier to the sample processing object in the repertoire metadata + for this rearrangement. If the repertoire has a single sample then + this field may be empty or missing. If the repertoire has multiple samples then + this field may be empty or missing if the sample cannot be differentiated or + the relationship is not maintained by the data processing. + x-airr: + adc-query-support: true + identifier: true + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. + x-airr: + adc-query-support: true + identifier: true + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + rearrangement_set_id: + type: string + nullable: true + description: > + Identifier for grouping Rearrangement objects. + x-airr: + deprecated: true + deprecated-description: Field has been replaced by other specialized identifiers. 
+ deprecated-replaced-by: + - repertoire_id + - sample_processing_id + - data_processing_id + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + +# A unique inferred clone object that has been constructed within a single data processing +# for a single repertoire and a subset of its sequences and/or rearrangements. +Clone: + type: object + required: + - clone_id + - germline_alignment + properties: + clone_id: + type: string + nullable: true + description: Identifier for the clone. + x-airr: + identifier: true + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this clone. + x-airr: + adc-query-support: true + sequences: + type: array + items: + type: string + nullable: true + description: > + List of sequence_id strings that act as keys to the Rearrangement records for members of the clone. + v_call: + type: string + nullable: true + description: > + V gene with allele of the inferred ancestor of the clone. For example, IGHV4-59*01. + example: IGHV4-59*01 + d_call: + type: string + nullable: true + description: > + D gene with allele of the inferred ancestor of the clone. For example, IGHD3-10*01. + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele of the inferred ancestor of the clone. For example, IGHJ4*02. 
+ example: IGHJ4*02 + junction: + type: string + nullable: true + description: > + Nucleotide sequence for the junction region of the inferred ancestor of the clone, + where the junction is defined as the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction. + junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in junction_aa. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred ancestor of the clone spanning the same region + as the sequence_alignment field of nodes (typically the V(D)J region) and including the + same set of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of germline_alignment. + v_alignment_start: + type: integer + nullable: true + description: > + Start position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + v_alignment_end: + type: integer + nullable: true + description: > + End position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). 
+ j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + junction_start: + type: integer + nullable: true + description: Junction region start position in the alignment (1-based closed interval). + junction_end: + type: integer + nullable: true + description: Junction region end position in the alignment (1-based closed interval). + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: + type: integer + nullable: true + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. + seed_id: + type: string + nullable: true + description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. + +# 1-to-n relationship for a clone to its trees. +Tree: + type: object + required: + - tree_id + - clone_id + - newick + properties: + tree_id: + type: string + nullable: true + description: Identifier for the tree. + x-airr: + identifier: true + clone_id: + type: string + nullable: true + description: Identifier for the clone. + newick: + type: string + nullable: true + description: Newick string of the tree edges. 
+ nodes: + type: object + nullable: true + description: Dictionary of nodes in the tree, keyed by sequence_id string + additionalProperties: + $ref: '#/Node' + +# 1-to-n relationship between a tree and its nodes +Node: + type: object + required: + - sequence_id + properties: + sequence_id: + type: string + nullable: true + description: > + Identifier for this node that matches the identifier in the newick string and, where possible, + the sequence_id in the source repertoire. + x-airr: + identifier: true + sequence_alignment: + type: string + nullable: true + description: > + Nucleotide sequence of the node, aligned to the germline_alignment for this clone, including + any indel corrections or spacers. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence for the node, where the junction is defined as + the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + +# The cell object acts as point of reference for all data that can be related +# to an individual cell, either by direct observation or inference. +Cell: + type: object + required: + - cell_id + - rearrangements + - repertoire_id + - virtual_pairing + properties: + cell_id: + type: string + nullable: false + description: > + Identifier defining the cell of origin for the query sequence. + title: Cell index + example: W06_046_091 + x-airr: + identifier: true + miairr: defined + adc-query-support: true + name: Cell index + rearrangements: + type: array + nullable: true + description: > + Array of sequence identifiers defined for the Rearrangement object + title: Cell-associated rearrangements + items: + type: string + example: [id1, id2] #empty vs NULL? 
+ x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated rearrangements + receptors: + type: array + nullable: true + description: > + Array of receptor identifiers defined for the Receptor object + title: Cell-associated receptors + items: + type: string + example: [id1, id2] #empty vs NULL? + x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated receptors + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + title: Parental repertoire of cell + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this clone. + title: Data processing for cell + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + expression_study_method: + type: string + enum: + - flow_cytometry + - single-cell_transcriptome + - null + nullable: true + description: > + Keyword describing the methodology used to assess expression. The values for this field MUST + come from a controlled vocabulary. + x-airr: + miairr: defined + adc-query-support: true + expression_raw_doi: + type: string + nullable: true + description: > + DOI of raw data set containing the current event + x-airr: + miairr: defined + adc-query-support: true + expression_index: + type: string + nullable: true + description: > + Index addressing the current event within the raw data set. + x-airr: + miairr: defined + virtual_pairing: + type: boolean + nullable: true + description: > + boolean to indicate if pairing was inferred. + title: Virtual pairing + x-airr: + miairr: defined + adc-query-support: true + name: Virtual pairing + +# The CellExpression object acts as a container to hold a single expression level measurement from +# an experiment. 
Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# a single repertoire. +CellExpression: + type: object + required: + - expression_id + - repertoire_id + - data_processing_id + - cell_id + - property + - property_type + - value + properties: + expression_id: + type: string + description: > + Identifier of this expression property measurement. + title: Expression property measurement identifier + nullable: false + x-airr: + identifier: true + miairr: defined + adc-query-support: true + name: Expression measurement identifier + cell_id: + type: string + description: > + Identifier of the cell to which this expression data is related. + title: Cell identifier + nullable: false + example: W06_046_091 + x-airr: + miairr: defined + adc-query-support: true + name: Cell identifier + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. + title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this clone. + title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + property_type: + type: string + description: > + Keyword describing the property type and detection method used to measure the property value. + The following keywords are recommended, but custom property types are also valid: + "mrna_expression_by_read_count", + "protein_expression_by_fluorescence_intensity", "antigen_bait_binding_by_fluorescence_intensity", + "protein_expression_by_dna_barcode_count" and "antigen_bait_binding_by_dna_barcode_count". 
+ nullable: false + title: Property type and detection method + x-airr: + miairr: defined + adc-query-support: true + name: Property type and detection method + property: + $ref: '#/Ontology' + nullable: true + title: Property information + description: > + Name of the property observed, typically a gene or antibody identifier (and label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). + example: + id: ENSG:ENSG00000275747 + label: IGHV3-79 + x-airr: + miairr: defined + adc-query-support: true + format: ontology + name: Property information + value: + type: number + description: Level at which the property was observed in the experiment (non-normalized). + title: Property value + nullable: true + example: 3 + x-airr: + miairr: defined + adc-query-support: true + name: Property value + + +# The Receptor object hold information about a receptor and its reactivity. +# +Receptor: + type: object + required: + - receptor_id + - receptor_hash + - receptor_type + - receptor_variable_domain_1_aa + - receptor_variable_domain_1_locus + - receptor_variable_domain_2_aa + - receptor_variable_domain_2_locus + properties: + receptor_id: + type: string + nullable: false + description: ID of the current Receptor object, unique within the local repository. + title: Receptor ID + example: TCR-MM-012345 + x-airr: + identifier: true + adc-query-support: true + receptor_hash: + type: string + nullable: false + description: > + The SHA256 hash of the receptor amino acid sequence, calculated on the concatenated + ``receptor_variable_domain_*_aa`` sequences and represented as base16-encoded string. 
+ title: Receptor hash ID + example: aa1c4b77a6f4927611ab39f5267415beaa0ba07a952c233d803b07e52261f026 + x-airr: + adc-query-support: true + receptor_type: + type: string + nullable: false + enum: + - Ig + - TCR + description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). + x-airr: + adc-query-support: true + receptor_variable_domain_1_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig heavy, TCR beta or TCR delta chain. + The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene. + example: > + QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS + x-airr: + adc-query-support: true + receptor_variable_domain_1_locus: + type: string + nullable: false + enum: + - IGH + - TRB + - TRD + description: Locus from which the variable domain in receptor_variable_domain_1_aa originates + example: IGH + x-airr: + adc-query-support: true + receptor_variable_domain_2_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig light, TCR alpha or TCR gamma chain. + The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene. 
+ example: > + QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL + x-airr: + adc-query-support: true + receptor_variable_domain_2_locus: + type: string + nullable: false + enum: + - IGI + - IGK + - IGL + - TRA + - TRG + description: Locus from which the variable domain in receptor_variable_domain_2_aa originates + example: IGL + x-airr: + adc-query-support: true + receptor_ref: + type: array + nullable: true + description: Array of receptor identifiers defined for the Receptor object + title: Receptor cross-references + items: + type: string + example: ["IEDB_RECEPTOR:10"] + x-airr: + adc-query-support: true + reactivity_measurements: + type: array + nullable: true + description: Records of reactivity measurement + items: + $ref: '#/ReceptorReactivity' + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + nullable: false + enum: + - "MHC:peptide" + - "MHC:non-peptide" + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + antigen_type: + type: string + nullable: false + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + antigen: + $ref: '#/Ontology' + nullable: false + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + nullable: true + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + nullable: true + description: Start position of the peptide within the reference protein sequence + peptide_end: + type: integer + nullable: true + description: End position of the peptide within the reference protein sequence + mhc_class: + type: string + nullable: true + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + - null + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + mhc_gene_1: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + nullable: true + description: Allele designation of the MHC alpha chain + example: HLA-DRA + mhc_gene_2: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_2: + type: string + nullable: true + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain + example: HLA-DRB1*04:01 + reactivity_method: + type: string + nullable: false + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological_activity + description: The methodology used to assess expression (assay 
implemented in experiment) + reactivity_readout: + type: string + nullable: false + enum: + - binding_strength + - cytokine_release + - dissociation_constant_kd + - on_rate + - off_rate + - pathogen_inhibition + description: Reactivity measurement read-out + example: cytokine release + reactivity_value: + type: number + nullable: false + description: The absolute (processed) value of the measurement + example: 162.26 + reactivity_unit: + type: string + nullable: false + description: The unit of the measurement + example: pg/ml diff --git a/lang/python/airr/specs/airr-schema-openapi3.yaml b/lang/python/airr/specs/airr-schema-openapi3.yaml new file mode 100644 index 000000000..d6c6d48e2 --- /dev/null +++ b/lang/python/airr/specs/airr-schema-openapi3.yaml @@ -0,0 +1,5156 @@ +# +# Schema definitions for AIRR standards objects +# +Info: + title: AIRR Schema + description: Schema definitions for AIRR standards objects + version: 1.4 + contact: + name: AIRR Community + url: https://github.com/airr-community + license: + name: Creative Commons Attribution 4.0 International + url: https://creativecommons.org/licenses/by/4.0/ + + +# Properties that are based upon an ontology use this +# standard schema definition +Ontology: + type: object + properties: + id: + type: string + nullable: true + description: CURIE of the concept, encoding the ontology and the local ID + label: + type: string + nullable: true + description: Label of the concept in the respective ontology + +# Map to expand CURIE prefixes to full IRIs +CURIEMap: + ABREG: + type: identifier + default: + map: ABREG + map: + ABREG: + iri_prefix: "http://antibodyregistry.org/AB_" + CHEBI: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CHEBI_" + CL: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/CL_" + DOI: + type: identifier + default: + map: DOI + map: + DOI: + iri_prefix: 
"https://doi.org/" + DOID: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/DOID_" + ENA: + type: identifier + default: + map: ENA + map: + ENA: + iri_prefix: "https://www.ebi.ac.uk/ena/browser/view/" + ENSG: + type: identifier + default: + map: ENSG + map: + ENSG: + iri_prefix: "https://www.ensembl.org/Multi/Search/Results?q=" + GAZ: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/GAZ_" + IEDB_RECEPTOR: + type: identifier + default: + map: IEDB + provider: IEDB + map: + IEDB: + iri_prefix: "https://www.iedb.org/receptor/" + MRO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/MRO_" + NCBITAXON: + type: taxonomy + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCBITaxon_" + BioPortal: + iri_prefix: "http://purl.bioontology.org/ontology/NCBITAXON/" + NCIT: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/NCIT_" + ORCID: + type: catalog + default: + map: ORCID + provider: ORCID + map: + ORCID: + iri_prefix: "https://orcid.org/" + ROR: + type: catalog + default: + map: ROR + provider: ROR + map: + ROR: + iri_prefix: "https://ror.org/" + SRA: + type: identifier + default: + map: SRA + map: + SRA: + iri_prefix: "https://trace.ncbi.nlm.nih.gov/Traces/sra/?run=" + UBERON: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UBERON_" + UNIPROT: + type: identifier + default: + map: UNIPROT + map: + UniProt: + iri_prefix: "http://purl.uniprot.org/uniprot/" + UO: + type: ontology + default: + map: OBO + provider: OLS + map: + OBO: + iri_prefix: "http://purl.obolibrary.org/obo/UO_" + +InformationProvider: + provider: + ENA: + request: + url: "{iri}" + response: text/html + IEDB: + request: + url: 
"https://query-api.iedb.org/tcr_search?receptor_group_id=eq.{local_id}" + response: application/json + OLS: + request: + url: "https://www.ebi.ac.uk/ols/api/ontologies/{ontology_id}/terms?iri={iri}" + response: application/json + Ontobee: + request: + url: "http://www.ontobee.org/ontology/rdf/{ontology_id}?iri={iri}" + response: application/rdf+xml + ORCID: + request: + url: "https://pub.orcid.org/v2.1/{local_id}" + header: + Accept: application/json + response: application/json + ROR: + request: + url: "https://api.ror.org/organizations/{iri}" + response: application/json + SRA: + request: + url: "{iri}" + response: text/html + parameter: + CHEBI: + Ontobee: + ontology_id: CHEBI + OLS: + ontology_id: chebi + CL: + Ontobee: + ontology_id: CL + OLS: + ontology_id: cl + DOID: + Ontobee: + ontology_id: DOID + OLS: + ontology_id: doid + GAZ: + Ontobee: + ontology_id: GAZ + OLS: + ontology_id: gaz + MRO: + Ontobee: + ontology_id: MRO + OLS: + ontology_id: mro + NCBITAXON: + Ontobee: + ontology_id: NCBITaxon + OLS: + ontology_id: ncbitaxon + BioPortal: + ontology_id: NCBITAXON + NCIT: + Ontobee: + ontology_id: NCIT + OLS: + ontology_id: ncit + UBERON: + Ontobee: + ontology_id: UBERON + OLS: + ontology_id: uberon + UO: + Ontobee: + ontology_id: UO + OLS: + ontology_id: uo + +# AIRR specification extensions +# +# The schema definitions for AIRR standards objects is extended to +# provide a number of AIRR specific attributes. This schema definition +# specifies the structure, property names and data types. These +# attributes are attached to an AIRR field with the x-airr property. + +Attributes: + type: object + properties: + miairr: + type: string + description: MiAIRR requirement level. + enum: + - essential + - important + - defined + default: defined + identifier: + type: boolean + description: > + True if the field is an identifier required to link metadata and/or individual + sequence records across objects in the complete AIRR Data Model and ADC API. 
+ default: false + adc-query-support: + type: boolean + description: > + True if an ADC API implementation must support queries on the field. + If false, query support for the field in ADC API implementations is optional. + default: false + adc-api-optional: + type: boolean + description: > + If false, repositories must implement these fields both for queries and query response. + Only applies to fields in the ADC API spec that are extensions to the AIRR Standard, + targeted at "convenience query fields" that make queries against repositories more + efficient than if queries were limited to AIRR fields only. + If true, repositories can choose to support the field or not. + default: false + deprecated: + type: boolean + description: True if the field has been deprecated from the schema. + default: false + deprecated-description: + type: string + description: Information regarding the deprecation of the field. + deprecated-replaced-by: + type: array + items: + type: string + description: The deprecated field is replaced by this list of fields. + set: + type: integer + description: MiAIRR set + subset: + type: string + description: MiAIRR subset + name: + type: string + description: MiAIRR name + format: + type: string + description: Field format. If null then assume the full range of the field data type + enum: + - ontology + - controlled_vocabulary + - physical_quantity + - CURIE + ontology: + type: object + description: Ontology definition for field + properties: + draft: + type: boolean + description: Indicates if ontology definition is a draft + top_node: + type: object + description: > + Concept to use as top node for ontology. Note that this must have the same CURIE namespace + as the actually annotated concept. 
+ properties: + id: + type: string + description: CURIE for the top node term + label: + type: string + description: Ontology name for the top node term + +# AIRR Data File +# +# A JSON data file that holds Repertoire metadata, data processing +# analysis objects, or any object in the AIRR Data Model. +# +# It is presumed that the objects gathered together in an AIRR Data File are related +# or relevant to each other, e.g. part of the same study; thus, the ID fields can be +# internally resolved unless the ID contains an external PID. This implies that AIRR +# Data Files cannot be merged simply by concatenating arrays; any merge program +# would need to manage duplicate or conflicting ID values. +# +# While the properties in an AIRR Data File are not required, if one is provided then +# the value should not be null. + +DataFile: + type: object + properties: + Info: + nullable: false + $ref: '#/InfoObject' + Repertoire: + type: array + nullable: false + description: List of repertoires + items: + $ref: '#/Repertoire' + RepertoireGroup: + type: array + nullable: false + description: List of repertoire groups + items: + $ref: '#/RepertoireGroup' + Rearrangement: + type: array + nullable: false + description: List of rearrangement records + items: + $ref: '#/Rearrangement' + Cell: + type: array + nullable: false + description: List of cells + items: + $ref: '#/Cell' + Clone: + type: array + nullable: false + description: List of clones + items: + $ref: '#/Clone' + GermlineSet: + type: array + nullable: false + description: List of germline sets + items: + $ref: '#/GermlineSet' + GenotypeSet: + type: array + nullable: false + description: List of genotype sets + items: + $ref: '#/GenotypeSet' + +# AIRR Info object, should be similar to openapi +# should we point to an openapi schema? +InfoObject: + type: object + description: Provides information about data and API responses. 
+ required: + - title + - version + properties: + title: + type: string + nullable: false + version: + type: string + nullable: false + description: + type: string + nullable: true + contact: + type: object + nullable: true + properties: + name: + type: string + nullable: true + url: + type: string + nullable: true + email: + type: string + nullable: true + license: + type: object + nullable: true + required: + - name + properties: + name: + type: string + nullable: false + url: + type: string + nullable: true + +# A time point +TimePoint: + description: Time point at which an observation or other action was performed. + type: object + properties: + label: + type: string + nullable: true + description: Informative label for the time point + example: Pre-operative sampling of cancer tissue + x-airr: + adc-query-support: true + value: + type: number + nullable: true + description: Value of the time point + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: true + description: Unit of the time point + title: Unit of immunization schedule + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# +# General objects +# + +# Contributor record to describe individuals and their contribution to a data set +# +Contributor: + description: Individual whose contribution to this work should be acknowledged + type: object + required: + - contributor_id + - name + properties: + contributor_id: + type: string + nullable: true + description: Unique identifier of this contributor within the file + x-airr: + identifier: true + miairr: important + name: + type: string + nullable: false + description: Full name of contributor + orcid_id: + $ref: '#/Ontology' + nullable: true + description: > + ORCID identifier of the contributor. 
Note that if present, the label of the ORCID record should take + precedence over the name reported in the `name` property. + title: ORCID iD + example: + id: ORCID:0000-0002-1825-0097 + label: Josiah Carberry + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation: + $ref: '#/Ontology' + nullable: true + description: > + ROR of the contributor's primary affiliation. Note that ROR are only minted for institutions, not + for individual institutes, divisions or departments. + title: ROR + example: + id: ROR:05h7xva58 + label: Wesleyan University + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: null + label: null + affiliation_department: + type: string + nullable: true + description: > + Additional information regarding the contributor's primary affiliation. Can be used to specify + individual institutes, divisions or departments. + example: Department for Psychoceramics + contributions: + type: array + nullable: true + description: List of all roles the contributor had in a project + items: + $ref: '#/ContributorContribution' + +ContributorContribution: + type: object + required: + - role + properties: + role: + type: string + nullable: false + description: Role according to CRediT taxonomy + enum: + - conceptualization + - data curation + - formal analysis + - funding acquisition + - investigation + - methodology + - project administration + - resources + - software + - supervision + - validation + - visualization + - writing - original draft + - writing - review & editing + degree: + type: string + nullable: true + description: > + Optional specification of the degree of contribution, should be used if multiple individuals serve + the same role. 
+ enum: + - lead + - equal + - supporting + + +# +# Germline gene schema +# + +# Rearranged and genomic germline sequences +RearrangedSequence: + type: object + description: > + Details of a directly observed rearranged sequence or an inference from rearranged sequences + contributing support for a gene or allele. + required: + - sequence_id + - sequence + - derivation + - observation_type + - repository_name + - repository_id + - deposited_version + - seq_start + - seq_end + properties: + sequence_id: + type: string + nullable: true + description: > + Unique identifier of this RearrangedSequence within the file, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. + x-airr: + identifier: true + miairr: important + sequence: + type: string + nullable: false + x-airr: + miairr: essential + description: nucleotide sequence + derivation: + type: string + nullable: true + enum: + - DNA + - RNA + - null + description: The class of nucleic acid that was used as primary starting material + x-airr: + miairr: important + observation_type: + type: string + nullable: false + enum: + - direct_sequencing + - inference_from_repertoire + description: > + The type of observation from which this sequence was drawn, such as direct sequencing or + inference from repertoire sequencing data. 
+ x-airr: + miairr: essential + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: true + x-airr: + miairr: defined + description: Name of the repository in which the sequence has been deposited + repository_ref: + type: string + nullable: true + x-airr: + miairr: defined + description: Queryable id or accession number of the sequence published by the repository + deposited_version: + type: string + nullable: true + x-airr: + miairr: defined + description: Version number of the sequence within the repository + sequence_start: + type: integer + nullable: false + x-airr: + miairr: essential + description: Start co-ordinate of the sequence detailed in this record, within the sequence deposited + sequence_end: + type: integer + nullable: false + x-airr: + miairr: essential + description: End co-ordinate of the sequence detailed in this record, within the sequence deposited + +UnrearrangedSequence: + description: Details of an unrearranged sequence contributing support for a gene or allele + type: object + required: + - sequence_id + - sequence + - repository_name + - assembly_id + - gff_seqid + - gff_start + - gff_end + - strand + properties: + sequence_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: unique identifier of this UnrearrangedSequence within the file + sequence: + type: string + nullable: false + description: > + Sequence of interest described in this record. Typically, this will include gene and promoter region. 
+ x-airr: + miairr: essential + curation: + type: string + nullable: true + description: Curational notes on the sequence + repository_name: + type: string + nullable: true + x-airr: + miairr: defined + description: Name of the repository in which the assembly or contig is deposited + repository_ref: + type: string + nullable: true + x-airr: + miairr: defined + description: Queryable id or accession number of the sequence published by the repository + patch_no: + type: string + nullable: true + description: Genome assembly patch number in which this gene was determined + gff_seqid: + type: string + nullable: true + description: > + Sequence (from the assembly) of a window including the gene and preferably also the promoter region. + gff_start: + type: integer + nullable: true + description: > + Genomic co-ordinates of the start of the sequence of interest described in this record in + Ensemble GFF version 3. + gff_end: + type: integer + nullable: true + description: > + Genomic co-ordinates of the end of the sequence of interest described in this record in + Ensemble GFF version 3. + strand: + type: string + nullable: true + enum: + - + + - "-" + - null + description: sense (+ or -) + +# V gene delineation +SequenceDelineationV: + description: Delineation of a V-gene in a particular system + type: object + required: + - sequence_delineation_id + - delineation_scheme + - fwr1_start + - fwr1_end + - cdr1_start + - cdr1_end + - fwr2_start + - fwr2_end + - cdr2_start + - cdr2_end + - fwr3_start + - fwr3_end + - cdr3_start + properties: + sequence_delineation_id: + type: string + nullable: true + description: > + Unique identifier of this SequenceDelineationV within the file. Typically, generated by the + repository hosting the record. 
+ x-airr: + identifier: true + miairr: important + + delineation_scheme: + type: string + nullable: true + x-airr: + miairr: important + description: Name of the delineation scheme + example: Chothia + unaligned_sequence: + type: string + nullable: true + x-airr: + miairr: important + description: entire V-sequence covered by this delineation + aligned_sequence: + type: string + nullable: true + description: > + Aligned sequence if this delineation provides an alignment. An aligned sequence should always be + provided for IMGT delineations. + fwr1_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR1 start co-ordinate in the 'unaligned sequence' field + fwr1_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR1 end co-ordinate in the 'unaligned sequence' field + cdr1_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR1 start co-ordinate in the 'unaligned sequence' field + cdr1_end: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR1 end co-ordinate in the 'unaligned sequence' field + fwr2_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR2 start co-ordinate in the 'unaligned sequence' field + fwr2_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR2 end co-ordinate in the 'unaligned sequence' field + cdr2_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR2 start co-ordinate in the 'unaligned sequence' field + cdr2_end: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR2 end co-ordinate in the 'unaligned sequence' field + fwr3_start: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR3 start co-ordinate in the 'unaligned sequence' field + fwr3_end: + type: integer + nullable: true + x-airr: + miairr: important + description: FWR3 end co-ordinate in the 
'unaligned sequence' field + cdr3_start: + type: integer + nullable: true + x-airr: + miairr: important + description: CDR3 start co-ordinate in the 'unaligned sequence' field + alignment_labels: + type: array + nullable: true + items: + type: string + description: > + One string for each codon in the aligned_sequence indicating the label of that codon according to + the numbering of the delineation scheme if it provides one. + +# Description of a putative or confirmed Ig receptor gene/allele +AlleleDescription: + description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations + type: object + required: + - allele_description_id + - acknowledgements + - release_version + - release_date + - release_description + - sequence + - coding_sequence + - locus + - sequence_type + - functional + - inference_type + - species + properties: + allele_description_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + Unique identifier of this AlleleDescription within the file. Typically, generated by the + repository hosting the record. + allele_description_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Unique reference to the allele description, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:IGHV1-69*01.001 + acknowledgements: + type: array + nullable: true + description: > + List of individuals whose contribution to the gene description should be acknowledged. Note that these + are not necessarily identical with the authors on an associated manuscript or other scholarly + communication. Further note that typically at least the three CRediT contributor roles "supervision", + "investigation" and "data curation" should be assigned. The current maintainer should be listed first. 
+ items: + $ref: '#/Contributor' + release_version: + type: integer + nullable: true + x-airr: + miairr: important + description: Version number of this record, updated whenever a revised version is published or released + release_date: + type: string + nullable: true + format: date-time + x-airr: + miairr: important + description: Date of this release + title: Release Date + example: "2021-02-02" + release_description: + type: string + nullable: true + x-airr: + miairr: important + description: Brief descriptive notes of the reason for this release and the changes embodied + label: + type: string + nullable: true + x-airr: + miairr: important + description: > + The accepted name for this gene or allele following the relevant nomenclature. + The value in this field should correspond to values in acceptable name fields of other schemas, + such as v_call, d_call, and j_call fields. + example: IGHV1-69*01 + sequence: + type: string + nullable: false + x-airr: + miairr: essential + description: > + Nucleotide sequence of the gene. This should cover the full length that is available, + including where possible RSS, and 5' UTR and lead-in for V-gene sequences. + coding_sequence: + type: string + nullable: true + x-airr: + miairr: important + description: > + Nucleotide sequence of the core coding region, such as the coding region of a D-, J- or C- gene + or the coding region of a V-gene excluding the leader. 
+ aliases: + type: array + nullable: true + items: + type: string + description: Alternative names for this sequence + locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + x-airr: + miairr: essential + chromosome: + type: integer + nullable: true + description: chromosome on which the gene is located + sequence_type: + type: string + nullable: false + enum: + - V + - D + - J + - C + description: Sequence type (V, D, J, C) + x-airr: + miairr: essential + functional: + type: boolean + nullable: true + x-airr: + miairr: important + description: True if the gene is functional, false if it is a pseudogene + inference_type: + type: string + nullable: true + enum: + - genomic_and_rearranged + - genomic_only + - rearranged_only + - null + description: Type of inference(s) from which this gene sequence was inferred + x-airr: + miairr: important + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: essential + species_subgroup: + type: string + nullable: true + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + - null + status: + type: string + nullable: true + enum: + - active + - draft + - retired + - withdrawn + - null + description: Status of record, assumed active if the field is not present + subgroup_designation: + type: string + nullable: true + description: Identifier of the gene subgroup or clade, as (and if) defined + gene_designation: + type: string + nullable: true + description: Gene number or other identifier, as (and if) defined + allele_designation: + type: string + nullable: true + description: Allele number or other identifier, as (and if) defined + 
allele_similarity_cluster_designation: + type: string + nullable: true + description: ID of the similarity cluster used in this germline set, if designated + allele_similarity_cluster_member_id: + type: string + nullable: true + description: Membership ID of the allele within the similarity cluster, if a cluster is designated + j_codon_frame: + type: integer + nullable: true + enum: + - 1 + - 2 + - 3 + - null + description: > + Codon position of the first nucleotide in the 'coding_sequence' field. Mandatory for J genes. + Not used for V or D genes. '1' means the sequence is in-frame, '2' means that the first bp is + missing from the first codon, and '3' means that the first 2 bp are missing. + gene_start: + type: integer + nullable: true + description: > + Co-ordinate in the sequence field of the first nucleotide in the coding_sequence field. + x-airr: + miairr: important + gene_end: + type: integer + nullable: true + description: > + Co-ordinate in the sequence field of the last gene-coding nucleotide in the coding_sequence field. + x-airr: + miairr: important + utr_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 5 prime UTR (V-genes only). + utr_5_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 5 prime UTR (V-genes only). + leader_1_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of L-PART1 (V-genes only). + leader_1_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of L-PART1 (V-genes only). + leader_2_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of L-PART2 (V-genes only). + leader_2_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of L-PART2 (V-genes only). 
+ v_rs_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the V recombination site (V-genes only). + v_rs_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the V recombination site (V-genes only). + d_rs_3_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). + d_rs_3_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 3 prime D recombination site (D-genes only). + d_rs_5_prime_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). + d_rs_5_prime_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of the 5 prime D recombination site (D-genes only). + j_cdr3_end: + type: integer + nullable: true + description: > + In the case of a J-gene, the co-ordinate in the sequence field of the first nucleotide of the + conserved PHE or TRP (IMGT codon position 118). + j_rs_start: + type: integer + nullable: true + description: Start co-ordinate in the sequence field of J recombination site (J-genes only). + j_rs_end: + type: integer + nullable: true + description: End co-ordinate in the sequence field of J recombination site (J-genes only). + j_donor_splice: + type: integer + nullable: true + description: Co-ordinate in the sequence field of the final 3' nucleotide of the J-REGION (J-genes only). 
+ v_gene_delineations: + type: array + nullable: true + items: + $ref: '#/SequenceDelineationV' + unrearranged_support: + type: array + nullable: true + items: + $ref: '#/UnrearrangedSequence' + rearranged_support: + type: array + nullable: true + items: + $ref: '#/RearrangedSequence' + paralogs: + type: array + nullable: true + items: + type: string + description: Gene symbols of any paralogs + curation: + type: string + nullable: true + description: > + Curational notes on the AlleleDescription. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. + curational_tags: + type: array + nullable: true + items: + type: string + enum: + - likely_truncated + - likely_full_length + description: Controlled-vocabulary tags applied to this description + +# Collection of gene descriptions into a germline set +GermlineSet: + type: object + description: > + A germline object set bringing together multiple AlleleDescriptions from the same strain or species. + All genes in a GermlineSet should be from a single locus. + required: + - germline_set_id + - acknowledgements + - release_version + - release_description + - release_date + - germline_set_name + - germline_set_ref + - species + - locus + - allele_descriptions + properties: + germline_set_id: + type: string + nullable: true + description: > + Unique identifier of the GermlineSet within this file. Typically, generated by the + repository hosting the record. + x-airr: + identifier: true + miairr: important + acknowledgements: + type: array + nullable: true + description: > + List of individuals whose contribution to the germline set should be acknowledged. Note that these are + not necessarily identical with the authors on an associated manuscript or other scholarly communication. + Further note that typically at least the three CRediT contributor roles "supervision", "investigation" + and "data curation" should be assigned. 
The corresponding author should be listed last. + items: + $ref: '#/Contributor' + release_version: + type: number + nullable: true + x-airr: + miairr: important + description: Version number of this record, allocated automatically + release_description: + type: string + nullable: true + x-airr: + miairr: important + description: Brief descriptive notes of the reason for this release and the changes embodied + release_date: + type: string + nullable: true + format: date-time + x-airr: + miairr: important + description: Date of this release + title: Release Date + example: "2021-02-02" + germline_set_name: + type: string + nullable: true + x-airr: + miairr: important + description: descriptive name of this germline set + germline_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + pub_ids: + type: array + items: + type: string + nullable: true + description: Publications describing the germline set + example: ["PMID:35720344"] + species: + $ref: '#/Ontology' + nullable: false + x-airr: + miairr: essential + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + species_subgroup: + type: string + nullable: true + description: Race, strain or other species subgroup to which this subject belongs + example: BALB/c + species_subgroup_type: + type: string + nullable: true + enum: + - breed + - strain + - inbred + - outbred + - locational + - null + locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRG + - TRD + description: Gene locus + x-airr: + miairr: essential + allele_descriptions: + type: array + nullable: true + items: + $ref: '#/AlleleDescription' + description: list of allele_descriptions in the germline set + x-airr: + miairr: important + curation: + type: string + nullable: true 
+ description: > + Curational notes on the GermlineSet. This can be used to give more extensive notes on the + decisions taken than are provided in the release_description. + +# +# Genotype schema +# + +# GenotypeSet lists the Genotypes (describing different loci) inferred for this subject + +GenotypeSet: + type: object + required: + - receptor_genotype_set_id + properties: + receptor_genotype_set_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier for this Receptor Genotype Set, typically generated by the repository + hosting the schema, for example from the underlying ID of the database record. + genotype_class_list: + description: List of Genotypes included in this Receptor Genotype Set. + type: array + nullable: true + items: + $ref: '#/Genotype' + +# Genotype of adaptive immune receptors +# This enumerates the alleles and gene deletions inferred in a single subject. +# Included alleles may either be listed by reference to a GermlineSet, or +# listed as 'undocumented', in which case the inferred sequence is provided + +Genotype: + type: object + required: + - receptor_genotype_id + - locus + properties: + receptor_genotype_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: > + A unique identifier within the file for this Receptor Genotype, typically generated by the + repository hosting the schema, for example from the underlying ID of the database record. 
+ locus: + type: string + nullable: false + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + description: Gene locus + example: IGH + x-airr: + adc-query-support: true + format: controlled_vocabulary + miairr: essential + documented_alleles: + type: array + nullable: true + description: List of alleles documented in reference set(s) + items: + $ref: '#/DocumentedAllele' + x-airr: + miairr: important + undocumented_alleles: + type: array + nullable: true + description: List of alleles inferred to be present and not documented in an identified GermlineSet + items: + $ref: '#/UndocumentedAllele' + x-airr: + adc-query-support: true + deleted_genes: + type: array + nullable: true + description: Array of genes identified as being deleted in this genotype + items: + $ref: '#/DeletedGene' + x-airr: + adc-query-support: true + inference_process: + type: string + nullable: true + enum: + - genomic_sequencing + - repertoire_sequencing + - null + description: Information on how the genotype was acquired. Controlled vocabulary. + title: Genotype acquisition process + example: repertoire_sequencing + x-airr: + adc-query-support: true + format: controlled_vocabulary + +# Documented Allele +# This describes a 'known' allele found in a genotype +# It is 'known' in the sense that it is documented in a reference set + +DocumentedAllele: + type: object + required: + - label + - germline_set_ref + properties: + label: + type: string + nullable: true + x-airr: + miairr: important + description: The accepted name for this allele, taken from the GermlineSet + germline_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: GermlineSet from which it was taken, referenced in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
+ +# Undocumented Allele +# This describes an 'undocumented' allele found in a genotype +# It is 'undocumented' in the sense that it was not found in reference sets consulted for the analysis + +UndocumentedAllele: + required: + - allele_name + - sequence + type: object + properties: + allele_name: + type: string + nullable: true + description: Allele name as allocated by the inference pipeline + x-airr: + miairr: important + sequence: + type: string + nullable: false + description: nt sequence of the allele, as provided by the inference pipeline + x-airr: + miairr: essential + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. + +# Deleted Gene +# It is regarded as 'deleted' in the sense that it was not identified during inference of the genotype + +DeletedGene: + required: + - label + - germline_set_ref + type: object + properties: + label: + type: string + nullable: false + description: The accepted name for this gene, taken from the GermlineSet + x-airr: + miairr: essential + germline_set_ref: + type: string + nullable: true + description: GermlineSet from which it was taken (issuer/name/version) + x-airr: + miairr: important + phasing: + type: integer + nullable: true + description: > + Chromosomal phasing indicator. Alleles with the same value are inferred to be located on the + same chromosome. 
+ + +# List of MHCGenotypes describing a subject's genotype +MHCGenotypeSet: + type: object + required: + - mhc_genotype_set_id + - mhc_genotype_list + properties: + mhc_genotype_set_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: A unique identifier for this MHCGenotypeSet + mhc_genotype_list: + description: List of MHCGenotypes included in this set + type: array + nullable: true + x-airr: + miairr: important + items: + $ref: '#/MHCGenotype' + +# Genotype of major histocompatibility complex (MHC) class I, class II and non-classical loci +MHCGenotype: + type: object + required: + - mhc_genotype_id + - mhc_class + - mhc_alleles + properties: + mhc_genotype_id: + type: string + nullable: true + x-airr: + identifier: true + miairr: important + description: A unique identifier for this MHCGenotype, assumed to be unique in the context of the study + mhc_class: + type: string + nullable: false + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + description: Class of MHC alleles described by the MHCGenotype + example: MHC-I + x-airr: + miairr: essential + adc-query-support: true + format: controlled_vocabulary + mhc_alleles: + type: array + nullable: true + description: List of MHC alleles of the indicated mhc_class identified in an individual + items: + $ref: '#/MHCAllele' + x-airr: + miairr: important + adc-query-support: true + mhc_genotyping_method: + type: string + nullable: true + description: > + Information on how the genotype was determined. The content of this field should come from a list of + recommended terms provided in the AIRR Schema documentation. 
+ title: MHC genotyping method + example: pcr_low_resolution + x-airr: + adc-query-support: true + miairr: important + + +# Allele of an MHC gene +MHCAllele: + type: object + properties: + allele_designation: + type: string + nullable: true + x-airr: + miairr: important + description: > + The accepted designation of an allele, usually its gene symbol plus allele/sub-allele/etc + identifiers, if provided by the mhc_typing method + gene: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the described allele belongs + title: MHC gene + example: + id: MRO:0000046 + label: HLA-A + x-airr: + adc-query-support: false + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + miairr: important + reference_set_ref: + type: string + nullable: true + x-airr: + miairr: important + description: Repository and list from which it was taken (issuer/name/version) + + +SubjectGenotype: + type: object + properties: + receptor_genotype_set: + nullable: true + $ref: '#/GenotypeSet' + description: Immune receptor genotype set for this subject. + mhc_genotype_set: + nullable: true + $ref: '#/MHCGenotypeSet' + description: MHC genotype set for this subject. + +# +# Repertoire metadata schema +# + +# The overall study with a globally unique study_id +Study: + type: object + required: + - study_id + - study_title + - study_type + - inclusion_exclusion_criteria + - grants + - contributors + - pub_ids + - keywords_study + properties: + study_id: + type: string + nullable: true + description: > + Unique ID assigned by study registry such as one of the International Nucleotide Sequence Database + Collaboration (INSDC) repositories. 
+ title: Study ID + example: PRJNA001 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study ID + study_title: + type: string + nullable: true + description: Descriptive study title + title: Study title + example: Effects of sun light exposure of the Treg repertoire + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study title + study_type: + $ref: '#/Ontology' + nullable: true + description: Type of study design + title: Study type + example: + id: NCIT:C15197 + label: Case-Control Study + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study type + format: ontology + ontology: + draft: false + top_node: + id: NCIT:C63536 + label: Study + study_description: + type: string + nullable: true + description: Generic study description + title: Study description + example: Longer description + x-airr: + name: Study description + adc-query-support: true + inclusion_exclusion_criteria: + type: string + nullable: true + description: List of criteria for inclusion/exclusion for the study + title: Study inclusion/exclusion criteria + example: "Include: Clinical P. falciparum infection; Exclude: Seropositive for HIV" + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Study inclusion/exclusion criteria + grants: + type: string + nullable: true + description: Funding agencies and grant numbers + title: Grant funding agency + example: NIH, award number R01GM987654 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Grant funding agency + contributors: + type: array + nullable: false + description: > + List of individuals who contributed to the study. Note that these are not necessarily identical with + the authors on an associated manuscript or other scholarly communication. 
Further note that typically + at least the three CRediT contributor roles "supervision", "investigation" and "data curation" should + be assigned. The corresponding author should be listed last. + title: Contributors + items: + $ref: '#/Contributor' + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: study + name: Contributors + study_contact: + type: string + nullable: true + description: > + Full contact information of the contact persons for this study. This should include an e-mail address + and a persistent identifier such as an ORCID ID. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + collected_by: + type: string + nullable: true + description: > + Full contact information of the data collector, i.e. the person who is legally responsible for data + collection and release. This should include an e-mail address and a persistent identifier such as an + ORCID ID. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + lab_name: + type: string + nullable: true + description: Department of data collector + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + lab_address: + type: string + nullable: true + description: Institution and institutional address of data collector + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. 
+ deprecated-replaced-by: + - contributors + submitted_by: + type: string + nullable: true + description: > + Full contact information of the data depositor, i.e., the person submitting the data to a repository. + This should include an e-mail address and a persistent identifier such as an ORCID ID. This is + supposed to be a short-lived and technical role until the submission is released. + x-airr: + deprecated: true + deprecated-description: > + Acknowledgements and contact information was re-organized into the contributors property, which + is an array of Contributor objects. + deprecated-replaced-by: + - contributors + pub_ids: + type: array + items: + type: string + nullable: true + description: > + Array of publications describing the rationale and/or outcome of the study as an array of CURIE objects such as + a DOI or Pubmed ID. Where more than one publication is given, if there is a primary publication for the study it + should come first. + title: Relevant publications + example: ["PMID:29144493", "DOI:10.1038/ni.3873"] + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Relevant publications + keywords_study: + type: array + items: + type: string + enum: + - contains_ig + - contains_tr + - contains_paired_chain + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + - contains_schema_receptor + - contains_schema_cellexpression + - contains_schema_receptorreactivity + nullable: true + description: > + Keywords describing properties of one or more data sets in a study. "contains_schema" keywords indicate that + the study contains data objects from the AIRR Schema of that type (Rearrangement, Clone, Cell, Receptor) while + the other keywords indicate that the study design considers the type of data indicated (e.g. it is possible to have + a study that "contains_paired_chain" but does not "contains_schema_cell"). 
+ title: Keywords for study + example: + - contains_ig + - contains_schema_rearrangement + - contains_schema_clone + - contains_schema_cell + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: study + name: Keywords for study + format: controlled_vocabulary + adc_publish_date: + type: string + format: date-time + nullable: true + description: > + Date the study was first published in the AIRR Data Commons. + title: ADC Publish Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Publish Date + adc_update_date: + type: string + format: date-time + nullable: true + description: > + Date the study data was updated in the AIRR Data Commons. + title: ADC Update Date + example: "2021-02-02" + x-airr: + adc-query-support: true + name: ADC Update Date + +# 1-to-n relationship between a study and its subjects +# subject_id is unique within a study +Subject: + type: object + required: + - subject_id + - synthetic + - species + - sex + - age_min + - age_max + - age_unit + - age_event + - ancestry_population + - ethnicity + - race + - strain_name + - linked_subjects + - link_type + properties: + subject_id: + type: string + nullable: true + description: > + Subject ID assigned by submitter, unique within study. If possible, a persistent subject ID linked to + an INSDC or similar repository study should be used. + title: Subject ID + example: SUB856413 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Subject ID + synthetic: + type: boolean + nullable: false + description: TRUE for libraries in which the diversity has been synthetically generated (e.g. 
phage display) + title: Synthetic library + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Synthetic library + species: + $ref: '#/Ontology' + nullable: false + description: Binomial designation of subject's species + title: Organism + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: essential + adc-query-support: true + set: 1 + subset: subject + name: Species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + organism: + $ref: '#/Ontology' + nullable: true + description: Binomial designation of subject's species + x-airr: + deprecated: true + deprecated-description: Field was renamed to species for clarity. + deprecated-replaced-by: + - species + sex: + type: string + enum: + - male + - female + - pooled + - hermaphrodite + - intersex + - null + nullable: true + description: Biological sex of subject + title: Sex + example: female + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Sex + format: controlled_vocabulary + age_min: + type: number + nullable: true + description: Specific age or lower boundary of age range. + title: Age minimum + example: 60 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age minimum + age_max: + type: number + nullable: true + description: > + Upper boundary of age range or equal to age_min for specific age. + This field should only be null if age_min is null. 
+ title: Age maximum + example: 80 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age maximum + age_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of age range + title: Age unit + example: + id: UO:0000036 + label: year + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + age_event: + type: string + nullable: true + description: > + Event in the study schedule to which `Age` refers. For NCBI BioSample this MUST be `sampling`. For other + implementations submitters need to be aware that there is currently no mechanism to encode to potential + delta between `Age event` and `Sample collection time`, hence the chosen events should be in temporal proximity. + title: Age event + example: enrollment + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Age event + age: + type: string + nullable: true + x-airr: + deprecated: true + deprecated-description: Split into two fields to specify as an age range. 
+ deprecated-replaced-by: + - age_min + - age_max + - age_unit + ancestry_population: + $ref: '#/Ontology' + nullable: true + description: Broad geographic origin of ancestry (continent) + title: Ancestry population + example: + id: GAZ:00000459 + label: South America + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ancestry population + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + location_birth: + $ref: '#/Ontology' + nullable: true + description: Self-reported location of birth of the subject, preferred granularity is country-level + example: + id: GAZ:00002939 + label: Poland + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Location of birth + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + ethnicity: + type: string + nullable: true + description: Ethnic group of subject (defined as cultural/language-based membership) + title: Ethnicity + example: English, Kurds, Manchu, Yakuts (and other fields from Wikipedia) + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Ethnicity + race: + type: string + nullable: true + description: Racial group of subject (as defined by NIH) + title: Race + example: White, American Indian or Alaska Native, Black, Asian, Native Hawaiian or Other Pacific Islander, Other + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Race + strain_name: + type: string + nullable: true + description: Non-human designation of the strain or breed of animal used + title: Strain name + example: C57BL/6J + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Strain name + linked_subjects: + type: string + nullable: true + description: Subject ID to which `Relation type` refers + title: Relation to other subjects + example: SUB1355648 + x-airr: + 
miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation to other subjects + link_type: + type: string + nullable: true + description: Relation between subject and `linked_subjects`, can be genetic or environmental (e.g. exposure) + title: Relation type + example: father, daughter, household + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: subject + name: Relation type + diagnosis: + type: array + nullable: false + description: Diagnosis information for subject + items: + $ref: '#/Diagnosis' + x-airr: + adc-query-support: true + genotype: + nullable: true + $ref: '#/SubjectGenotype' + title: SubjectGenotype + +# 1-to-n relationship between a subject and its diagnoses +Diagnosis: + type: object + required: + - study_group_description + - disease_diagnosis + - disease_length + - disease_stage + - prior_therapies + - immunogen + - intervention + - medical_history + properties: + study_group_description: + type: string + nullable: true + description: Designation of study arm to which the subject is assigned + title: Study group description + example: control + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Study group description + disease_diagnosis: + $ref: '#/Ontology' + nullable: true + description: Diagnosis of subject + title: Diagnosis + example: + id: DOID:9538 + label: multiple myeloma + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis + format: ontology + ontology: + draft: false + top_node: + id: DOID:4 + label: disease + disease_length: + type: string + nullable: true + description: Time duration between initial diagnosis and current intervention + title: Length of disease + example: 23 months + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Length of disease + format: physical_quantity + disease_stage: + type: 
string + nullable: true + description: Stage of disease at current intervention + title: Disease stage + example: Stage II + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Disease stage + prior_therapies: + type: string + nullable: true + description: List of all relevant previous therapies applied to subject for treatment of `Diagnosis` + title: Prior therapies for primary disease under study + example: melphalan/prednisone + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Prior therapies for primary disease under study + immunogen: + type: string + nullable: true + description: Antigen, vaccine or drug applied to subject at this intervention + title: Immunogen/agent + example: bortezomib + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Immunogen/agent + intervention: + type: string + nullable: true + description: Description of intervention + title: Intervention definition + example: systemic chemotherapy, 6 cycles, 1.25 mg/m2 + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Intervention definition + medical_history: + type: string + nullable: true + description: Medical history of subject that is relevant to assess the course of disease and/or treatment + title: Other relevant medical history + example: MGUS, first diagnosed 5 years prior + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Other relevant medical history + +# 1-to-n relationship between a subject and its samples +# sample_id is unique within a study +Sample: + type: object + required: + - sample_id + - sample_type + - tissue + - anatomic_site + - disease_state_sample + - collection_time_point_relative + - collection_time_point_relative_unit + - collection_time_point_reference + - biomaterial_provider 
+ properties: + sample_id: + type: string + nullable: true + description: > + Sample ID assigned by submitter, unique within study. If possible, a persistent sample ID linked to + INSDC or similar repository study should be used. + title: Biological sample ID + example: SUP52415 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biological sample ID + sample_type: + type: string + nullable: true + description: The way the sample was obtained, e.g. fine-needle aspirate, organ harvest, peripheral venous puncture + title: Sample type + example: Biopsy + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample type + tissue: + $ref: '#/Ontology' + nullable: true + description: The actual tissue sampled, e.g. lymph node, liver, peripheral blood + title: Tissue + example: + id: UBERON:0002371 + label: bone marrow + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Tissue + format: ontology + ontology: + draft: false + top_node: + id: UBERON:0010000 + label: multicellular anatomical structure + anatomic_site: + type: string + nullable: true + description: The anatomic location of the tissue, e.g. 
Inguinal, femur + title: Anatomic site + example: Iliac crest + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Anatomic site + disease_state_sample: + type: string + nullable: true + description: Histopathologic evaluation of the sample + title: Disease state of sample + example: Tumor infiltration + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Disease state of sample + collection_time_point_relative: + type: number + nullable: true + description: Time point at which sample was taken, relative to `Collection time event` + title: Sample collection time + example: 14 + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample collection time + collection_time_point_relative_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of Sample collection time + title: Sample collection time unit + example: + id: UO:0000033 + label: day + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Sample collection time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + collection_time_point_reference: + type: string + nullable: true + description: Event in the study schedule to which `Sample collection time` relates to + title: Collection time event + example: Primary vaccination + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Collection time event + collection_location: + $ref: '#/Ontology' + nullable: true + description: Location where the sample was taken, preferred granularity is country-level + title: Sample collection location + example: + id: GAZ:00002939 + label: Poland + x-airr: + miairr: important + set: 2 + subset: sample + name: Sample collection location + format: ontology + ontology: + draft: true + top_node: + id: GAZ:00000448 + label: geographic location + biomaterial_provider: + type: string + nullable: true + 
description: Name and address of the entity providing the sample + title: Biomaterial provider + example: Tissues-R-Us, Tampa, FL, USA + x-airr: + miairr: important + adc-query-support: true + set: 2 + subset: sample + name: Biomaterial provider + +# 1-to-n relationship between a sample and processing of its cells +CellProcessing: + type: object + required: + - tissue_processing + - cell_subset + - cell_phenotype + - single_cell + - cell_number + - cells_per_reaction + - cell_storage + - cell_quality + - cell_isolation + - cell_processing_protocol + properties: + tissue_processing: + type: string + nullable: true + description: Enzymatic digestion and/or physical methods used to isolate cells from sample + title: Tissue processing + example: Collagenase A/Dnase I digested, followed by Percoll gradient + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Tissue processing + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population + title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: + type: string + nullable: true + description: List of cellular markers and their expression levels used to isolate the cell population + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell subset phenotype + cell_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the analyzed cells originate. Typically, this value + should be identical to `species`, in which case it SHOULD NOT be set explicitly. 
However, there are + valid experimental setups in which the two might differ, e.g., chimeric animal models. If set, this + key will overwrite the `species` information for all lower layers of the schema. + title: Cell species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + single_cell: + type: boolean + nullable: true + description: TRUE if single cells were isolated into separate compartments + title: Single-cell sort + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Single-cell sort + cell_number: + type: integer + nullable: true + description: Total number of cells that went into the experiment + title: Number of cells in experiment + example: 1000000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells in experiment + cells_per_reaction: + type: integer + nullable: true + description: Number of cells for each biological replicate + title: Number of cells per sequencing reaction + example: 50000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Number of cells per sequencing reaction + cell_storage: + type: boolean + nullable: true + description: TRUE if cells were cryo-preserved between isolation and further processing + title: Cell storage + example: TRUE + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell storage + cell_quality: + type: string + nullable: true + description: Relative amount of viable cells after preparation and (if applicable) thawing + title: Cell quality + example: 90% viability as determined by 7-AAD + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell quality + 
cell_isolation: + type: string + nullable: true + description: Description of the procedure used for marker-based isolation or enrichment of cells + title: Cell isolation / enrichment procedure + example: > + Cells were stained with fluorochrome labeled antibodies and then sorted on a FlowMerlin (CE) cytometer. + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell isolation / enrichment procedure + cell_processing_protocol: + type: string + nullable: true + description: > + Description of the methods applied to the sample including cell preparation/isolation/enrichment and + nucleic acid extraction. This should closely mirror the Materials and methods section in the manuscript. + title: Processing protocol + example: Stimulated with anti-CD3/anti-CD28 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (cell) + name: Processing protocol + +# object for PCR primer targets +PCRTarget: + type: object + required: + - pcr_target_locus + - forward_pcr_primer_target_location + - reverse_pcr_primer_target_location + properties: + pcr_target_locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + - null + nullable: true + description: > + Designation of the target locus. Note that this field uses a controlled vocabulary that is meant to + provide a generic classification of the locus, not necessarily the correct designation according to + a specific nomenclature. 
+ title: Target locus for PCR + example: IGK + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Target locus for PCR + format: controlled_vocabulary + forward_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most distal nucleotide templated by the forward primer or primer mix + title: Forward PCR primer target location + example: IGHV, +23 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Forward PCR primer target location + reverse_pcr_primer_target_location: + type: string + nullable: true + description: Position of the most proximal nucleotide templated by the reverse primer or primer mix + title: Reverse PCR primer target location + example: IGHG, +57 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid [pcr]) + name: Reverse PCR primer target location + +# generally, a 1-to-1 relationship between a CellProcessing and processing of its nucleic acid +# but may be 1-to-n for technical replicates. 
+NucleicAcidProcessing: + type: object + required: + - template_class + - template_quality + - template_amount + - template_amount_unit + - library_generation_method + - library_generation_protocol + - library_generation_kit_version + - complete_sequences + - physical_linkage + properties: + template_class: + type: string + enum: + - DNA + - RNA + nullable: false + description: > + The class of nucleic acid that was used as primary starting material for the following procedures + title: Target substrate + example: RNA + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate + format: controlled_vocabulary + template_quality: + type: string + nullable: true + description: Description and results of the quality control performed on the template material + title: Target substrate quality + example: RIN 9.2 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Target substrate quality + template_amount: + type: number + nullable: true + description: Amount of template that went into the process + title: Template amount + example: 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount + template_amount_unit: + $ref: '#/Ontology' + nullable: true + description: Unit of template amount + title: Template amount time unit + example: + id: UO:0000024 + label: nanogram + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Template amount time unit + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + library_generation_method: + type: string + enum: + - "PCR" + - "RT(RHP)+PCR" + - "RT(oligo-dT)+PCR" + - "RT(oligo-dT)+TS+PCR" + - "RT(oligo-dT)+TS(UMI)+PCR" + - "RT(specific)+PCR" + - "RT(specific)+TS+PCR" + - "RT(specific)+TS(UMI)+PCR" + - "RT(specific+UMI)+PCR" + - "RT(specific+UMI)+TS+PCR" 
+ - "RT(specific)+TS" + - "other" + nullable: false + description: Generic type of library generation + title: Library generation method + example: RT(oligo-dT)+TS(UMI)+PCR + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation method + format: controlled_vocabulary + library_generation_protocol: + type: string + nullable: true + description: Description of processes applied to substrate to obtain a library that is ready for sequencing + title: Library generation protocol + example: cDNA was generated using + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Library generation protocol + library_generation_kit_version: + type: string + nullable: true + description: When using a library generation protocol from a commercial provider, provide the protocol version number + title: Protocol IDs + example: v2.1 (2016-09-15) + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Protocol IDs + pcr_target: + type: array + nullable: false + description: > + If a PCR step was performed that specifically targets the IG/TR loci, the target and primer locations + need to be provided here. This field holds an array of PCRTarget objects, so that multiplex PCR setups + amplifying multiple loci at the same time can be annotated using one record per locus. PCR setups not + targeting any specific locus must not annotate this field but select the appropriate + library_generation_method instead. + items: + $ref: '#/PCRTarget' + x-airr: + adc-query-support: true + complete_sequences: + type: string + enum: + - partial + - complete + - "complete+untemplated" + - mixed + nullable: false + description: > + To be considered `complete`, the procedure used for library construction MUST generate sequences that + 1) include the first V gene codon that encodes the mature polypeptide chain (i.e. 
after the + leader sequence) and 2) include the last complete codon of the J gene (i.e. 1 bp 5' of the J->C + splice site) and 3) provide sequence information for all positions between 1) and 2). To be considered + `complete+untemplated`, the sections of the sequences defined in points 1) to 3) of the previous + sentence MUST be untemplated, i.e. MUST NOT overlap with the primers used in library preparation. + `mixed` should only be used if the procedure used for library construction will likely produce multiple + categories of sequences in the given experiment. It SHOULD NOT be used as a replacement of a NULL value. + title: Complete sequences + example: partial + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Complete sequences + format: controlled_vocabulary + physical_linkage: + type: string + enum: + - none + - "hetero_head-head" + - "hetero_tail-head" + - "hetero_prelinked" + nullable: false + description: > + In case an experimental setup is used that physically links nucleic acids derived from distinct + `Rearrangements` before library preparation, this field describes the mode of that linkage. All + `hetero_*` terms indicate that in case of paired-read sequencing, the two reads should be expected + to map to distinct IG/TR loci. `*_head-head` refers to techniques that link the 5' ends of transcripts + in a single-cell context. `*_tail-head` refers to techniques that link the 3' end of one transcript to + the 5' end of another one in a single-cell context. This term does not provide any information whether + a continuous reading-frame between the two is generated. `*_prelinked` refers to constructs in which + the linkage was already present on the DNA level (e.g. scFv). 
+ title: Physical linkage of different rearrangements + example: hetero_head-head + x-airr: + miairr: essential + adc-query-support: true + set: 3 + subset: process (nucleic acid) + name: Physical linkage of different rearrangements + format: controlled_vocabulary + +# 1-to-n relationship between a NucleicAcidProcessing and SequencingRun with resultant raw sequence file(s) +SequencingRun: + type: object + required: + - sequencing_run_id + - total_reads_passing_qc_filter + - sequencing_platform + - sequencing_facility + - sequencing_run_date + - sequencing_kit + properties: + sequencing_run_id: + type: string + nullable: true + description: ID of sequencing run assigned by the sequencing facility + title: Batch number + example: 160101_M01234 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Batch number + total_reads_passing_qc_filter: + type: integer + nullable: true + description: Number of usable reads for analysis + title: Total reads passing QC filter + example: 10365118 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Total reads passing QC filter + sequencing_platform: + type: string + nullable: true + description: Designation of sequencing instrument used + title: Sequencing platform + example: Alumina LoSeq 1000 + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing platform + sequencing_facility: + type: string + nullable: true + description: Name and address of sequencing facility + title: Sequencing facility + example: Seqs-R-Us, Vancouver, BC, Canada + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing facility + sequencing_run_date: + type: string + nullable: true + description: Date of sequencing run + title: Date of sequencing run + format: date + example: 2016-12-16 + x-airr: + miairr: important + 
adc-query-support: true + set: 3 + subset: process (sequencing) + name: Date of sequencing run + sequencing_kit: + type: string + nullable: true + description: Name, manufacturer, order and lot numbers of sequencing kit + title: Sequencing kit + example: "FullSeq 600, Alumina, #M123456C0, 789G1HK" + x-airr: + miairr: important + adc-query-support: true + set: 3 + subset: process (sequencing) + name: Sequencing kit + sequencing_files: + $ref: '#/SequencingData' + nullable: false + description: Set of sequencing files produced by the sequencing run + x-airr: + adc-query-support: true + +# Resultant raw sequencing files from a SequencingRun +SequencingData: + type: object + required: + - sequencing_data_id + - file_type + - filename + - read_direction + - read_length + - paired_filename + - paired_read_direction + - paired_read_length + properties: + sequencing_data_id: + type: string + nullable: true + description: > + Persistent identifier of raw data stored in an archive (e.g. INSDC run ID). Data archive should + be identified in the CURIE prefix. + title: Raw sequencing data persistent identifier + example: "SRA:SRR11610494" + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + format: CURIE + file_type: + type: string + nullable: true + description: File format for the raw reads or sequences + title: Raw sequencing data file type + enum: + - fasta + - fastq + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file type + format: controlled_vocabulary + filename: + type: string + nullable: true + description: File name for the raw reads or sequences. The first file in paired-read sequencing. 
+ title: Raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R1_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Raw sequencing data file name + read_direction: + type: string + nullable: true + description: Read direction for the raw reads or sequences. The first file in paired-read sequencing. + title: Read direction + example: forward + enum: + - forward + - reverse + - mixed + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Read direction + format: controlled_vocabulary + read_length: + type: integer + nullable: true + description: Read length in bases for the first file in paired-read sequencing + title: Forward read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Forward read length + paired_filename: + type: string + nullable: true + description: File name for the second file in paired-read sequencing + title: Paired raw sequencing data file name + example: MS10R-NMonson-C7JR9_S1_R2_001.fastq + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired raw sequencing data file name + paired_read_direction: + type: string + nullable: true + description: Read direction for the second file in paired-read sequencing + title: Paired read direction + example: reverse + enum: + - forward + - reverse + - mixed + - null + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read direction + format: controlled_vocabulary + paired_read_length: + type: integer + nullable: true + description: Read length in bases for the second file in paired-read sequencing + title: Paired read length + example: 300 + x-airr: + miairr: important + adc-query-support: true + set: 4 + subset: data (raw reads) + name: Paired read length + index_filename: + type: string + nullable: true + 
description: File name for the index file + title: Sequencing index file name + example: MS10R-NMonson-C7JR9_S1_R3_001.fastq + x-airr: + adc-query-support: true + index_length: + type: integer + nullable: true + description: Read length in bases for the index file + title: Index read length + example: 8 + x-airr: + adc-query-support: true + +# 1-to-n relationship between a repertoire and data processing +# +# Set of annotated rearrangement sequences produced by +# data processing upon the raw sequence data for a repertoire. +DataProcessing: + type: object + required: + - software_versions + - paired_reads_assembly + - quality_thresholds + - primer_match_cutoffs + - collapsing_method + - data_processing_protocols + - germline_database + properties: + data_processing_id: + type: string + nullable: true + description: Identifier for the data processing object. + title: Data processing ID + x-airr: + name: Data processing ID + adc-query-support: true + identifier: true + primary_annotation: + type: boolean + default: false + nullable: false + description: > + If true, indicates this is the primary or default data processing for + the repertoire and its rearrangements. If false, indicates this is a secondary + or additional data processing. 
+ title: Primary annotation + x-airr: + adc-query-support: true + identifier: true + software_versions: + type: string + nullable: true + description: Version number and / or date, include company pipelines + title: Software tools and version numbers + example: IgBLAST 1.6 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Software tools and version numbers + paired_reads_assembly: + type: string + nullable: true + description: How paired end reads were assembled into a single receptor sequence + title: Paired read assembly + example: PandaSeq (minimal overlap 50, threshold 0.8) + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Paired read assembly + quality_thresholds: + type: string + nullable: true + description: How/if sequences were removed from (4) based on base quality scores + title: Quality thresholds + example: Average Phred score >=20 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Quality thresholds + primer_match_cutoffs: + type: string + nullable: true + description: How primers were identified in the sequences, were they removed/masked/etc? + title: Primer match cutoffs + example: Hamming distance <= 2 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Primer match cutoffs + collapsing_method: + type: string + nullable: true + description: The method used for combining multiple sequences from (4) into a single sequence in (5) + title: Collapsing method + example: MUSCLE 3.8.31 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Collapsing method + data_processing_protocols: + type: string + nullable: true + description: General description of how QC is performed + title: Data processing protocols + example: Data was processed using [...] 
+ x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: process (computational) + name: Data processing protocols + data_processing_files: + type: array + items: + type: string + nullable: true + description: Array of file names for data produced by this data processing. + title: Processed data file names + example: + - 'ERR1278153_aa.txz' + - 'ERR1278153_ab.txz' + - 'ERR1278153_ac.txz' + x-airr: + adc-query-support: true + name: Processed data file names + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + title: V(D)J germline reference database + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + miairr: important + adc-query-support: true + set: 5 + subset: data (processed sequence) + name: V(D)J germline reference database + germline_set_ref: + type: string + nullable: true + description: Unique identifier of the germline set and version, in standardized form (Repo:Label:Version) + example: OGRDB:Human_IGH:2021.11 + x-airr: + adc-query-support: true + analysis_provenance_id: + type: string + nullable: true + description: Identifier for machine-readable PROV model of analysis provenance + title: Analysis provenance ID + x-airr: + adc-query-support: true + +SampleProcessing: + allOf: + - type: object + properties: + sample_processing_id: + type: string + nullable: true + description: > + Identifier for the sample processing object. This field should be unique within the repertoire. + This field can be used to uniquely identify the combination of sample, cell processing, + nucleic acid processing and sequencing run information for the repertoire. 
+ title: Sample processing ID + x-airr: + name: Sample processing ID + adc-query-support: true + identifier: true + - $ref: '#/Sample' + - $ref: '#/CellProcessing' + - $ref: '#/NucleicAcidProcessing' + - $ref: '#/SequencingRun' + + +# The composite schema for the repertoire object +# +# This represents a sample repertoire as defined by the study +# and experimentally observed by raw sequence data. A repertoire +# can only be for one subject but may include multiple samples. +Repertoire: + type: object + required: + - study + - subject + - sample + - data_processing + properties: + repertoire_id: + type: string + nullable: true + description: > + Identifier for the repertoire object. This identifier should be globally unique so that repertoires + from multiple studies can be combined together without conflict. The repertoire_id is used to link + other AIRR data to a Repertoire. Specifically, the Rearrangements Schema includes repertoire_id for + referencing the specific Repertoire for that Rearrangement. 
+ title: Repertoire ID + x-airr: + adc-query-support: true + identifier: true + repertoire_name: + type: string + nullable: true + description: Short generic display name for the repertoire + title: Repertoire name + x-airr: + name: Repertoire name + adc-query-support: true + repertoire_description: + type: string + nullable: true + description: Generic repertoire description + title: Repertoire description + x-airr: + name: Repertoire description + adc-query-support: true + study: + $ref: '#/Study' + nullable: false + description: Study object + x-airr: + adc-query-support: true + subject: + $ref: '#/Subject' + nullable: false + description: Subject object + x-airr: + adc-query-support: true + sample: + type: array + nullable: false + description: List of Sample Processing objects + items: + $ref: '#/SampleProcessing' + x-airr: + adc-query-support: true + data_processing: + type: array + nullable: false + description: List of Data Processing objects + items: + $ref: '#/DataProcessing' + x-airr: + adc-query-support: true + +# An ordered group of repertoires for analysis purposes, includes optional time course +# Can be treated as a set if all repertoire_group_id are unique +RepertoireGroup: + type: object + required: + - repertoire_group_id + - repertoires + properties: + repertoire_group_id: + type: string + nullable: true + description: Identifier for this repertoire group + x-airr: + identifier: true + repertoire_group_name: + type: string + nullable: true + description: Short display name for this repertoire group + repertoire_group_description: + type: string + nullable: true + description: Repertoire group description + repertoires: + type: array + nullable: true + description: > + List of repertoires in this group with an associated description and time point designation + items: + type: object + properties: + repertoire_id: + type: string + nullable: false + description: Identifier to the repertoire + x-airr: + adc-query-support: true + 
repertoire_description: + type: string + nullable: true + description: Description of this repertoire within the group + x-airr: + adc-query-support: true + time_point: + $ref: '#/TimePoint' + nullable: true + description: Time point designation for this repertoire within the group + x-airr: + adc-query-support: true + +Alignment: + type: object + required: + - sequence_id + - segment + - call + - score + - cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier within the file. Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. + x-airr: + identifier: true + segment: + type: string + nullable: true + description: > + The segment for this alignment. One of V, D, J or C. + rev_comp: + type: boolean + nullable: true + description: > + Alignment result is from the reverse complement of the query sequence. + call: + type: string + nullable: true + description: > + Gene assignment with allele. + score: + type: number + nullable: true + description: > + Alignment score. + identity: + type: number + nullable: true + description: > + Alignment fractional identity. + support: + type: number + nullable: true + description: > + Alignment E-value, p-value, likelihood, probability or other similar measure of + support for the gene assignment as defined by the alignment tool. + cigar: + type: string + nullable: true + description: > + Alignment CIGAR string. + sequence_start: + type: integer + nullable: true + description: > + Start position of the segment in the query sequence (1-based closed interval). + sequence_end: + type: integer + nullable: true + description: > + End position of the segment in the query sequence (1-based closed interval). 
+ germline_start: + type: integer + nullable: true + description: > + Alignment start position in the reference sequence (1-based closed interval). + germline_end: + type: integer + nullable: true + description: > + Alignment end position in the reference sequence (1-based closed interval). + rank: + type: integer + nullable: true + description: > + Alignment rank. + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + + +# The extended rearrangement object +Rearrangement: + type: object + required: + - sequence_id + - sequence + - rev_comp + - productive + - v_call + - d_call + - j_call + - sequence_alignment + - germline_alignment + - junction + - junction_aa + - v_cigar + - d_cigar + - j_cigar + properties: + sequence_id: + type: string + nullable: true + description: > + Unique query sequence identifier for the Rearrangement.
Most often this will be the input sequence + header or a substring thereof, but may also be a custom identifier defined by the tool in + cases where query sequences have been combined in some fashion prior to alignment. When + downloaded from an AIRR Data Commons repository, this will usually be a universally unique + record locator for linking with other objects in the AIRR Data Model. + x-airr: + adc-query-support: true + identifier: true + sequence: + type: string + nullable: true + description: > + The query nucleotide sequence. Usually, this is the unmodified input sequence, which may be + reverse complemented if necessary. In some cases, this field may contain consensus sequences or + other types of collapsed input sequences if these steps are performed prior to alignment. + quality: + type: string + nullable: true + description: > + The Sanger/Phred quality scores for assessment of sequence quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_aa: + type: string + nullable: true + description: > + Amino acid translation of the query nucleotide sequence. + rev_comp: + type: boolean + nullable: true + description: > + True if the alignment is on the opposite strand (reverse complemented) with respect to the + query sequence. If True then all output data, such as alignment coordinates and sequences, + are based on the reverse complement of 'sequence'. + productive: + type: boolean + nullable: true + description: > + True if the V(D)J sequence is predicted to be productive. + x-airr: + adc-query-support: true + vj_in_frame: + type: boolean + nullable: true + description: True if the V and J gene alignments are in-frame. + stop_codon: + type: boolean + nullable: true + description: True if the aligned sequence contains a stop codon. + complete_vdj: + type: boolean + nullable: true + description: > + True if the sequence alignment spans the entire V(D)J region. 
Meaning, + sequence_alignment includes both the first V gene codon that encodes the + mature polypeptide chain (i.e., after the leader sequence) and the last + complete codon of the J gene (i.e., before the J-C splice site). + This does not require an absence of deletions within the internal + FWR and CDR regions of the alignment. + locus: + type: string + enum: + - IGH + - IGI + - IGK + - IGL + - TRA + - TRB + - TRD + - TRG + - null + nullable: true + description: > + Gene locus (chain type). Note that this field uses a controlled vocabulary that is meant to provide a + generic classification of the locus, not necessarily the correct designation according to a specific + nomenclature. + title: Gene locus + example: IGH + x-airr: + adc-query-support: true + name: Gene locus + format: controlled_vocabulary + locus_species: + $ref: '#/Ontology' + nullable: true + description: > + Binomial designation of the species from which the locus originates. Typically, this value should be + identical to `organism`, in which case it SHOULD NOT be set explicitly. However, there are valid + experimental setups in which the two might differ, e.g. transgenic animal models. If set, this key + will overwrite the `organism` information for all lower layers of the schema. + title: Locus species + example: + id: NCBITAXON:9606 + label: Homo sapiens + x-airr: + miairr: defined + adc-query-support: true + name: Locus species + format: ontology + ontology: + draft: false + top_node: + id: NCBITAXON:7776 + label: Gnathostomata + v_call: + type: string + nullable: true + description: > + V gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHV4-59*01 if using IMGT/GENE-DB).
+ title: V gene with allele + example: IGHV4-59*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: V gene with allele + d_call: + type: string + nullable: true + description: > + First or only D gene with allele. If referring to a known reference sequence in a database + the relevant gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + title: D gene with allele + example: IGHD3-10*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: D gene with allele + d2_call: + type: string + nullable: true + description: > + Second D gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHD3-10*01 if using IMGT/GENE-DB). + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele. If referring to a known reference sequence in a database the relevant + gene/allele nomenclature should be followed (e.g., IGHJ4*02 if using IMGT/GENE-DB). + title: J gene with allele + example: IGHJ4*02 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: J gene with allele + c_call: + type: string + nullable: true + description: > + Constant region gene with allele. If referring to a known reference sequence in a database the + relevant gene/allele nomenclature should be followed (e.g., IGHG1*01 if using IMGT/GENE-DB). + title: C region + example: IGHG1*01 + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: C region + sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence, including any indel corrections or numbering spacers, + such as IMGT-gaps. Typically, this will include only the V(D)J region, but that is not + a requirement. 
+ quality_alignment: + type: string + nullable: true + description: > + Sanger/Phred quality scores for assessment of sequence_alignment quality. + Phred quality scores from 0 to 93 are encoded using ASCII 33 to 126 (Used by Illumina from v1.8.) + sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the aligned query sequence. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred germline sequence spanning the same region + as the sequence_alignment field (typically the V(D)J region) and including the same set + of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the assembled germline sequence. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence, where the junction is defined as + the CDR3 plus the two flanking conserved codons. + title: IMGT-JUNCTION nucleotide sequence + example: TGTGCAAGAGCGGGAGTTTACGACGGATATACTATGGACTACTGG + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION nucleotide sequence + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + title: IMGT-JUNCTION amino acid sequence + example: CARAGVYDGYTMDYW + x-airr: + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: IMGT-JUNCTION amino acid sequence + np1: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the V gene and + first D gene alignment or between the V gene and J gene alignments. + np1_aa: + type: string + nullable: true + description: > + Amino acid translation of the np1 field. 
+ np2: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between either the first D gene and J gene + alignments or the first D gene and second D gene alignments. + np2_aa: + type: string + nullable: true + description: > + Amino acid translation of the np2 field. + np3: + type: string + nullable: true + description: > + Nucleotide sequence of the combined N/P region between the second D gene + and J gene alignments. + np3_aa: + type: string + nullable: true + description: > + Amino acid translation of the np3 field. + cdr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR1 region. + cdr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr1 field. + cdr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR2 region. + cdr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr2 field. + cdr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned CDR3 region. + cdr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the cdr3 field. + fwr1: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR1 region. + fwr1_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr1 field. + fwr2: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR2 region. + fwr2_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr2 field. + fwr3: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR3 region. + fwr3_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr3 field. + fwr4: + type: string + nullable: true + description: > + Nucleotide sequence of the aligned FWR4 region. 
+ fwr4_aa: + type: string + nullable: true + description: > + Amino acid translation of the fwr4 field. + v_score: + type: number + nullable: true + description: Alignment score for the V gene. + v_identity: + type: number + nullable: true + description: Fractional identity for the V gene alignment. + v_support: + type: number + nullable: true + description: > + V gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the V gene assignment as defined by the alignment tool. + v_cigar: + type: string + nullable: true + description: CIGAR string for the V gene alignment. + d_score: + type: number + nullable: true + description: Alignment score for the first or only D gene alignment. + d_identity: + type: number + nullable: true + description: Fractional identity for the first or only D gene alignment. + d_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the first or only D gene as defined by the alignment tool. + d_cigar: + type: string + nullable: true + description: CIGAR string for the first or only D gene alignment. + d2_score: + type: number + nullable: true + description: Alignment score for the second D gene alignment. + d2_identity: + type: number + nullable: true + description: Fractional identity for the second D gene alignment. + d2_support: + type: number + nullable: true + description: > + D gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the second D gene as defined by the alignment tool. + d2_cigar: + type: string + nullable: true + description: CIGAR string for the second D gene alignment. + j_score: + type: number + nullable: true + description: Alignment score for the J gene alignment. + j_identity: + type: number + nullable: true + description: Fractional identity for the J gene alignment. 
+ j_support: + type: number + nullable: true + description: > + J gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the J gene assignment as defined by the alignment tool. + j_cigar: + type: string + nullable: true + description: CIGAR string for the J gene alignment. + c_score: + type: number + nullable: true + description: Alignment score for the C gene alignment. + c_identity: + type: number + nullable: true + description: Fractional identity for the C gene alignment. + c_support: + type: number + nullable: true + description: > + C gene alignment E-value, p-value, likelihood, probability or other similar measure of + support for the C gene assignment as defined by the alignment tool. + c_cigar: + type: string + nullable: true + description: CIGAR string for the C gene alignment. + v_sequence_start: + type: integer + nullable: true + description: > + Start position of the V gene in the query sequence (1-based closed interval). + v_sequence_end: + type: integer + nullable: true + description: > + End position of the V gene in the query sequence (1-based closed interval). + v_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the V gene reference sequence (1-based closed interval). + v_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the V gene reference sequence (1-based closed interval). + v_alignment_start: + type: integer + nullable: true + description: > + Start position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + v_alignment_end: + type: integer + nullable: true + description: > + End position of the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_sequence_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in the query sequence. 
+ (1-based closed interval). + d_sequence_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in the query sequence. + (1-based closed interval). + d_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the D gene reference sequence for the first or only + D gene (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the first or only D gene in both the sequence_alignment + and germline_alignment fields (1-based closed interval). + d2_sequence_start: + type: integer + nullable: true + description: > + Start position of the second D gene in the query sequence (1-based closed interval). + d2_sequence_end: + type: integer + nullable: true + description: > + End position of the second D gene in the query sequence (1-based closed interval). + d2_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the second D gene reference sequence (1-based closed interval). + d2_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the second D gene reference sequence (1-based closed interval). + d2_alignment_start: + type: integer + nullable: true + description: > + Start position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). 
+ d2_alignment_end: + type: integer + nullable: true + description: > + End position of the second D gene alignment in both the sequence_alignment and + germline_alignment fields (1-based closed interval). + j_sequence_start: + type: integer + nullable: true + description: > + Start position of the J gene in the query sequence (1-based closed interval). + j_sequence_end: + type: integer + nullable: true + description: > + End position of the J gene in the query sequence (1-based closed interval). + j_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the J gene reference sequence (1-based closed interval). + j_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the J gene reference sequence (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_sequence_start: + type: integer + nullable: true + description: > + Start position of the C gene in the query sequence (1-based closed interval). + c_sequence_end: + type: integer + nullable: true + description: > + End position of the C gene in the query sequence (1-based closed interval). + c_germline_start: + type: integer + nullable: true + description: > + Alignment start position in the C gene reference sequence (1-based closed interval). + c_germline_end: + type: integer + nullable: true + description: > + Alignment end position in the C gene reference sequence (1-based closed interval). 
+ c_alignment_start: + type: integer + nullable: true + description: > + Start position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + c_alignment_end: + type: integer + nullable: true + description: > + End position of the C gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + cdr1_start: + type: integer + nullable: true + description: CDR1 start position in the query sequence (1-based closed interval). + cdr1_end: + type: integer + nullable: true + description: CDR1 end position in the query sequence (1-based closed interval). + cdr2_start: + type: integer + nullable: true + description: CDR2 start position in the query sequence (1-based closed interval). + cdr2_end: + type: integer + nullable: true + description: CDR2 end position in the query sequence (1-based closed interval). + cdr3_start: + type: integer + nullable: true + description: CDR3 start position in the query sequence (1-based closed interval). + cdr3_end: + type: integer + nullable: true + description: CDR3 end position in the query sequence (1-based closed interval). + fwr1_start: + type: integer + nullable: true + description: FWR1 start position in the query sequence (1-based closed interval). + fwr1_end: + type: integer + nullable: true + description: FWR1 end position in the query sequence (1-based closed interval). + fwr2_start: + type: integer + nullable: true + description: FWR2 start position in the query sequence (1-based closed interval). + fwr2_end: + type: integer + nullable: true + description: FWR2 end position in the query sequence (1-based closed interval). + fwr3_start: + type: integer + nullable: true + description: FWR3 start position in the query sequence (1-based closed interval). + fwr3_end: + type: integer + nullable: true + description: FWR3 end position in the query sequence (1-based closed interval). 
+ fwr4_start: + type: integer + nullable: true + description: FWR4 start position in the query sequence (1-based closed interval). + fwr4_end: + type: integer + nullable: true + description: FWR4 end position in the query sequence (1-based closed interval). + v_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the V gene, including any + indel corrections or numbering spacers. + v_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_sequence_alignment field. + d_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the first or only D gene, including any + indel corrections or numbering spacers. + d_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_sequence_alignment field. + d2_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the second D gene, including any + indel corrections or numbering spacers. + d2_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_sequence_alignment field. + j_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the J gene, including any + indel corrections or numbering spacers. + j_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_sequence_alignment field. + c_sequence_alignment: + type: string + nullable: true + description: > + Aligned portion of query sequence assigned to the constant region, including + any indel corrections or numbering spacers. + c_sequence_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_sequence_alignment field. 
+ v_germline_alignment: + type: string + nullable: true + description: > + Aligned V gene germline sequence spanning the same region + as the v_sequence_alignment field and including the same set + of corrections and spacers (if any). + v_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the v_germline_alignment field. + d_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d_sequence_alignment field and including the same set + of corrections and spacers (if any). + d_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d_germline_alignment field. + d2_germline_alignment: + type: string + nullable: true + description: > + Aligned D gene germline sequence spanning the same region + as the d2_sequence_alignment field and including the same set + of corrections and spacers (if any). + d2_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the d2_germline_alignment field. + j_germline_alignment: + type: string + nullable: true + description: > + Aligned J gene germline sequence spanning the same region + as the j_sequence_alignment field and including the same set + of corrections and spacers (if any). + j_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the j_germline_alignment field. + c_germline_alignment: + type: string + nullable: true + description: > + Aligned constant region germline sequence spanning the same region + as the c_sequence_alignment field and including the same set + of corrections and spacers (if any). + c_germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of the c_germline_alignment field. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction sequence.
+ junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in the junction sequence. + x-airr: + adc-query-support: true + np1_length: + type: integer + nullable: true + description: > + Number of nucleotides between the V gene and first D gene alignments or + between the V gene and J gene alignments. + np2_length: + type: integer + nullable: true + description: > + Number of nucleotides between either the first D gene and J gene alignments + or the first D gene and second D gene alignments. + np3_length: + type: integer + nullable: true + description: > + Number of nucleotides between the second D gene and J gene alignments. + n1_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 5' of the first or only D gene alignment. + n2_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the first or only D gene alignment. + n3_length: + type: integer + nullable: true + description: Number of untemplated nucleotides 3' of the second D gene alignment. + p3v_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the V gene alignment. + p5d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the first or only D gene alignment. + p3d_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the first or only D gene alignment. + p5d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the second D gene alignment. + p3d2_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 3' of the second D gene alignment. + p5j_length: + type: integer + nullable: true + description: Number of palindromic nucleotides 5' of the J gene alignment. 
+ v_frameshift: + type: boolean + nullable: true + description: > + True if the V gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the V gene reference sequence. + j_frameshift: + type: boolean + nullable: true + description: > + True if the J gene in the query nucleotide sequence contains a translational + frameshift relative to the frame of the J gene reference sequence. + d_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the first or only D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + d2_frame: + type: integer + nullable: true + description: > + Numerical reading frame (1, 2, 3) of the second D gene in the query nucleotide sequence, + where frame 1 is relative to the first codon of D gene reference sequence. + consensus_count: + type: integer + nullable: true + description: > + Number of reads contributing to the UMI consensus or contig assembly for this sequence. + For example, the sum of the number of reads for all UMIs that contribute to + the query sequence. + duplicate_count: + type: integer + nullable: true + description: > + Copy number or number of duplicate observations for the query sequence. + For example, the number of identical reads observed for this sequence. + title: Read count + example: 123 + x-airr: + miairr: important + set: 6 + subset: data (processed sequence) + name: Read count + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. + cell_id: + type: string + nullable: true + description: > + Identifier defining the cell of origin for the query sequence. 
+ title: Cell index + example: W06_046_091 + x-airr: + identifier: true + miairr: important + adc-query-support: true + set: 6 + subset: data (processed sequence) + name: Cell index + clone_id: + type: string + nullable: true + description: Clonal cluster assignment for the query sequence. + x-airr: + adc-query-support: true + identifier: true + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + identifier: true + sample_processing_id: + type: string + nullable: true + description: > + Identifier to the sample processing object in the repertoire metadata + for this rearrangement. If the repertoire has a single sample then + this field may be empty or missing. If the repertoire has multiple samples then + this field may be empty or missing if the sample cannot be differentiated or + the relationship is not maintained by the data processing. + x-airr: + adc-query-support: true + identifier: true + data_processing_id: + type: string + nullable: true + description: > + Identifier to the data processing object in the repertoire metadata + for this rearrangement. If this field is empty then the primary data processing object is assumed. + x-airr: + adc-query-support: true + identifier: true + rearrangement_id: + type: string + nullable: true + description: > + Identifier for the Rearrangement object. May be identical to sequence_id, + but will usually be a universally unique record locator for database applications. + x-airr: + deprecated: true + deprecated-description: Field has been merged with sequence_id to avoid confusion. + deprecated-replaced-by: + - sequence_id + rearrangement_set_id: + type: string + nullable: true + description: > + Identifier for grouping Rearrangement objects. + x-airr: + deprecated: true + deprecated-description: Field has been replaced by other specialized identifiers.
+ deprecated-replaced-by: + - repertoire_id + - sample_processing_id + - data_processing_id + germline_database: + type: string + nullable: true + description: Source of germline V(D)J genes with version number or date accessed. + example: ENSEMBL, Homo sapiens build 90, 2017-10-01 + x-airr: + deprecated: true + deprecated-description: Field was moved up to the DataProcessing level to avoid data duplication. + deprecated-replaced-by: + - "DataProcessing:germline_database" + +# A unique inferred clone object that has been constructed within a single data processing +# for a single repertoire and a subset of its sequences and/or rearrangements. +Clone: + type: object + required: + - clone_id + - germline_alignment + properties: + clone_id: + type: string + nullable: true + description: Identifier for the clone. + x-airr: + identifier: true + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + x-airr: + adc-query-support: true + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this clone. + x-airr: + adc-query-support: true + sequences: + type: array + items: + type: string + nullable: true + description: > + List of sequence_id strings that act as keys to the Rearrangement records for members of the clone. + v_call: + type: string + nullable: true + description: > + V gene with allele of the inferred ancestor of the clone. For example, IGHV4-59*01. + example: IGHV4-59*01 + d_call: + type: string + nullable: true + description: > + D gene with allele of the inferred ancestor of the clone. For example, IGHD3-10*01. + example: IGHD3-10*01 + j_call: + type: string + nullable: true + description: > + J gene with allele of the inferred ancestor of the clone. For example, IGHJ4*02.
+ example: IGHJ4*02 + junction: + type: string + nullable: true + description: > + Nucleotide sequence for the junction region of the inferred ancestor of the clone, + where the junction is defined as the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + junction_length: + type: integer + nullable: true + description: Number of nucleotides in the junction. + junction_aa_length: + type: integer + nullable: true + description: Number of amino acids in junction_aa. + germline_alignment: + type: string + nullable: true + description: > + Assembled, aligned, full-length inferred ancestor of the clone spanning the same region + as the sequence_alignment field of nodes (typically the V(D)J region) and including the + same set of corrections and spacers (if any). + germline_alignment_aa: + type: string + nullable: true + description: > + Amino acid translation of germline_alignment. + v_alignment_start: + type: integer + nullable: true + description: > + Start position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + v_alignment_end: + type: integer + nullable: true + description: > + End position in the V gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_start: + type: integer + nullable: true + description: > + Start position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + d_alignment_end: + type: integer + nullable: true + description: > + End position of the D gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + j_alignment_start: + type: integer + nullable: true + description: > + Start position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). 
+ j_alignment_end: + type: integer + nullable: true + description: > + End position of the J gene alignment in both the sequence_alignment and germline_alignment + fields (1-based closed interval). + junction_start: + type: integer + nullable: true + description: Junction region start position in the alignment (1-based closed interval). + junction_end: + type: integer + nullable: true + description: Junction region end position in the alignment (1-based closed interval). + umi_count: + type: integer + nullable: true + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: + type: integer + nullable: true + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. + seed_id: + type: string + nullable: true + description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. + +# 1-to-n relationship for a clone to its trees. +Tree: + type: object + required: + - tree_id + - clone_id + - newick + properties: + tree_id: + type: string + nullable: true + description: Identifier for the tree. + x-airr: + identifier: true + clone_id: + type: string + nullable: true + description: Identifier for the clone. + newick: + type: string + nullable: true + description: Newick string of the tree edges. 
+ nodes: + type: object + nullable: true + description: Dictionary of nodes in the tree, keyed by sequence_id string + additionalProperties: + $ref: '#/Node' + +# 1-to-n relationship between a tree and its nodes +Node: + type: object + required: + - sequence_id + properties: + sequence_id: + type: string + nullable: true + description: > + Identifier for this node that matches the identifier in the newick string and, where possible, + the sequence_id in the source repertoire. + x-airr: + identifier: true + sequence_alignment: + type: string + nullable: true + description: > + Nucleotide sequence of the node, aligned to the germline_alignment for this clone, including + any indel corrections or spacers. + junction: + type: string + nullable: true + description: > + Junction region nucleotide sequence for the node, where the junction is defined as + the CDR3 plus the two flanking conserved codons. + junction_aa: + type: string + nullable: true + description: > + Amino acid translation of the junction. + +# The cell object acts as point of reference for all data that can be related +# to an individual cell, either by direct observation or inference. +Cell: + type: object + required: + - cell_id + - rearrangements + - repertoire_id + - virtual_pairing + properties: + cell_id: + type: string + nullable: false + description: > + Identifier defining the cell of origin for the query sequence. + title: Cell index + example: W06_046_091 + x-airr: + identifier: true + miairr: defined + adc-query-support: true + name: Cell index + rearrangements: + type: array + nullable: true + description: > + Array of sequence identifiers defined for the Rearrangement object + title: Cell-associated rearrangements + items: + type: string + example: [id1, id2] #empty vs NULL?
+ x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated rearrangements + receptors: + type: array + nullable: true + description: > + Array of receptor identifiers defined for the Receptor object + title: Cell-associated receptors + items: + type: string + example: [id1, id2] #empty vs NULL? + x-airr: + miairr: defined + adc-query-support: true + name: Cell-associated receptors + repertoire_id: + type: string + nullable: true + description: Identifier to the associated repertoire in study metadata. + title: Parental repertoire of cell + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + nullable: true + description: Identifier of the data processing object in the repertoire metadata for this clone. + title: Data processing for cell + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + expression_study_method: + type: string + enum: + - flow_cytometry + - single-cell_transcriptome + - null + nullable: true + description: > + Keyword describing the methodology used to assess expression. The values for this field MUST + come from a controlled vocabulary. + x-airr: + miairr: defined + adc-query-support: true + expression_raw_doi: + type: string + nullable: true + description: > + DOI of raw data set containing the current event + x-airr: + miairr: defined + adc-query-support: true + expression_index: + type: string + nullable: true + description: > + Index addressing the current event within the raw data set. + x-airr: + miairr: defined + virtual_pairing: + type: boolean + nullable: true + description: > + Boolean to indicate if pairing was inferred. + title: Virtual pairing + x-airr: + miairr: defined + adc-query-support: true + name: Virtual pairing + +# The CellExpression object acts as a container to hold a single expression level measurement from +# an experiment.
Expression data is associated with a cell_id and the related repertoire_id and +# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for +# a single repertoire. +CellExpression: + type: object + required: + - expression_id + - repertoire_id + - data_processing_id + - cell_id + - property + - property_type + - value + properties: + expression_id: + type: string + description: > + Identifier of this expression property measurement. + title: Expression property measurement identifier + nullable: false + x-airr: + identifier: true + miairr: defined + adc-query-support: true + name: Expression measurement identifier + cell_id: + type: string + description: > + Identifier of the cell to which this expression data is related. + title: Cell identifier + nullable: false + example: W06_046_091 + x-airr: + miairr: defined + adc-query-support: true + name: Cell identifier + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. + title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this clone. + title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell + property_type: + type: string + description: > + Keyword describing the property type and detection method used to measure the property value. + The following keywords are recommended, but custom property types are also valid: + "mrna_expression_by_read_count", + "protein_expression_by_fluorescence_intensity", "antigen_bait_binding_by_fluorescence_intensity", + "protein_expression_by_dna_barcode_count" and "antigen_bait_binding_by_dna_barcode_count". 
+ nullable: false + title: Property type and detection method + x-airr: + miairr: defined + adc-query-support: true + name: Property type and detection method + property: + $ref: '#/Ontology' + nullable: true + title: Property information + description: > + Name of the property observed, typically a gene or antibody identifier (and label) from a + canonical resource such as Ensembl (e.g. ENSG00000275747, IGHV3-79) or + Antibody Registry (ABREG:1236456, Purified anti-mouse/rat/human CD27 antibody). + example: + id: ENSG:ENSG00000275747 + label: IGHV3-79 + x-airr: + miairr: defined + adc-query-support: true + format: ontology + name: Property information + value: + type: number + description: Level at which the property was observed in the experiment (non-normalized). + title: Property value + nullable: true + example: 3 + x-airr: + miairr: defined + adc-query-support: true + name: Property value + + +# The Receptor object holds information about a receptor and its reactivity. +# +Receptor: + type: object + required: + - receptor_id + - receptor_hash + - receptor_type + - receptor_variable_domain_1_aa + - receptor_variable_domain_1_locus + - receptor_variable_domain_2_aa + - receptor_variable_domain_2_locus + properties: + receptor_id: + type: string + nullable: false + description: ID of the current Receptor object, unique within the local repository. + title: Receptor ID + example: TCR-MM-012345 + x-airr: + identifier: true + adc-query-support: true + receptor_hash: + type: string + nullable: false + description: > + The SHA256 hash of the receptor amino acid sequence, calculated on the concatenated + ``receptor_variable_domain_*_aa`` sequences and represented as base16-encoded string.
+ title: Receptor hash ID + example: aa1c4b77a6f4927611ab39f5267415beaa0ba07a952c233d803b07e52261f026 + x-airr: + adc-query-support: true + receptor_type: + type: string + nullable: false + enum: + - Ig + - TCR + description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). + x-airr: + adc-query-support: true + receptor_variable_domain_1_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig heavy, TCR beta or TCR delta chain. + The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene. + example: > + QVQLQQPGAELVKPGASVKLSCKASGYTFTSYWMHWVKQRPGRGLEWIGRIDPNSGGTKYNEKFKSKATLTVDKPSSTAYMQLSSLTSEDSAVYYCARYDYYGSSYFDYWGQGTTLTVSS + x-airr: + adc-query-support: true + receptor_variable_domain_1_locus: + type: string + nullable: false + enum: + - IGH + - TRB + - TRD + description: Locus from which the variable domain in receptor_variable_domain_1_aa originates + example: IGH + x-airr: + adc-query-support: true + receptor_variable_domain_2_aa: + type: string + nullable: false + description: > + Complete amino acid sequence of the mature variable domain of the Ig light, TCR alpha or TCR gamma chain. + The mature variable domain is defined as encompassing all AA from and including first AA after the + signal peptide to and including the last AA that is completely encoded by the J gene.
+ example: > + QAVVTQESALTTSPGETVTLTCRSSTGAVTTSNYANWVQEKPDHLFTGLIGGTNNRAPGVPARFSGSLIGDKAALTITGAQTEDEAIYFCALWYSNHWVFGGGTKLTVL + x-airr: + adc-query-support: true + receptor_variable_domain_2_locus: + type: string + nullable: false + enum: + - IGI + - IGK + - IGL + - TRA + - TRG + description: Locus from which the variable domain in receptor_variable_domain_2_aa originates + example: IGL + x-airr: + adc-query-support: true + receptor_ref: + type: array + nullable: true + description: Array of receptor identifiers defined for the Receptor object + title: Receptor cross-references + items: + type: string + example: ["IEDB_RECEPTOR:10"] + x-airr: + adc-query-support: true + reactivity_measurements: + type: array + nullable: true + description: Records of reactivity measurement + items: + $ref: '#/ReceptorReactivity' + + +ReceptorReactivity: + type: object + required: + - ligand_type + - antigen_type + - antigen + - reactivity_method + - reactivity_readout + - reactivity_value + - reactivity_unit + properties: + ligand_type: + type: string + nullable: false + enum: + - "MHC:peptide" + - "MHC:non-peptide" + - protein + - peptide + - non-peptidic + description: Classification of ligand binding to receptor + example: non-peptide + antigen_type: + type: string + nullable: false + enum: + - protein + - peptide + - non-peptidic + description: > + The type of antigen before processing by the immune system. + example: protein + antigen: + $ref: '#/Ontology' + nullable: false + description: > + The substance against which the receptor was tested. This can be any substance that + stimulates an adaptive immune response in the host, either through antibody production + or by T cell activation after presentation via an MHC molecule. 
+ title: Antigen + example: + id: UNIPROT:P19597 + label: Circumsporozoite protein + x-airr: + adc-query-support: true + format: ontology + antigen_source_species: + $ref: '#/Ontology' + nullable: true + description: The species from which the antigen was isolated + title: Source species of antigen + example: + id: NCBITAXON:5843 + label: Plasmodium falciparum NF54 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: NCBITAXON:1 + label: root + peptide_start: + type: integer + nullable: true + description: Start position of the peptide within the reference protein sequence + peptide_end: + type: integer + nullable: true + description: End position of the peptide within the reference protein sequence + mhc_class: + type: string + nullable: true + enum: + - MHC-I + - MHC-II + - MHC-nonclassical + - null + description: Class of MHC molecule, only present for MHC:x ligand types + example: MHC-II + mhc_gene_1: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_1 belongs + title: MHC gene 1 + example: + id: MRO:0000055 + label: HLA-DRA + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_1: + type: string + nullable: true + description: Allele designation of the MHC alpha chain + example: HLA-DRA + mhc_gene_2: + $ref: '#/Ontology' + nullable: true + description: The MHC gene to which the mhc_allele_2 belongs + title: MHC gene 2 + example: + id: MRO:0000057 + label: HLA-DRB1 + x-airr: + format: ontology + ontology: + draft: true + top_node: + id: MRO:0000004 + label: MHC gene + mhc_allele_2: + type: string + nullable: true + description: > + Allele designation of the MHC class II beta chain or the invariant beta2-microglobulin chain + example: HLA-DRB1*04:01 + reactivity_method: + type: string + nullable: false + enum: + - SPR + - ITC + - ELISA + - cytometry + - biological_activity + description: The methodology used to assess expression (assay
implemented in experiment) + reactivity_readout: + type: string + nullable: false + enum: + - binding_strength + - cytokine_release + - dissociation_constant_kd + - on_rate + - off_rate + - pathogen_inhibition + description: Reactivity measurement read-out + example: cytokine release + reactivity_value: + type: number + nullable: false + description: The absolute (processed) value of the measurement + example: 162.26 + reactivity_unit: + type: string + nullable: false + description: The unit of the measurement + example: pg/ml diff --git a/tests/check-consistency-formats.py b/tests/check-consistency-formats.py index cd3d76423..98bec18fd 100755 --- a/tests/check-consistency-formats.py +++ b/tests/check-consistency-formats.py @@ -17,7 +17,9 @@ spec_files = {basename(f): f for f in glob('specs/airr-schema.yaml')} v3spec_files = {basename(f): f for f in glob('specs/airr-schema-openapi3.yaml')} py_files = {basename(f): f for f in glob('lang/python/airr/specs/airr-schema.yaml')} +py_v3_files = {basename(f): f for f in glob('lang/python/airr/specs/airr-schema-openapi3.yaml')} r_files = {basename(f): f for f in glob('lang/R/inst/extdata/airr-schema.yaml')} +r_v3_files = {basename(f): f for f in glob('lang/R/inst/extdata/airr-schema-openapi3.yaml')} # Check python package specs if set(spec_files.keys()) != set(py_files.keys()): @@ -26,6 +28,12 @@ for spec in set(py_files.keys()) - set(spec_files.keys()): print('{} found in python package but missing from specs/'.format(spec), file=sys.stderr) sys.exit(1) +if set(v3spec_files.keys()) != set(py_v3_files.keys()): + for spec in set(v3spec_files.keys()) - set(py_v3_files.keys()): + print('{} missing from python package'.format(spec), file=sys.stderr) + for spec in set(py_v3_files.keys()) - set(v3spec_files.keys()): + print('{} found in python package but missing from specs/'.format(spec), file=sys.stderr) + sys.exit(1) # Check R package specs if set(spec_files.keys()) != set(r_files.keys()): @@ -34,7 +42,36 @@ for spec in 
set(r_files.keys()) - set(spec_files.keys()): print('{} found in R package but missing from specs/'.format(spec), file=sys.stderr) sys.exit(1) +if set(v3spec_files.keys()) != set(r_v3_files.keys()): + for spec in set(v3spec_files.keys()) - set(r_v3_files.keys()): + print('{} missing from R package'.format(spec), file=sys.stderr) + for spec in set(r_v3_files.keys()) - set(v3spec_files.keys()): + print('{} found in R package but missing from specs/'.format(spec), file=sys.stderr) + sys.exit(1) + +# V3 spec against lang +for spec_name in v3spec_files: + # check equality of specs + with open(v3spec_files[spec_name], 'r') as ip: + gold_spec = yaml.safe_load(ip) + with open(py_v3_files[spec_name], 'r') as ip: + py_spec = yaml.safe_load(ip) + with open(r_v3_files[spec_name], 'r') as ip: + r_spec = yaml.safe_load(ip) + + # Check python package + if jsondiff.diff(gold_spec, py_spec) != {}: + print('{} openapi v3 spec is different from python version'.format(spec_name), file=sys.stderr) + print(jsondiff.diff(gold_spec, py_spec, syntax='explicit'), file=sys.stderr) + sys.exit(1) + + # Check R package + if jsondiff.diff(gold_spec, r_spec) != {}: + print('{} openapi v3 spec is different from R version'.format(spec_name), file=sys.stderr) + print(jsondiff.diff(gold_spec, r_spec), file=sys.stderr) + sys.exit(1) +# V2 spec against lang for spec_name in spec_files: # check equality of specs with open(spec_files[spec_name], 'r') as ip: @@ -46,13 +83,13 @@ # Check python package if jsondiff.diff(gold_spec, py_spec) != {}: - print('{} spec is different from python version'.format(spec_name), file=sys.stderr) + print('{} openapi v2 spec is different from python version'.format(spec_name), file=sys.stderr) print(jsondiff.diff(gold_spec, py_spec, syntax='explicit'), file=sys.stderr) sys.exit(1) # Check R package if jsondiff.diff(gold_spec, r_spec) != {}: - print('{} spec is different from R version'.format(spec_name), file=sys.stderr) + print('{} openapi v2 spec is different from R 
version'.format(spec_name), file=sys.stderr) print(jsondiff.diff(gold_spec, r_spec), file=sys.stderr) sys.exit(1) diff --git a/tests/data/bad_genotype_set.json b/tests/data/bad_genotype_set.json new file mode 100644 index 000000000..01709d60a --- /dev/null +++ b/tests/data/bad_genotype_set.json @@ -0,0 +1,44 @@ +{ + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + }, + { + "label": "IGHV1-69*02", + "name": "1234", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": "1" + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} diff --git a/tests/data/bad_germline_set.json b/tests/data/bad_germline_set.json new file mode 100644 index 000000000..28531aabb --- /dev/null +++ b/tests/data/bad_germline_set.json @@ -0,0 +1,417 @@ +{ + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": [""], + 
"species": "Mouse", + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2DBF", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": "Mouse", + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": 
null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": 
"CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": "Mouse", + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "fwr1_start": 1, + "fwr1_end": 78, + "cdr1_start": 79, + "cdr1_end": 114, + "fwr2_start": 115, + "fwr2_end": 165, + "cdr2_start": 166, + "cdr2_end": 195, + "fwr3_start": 196, + "fwr3_end": 312, + "cdr3_start": 313, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + 
"51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "notes": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + "curational_tags": null + } + ], + "notes": "" + }] +} diff --git a/tests/data/bad_rearrangement.tsv b/tests/data/bad_rearrangement.tsv new file mode 100644 index 000000000..d12fc79fe --- /dev/null +++ b/tests/data/bad_rearrangement.tsv @@ -0,0 +1,10 @@ +rearrangement_id rearrangement_set_id sequence_id wrong_name rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 
+IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 +IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 diff --git a/tests/data/bad_repertoire.yaml b/tests/data/bad_repertoire.yaml new file mode 100644 index 000000000..f35355e98 --- /dev/null +++ b/tests/data/bad_repertoire.yaml @@ -0,0 +1,202 @@ +# +# Example metadata +# + +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_B_naive + tissue: PBMC + cell_subset: "Naive B cell" + cell_phenotype: "expression of CD20 and the absence of CD27" + cell_species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_B_memory + tissue: PBMC + cell_subset: "Memory B cell" + cell_phenotype: "expression of CD20 and CD27" + cell_species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905655_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + value: year + linked_subjects: TW01B + link_type: twin + sample: + - sample_id: TW01A_T_naive_CD4 + tissue: PBMC + cell_subset: "Naive CD4+ T cell" + cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_species: + id: "NCBITAXON:9606" + value: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: TRB + sequencing_platform: "Illumina MiSeq" + read_length: "300" + sequencing_files: + file_type: fastq + filename: SRR2905659_R1.fastq.gz + read_direction: forward + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 diff --git a/tests/data/extra_rearrangement.tsv b/tests/data/extra_rearrangement.tsv new file mode 100644 index 000000000..8bedb960f --- /dev/null +++ b/tests/data/extra_rearrangement.tsv @@ -0,0 +1,2 @@ +sequence_id sequence rev_comp productive v_call d_call j_call sequence_alignment germline_alignment junction junction junction_aa v_cigar d_cigar j_cigar +1 2 F F 5 6 7 8 9 10 11 12 13 14 15 not_in_header not_in diff --git a/tests/data/good_combined_airr.json b/tests/data/good_combined_airr.json new file mode 100644 index 000000000..0ef2106ae --- /dev/null +++ b/tests/data/good_combined_airr.json @@ -0,0 +1,1124 @@ +{ + "Repertoire": [ + { + "repertoire_id": "1841923116114776551-242ac11c-0001-012", + "study": { + 
"study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "inclusion_exclusion_criteria": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. 
Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO:0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ], + "genotype": { + "receptor_genotype_set": { + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }, + "mhc_genotype_set": { + 
"mhc_genotype_set_id": "01847298-d0c2-11ee-bc66", + "mhc_genotype_list": [ + { + "mhc_genotype_id": "00be1c2e-d0c2-11ee-bfe7", + "mhc_class": "MHC-I", + "mhc_genotyping_method": "pcr_low_resolution", + "mhc_alleles": [ + { + "allele_designation": "01:01", + "gene": { + "id": "MRO-0000046", + "label": "HLA-A" + }, + "reference_set_ref": null + } + ] + } + ] + } + } + }, + "sample": [ + { + "sample_id": "TW01A_B_naive", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON:0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL:0000788", + "label": "naive B cell" + }, + "cell_phenotype": "expression of CD20 and the absence of CD27", + "cell_species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "IGH", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905656", + "file_type": "fastq", + "filename": "SRR2905656_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905656_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": null, + "disease_state_sample": null, + "collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "collection_location": { + "id": null, + "label": null + }, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": 
"RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + "data_processing_id": "3059369183532618216-242ac11b-0001-007", + "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "6623294219256599016-242ac11c-0001-012" + } + ] + }, + { + "repertoire_id": "1602908186092376551-242ac11c-0001-012", + "study": { + "study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. 
These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "inclusion_exclusion_criteria": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO:0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ] + }, + "sample": [ + { + "sample_id": 
"TW01A_B_memory", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON:0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL:0000787", + "label": "memory B cell" + }, + "cell_phenotype": "expression of CD20 and CD27", + "cell_species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "IGH", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905655", + "file_type": "fastq", + "filename": "SRR2905655_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905655_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": null, + "disease_state_sample": null, + "collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "collection_location": { + "id": null, + "label": null + }, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": "RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + "data_processing_id": "3059369183532618216-242ac11b-0001-007", 
+ "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "6623294219256599016-242ac11c-0001-012" + } + ] + }, + { + "repertoire_id": "2366080924918616551-242ac11c-0001-012", + "study": { + "study_id": "PRJNA300878", + "study_title": "Homo sapiens B and T cell repertoire - MZ twins", + "study_type": { + "id": null, + "label": null + }, + "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. 
These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", + "inclusion_exclusion_criteria": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], + "grants": null, + "keywords_study": [ + "contains_ig", + "contains_tr" + ] + }, + "subject": { + "subject_id": "TW01A", + "synthetic": false, + "species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "sex": "female", + "age_min": 27, + "age_max": 27, + "age_unit": { + "id": "UO:0000036", + "label": "year" + }, + "age_event": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, + "ethnicity": null, + "race": null, + "strain_name": null, + "linked_subjects": "TW01B", + "link_type": "twin", + "diagnosis": [ + { + "study_group_description": null, + "disease_diagnosis": { + "id": null, + "label": null + }, + "disease_length": null, + "disease_stage": null, + "prior_therapies": null, + "immunogen": null, + "intervention": null, + "medical_history": null + } + ] + }, + "sample": [ + { + "sample_id": 
"TW01A_T_naive_CD4", + "sample_processing_id": null, + "sample_type": "peripheral venous puncture", + "tissue": { + "id": "UBERON:0000178", + "label": "blood" + }, + "tissue_processing": "Ficoll gradient", + "cell_subset": { + "id": "CL:0000895", + "label": "naive thymus-derived CD4-positive, alpha-beta T cell" + }, + "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", + "cell_species": { + "id": "NCBITAXON:9606", + "label": "Homo sapiens" + }, + "single_cell": false, + "cell_isolation": "FACS", + "template_class": "RNA", + "pcr_target": [ + { + "pcr_target_locus": "TRB", + "forward_pcr_primer_target_location": null, + "reverse_pcr_primer_target_location": null + } + ], + "sequencing_platform": "Illumina MiSeq", + "sequencing_files": { + "sequencing_data_id": "SRR2905659", + "file_type": "fastq", + "filename": "SRR2905659_R1.fastq.gz", + "read_direction": "forward", + "read_length": 300, + "paired_filename": "SRR2905659_R2.fastq.gz", + "paired_read_direction": "reverse", + "paired_read_length": 300 + }, + "anatomic_site": null, + "disease_state_sample": null, + "collection_time_point_relative": null, + "collection_time_point_relative_unit": { + "id": null, + "label": null + }, + "collection_time_point_reference": null, + "collection_location": { + "id": null, + "label": null + }, + "biomaterial_provider": null, + "cell_number": null, + "cells_per_reaction": null, + "cell_storage": false, + "cell_quality": null, + "cell_processing_protocol": null, + "template_quality": null, + "template_amount": null, + "template_amount_unit": { + "id": null, + "label": null + }, + "library_generation_method": "RT(oligo-dT)+PCR", + "library_generation_protocol": null, + "library_generation_kit_version": null, + "complete_sequences": "partial", + "physical_linkage": "none", + "sequencing_run_id": null, + "total_reads_passing_qc_filter": null, + "sequencing_facility": null, + "sequencing_run_date": null, + "sequencing_kit": null + } + ], + "data_processing": [ + { + 
"data_processing_id": "651223970338378216-242ac11b-0001-007", + "primary_annotation": true, + "software_versions": null, + "paired_reads_assembly": null, + "quality_thresholds": null, + "primer_match_cutoffs": null, + "collapsing_method": null, + "data_processing_protocols": null, + "data_processing_files": null, + "germline_database": null, + "analysis_provenance_id": "4625424004665971176-242ac11c-0001-012" + } + ] + } + ], + + + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": [""], + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2DBF", + "sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "fwr1_start": 1, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + 
"contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "unaligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + 
"aligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "fwr1_start": 1, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + "curational_tags": null + } + ], + "curation": null + }], + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", 
+ "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} diff --git a/tests/data/good_combined_airr.yaml b/tests/data/good_combined_airr.yaml new file mode 100644 index 000000000..2c9ab547c --- /dev/null +++ b/tests/data/good_combined_airr.yaml @@ -0,0 +1,934 @@ +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. 
+ inclusion_exclusion_criteria: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITAXON:9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: + ancestry_population: + id: + label: + location_birth: + id: + label: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + genotype: + receptor_genotype_set: + receptor_genotype_set_id: "1" + genotype_class_list: + - receptor_genotype_id: "1" + locus: IGH + documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing + mhc_genotype_set: + mhc_genotype_set_id: 01847298-d0c2-11ee-bc66 + 
mhc_genotype_list: + - mhc_genotype_id: 00be1c2e-d0c2-11ee-bfe7 + mhc_class: MHC-I + mhc_genotyping_method: pcr_low_resolution + mhc_alleles: + - allele_designation: "01:01" + gene: + id: MRO-0000046 + label: HLA-A + reference_set_ref: + sample: + - sample_id: TW01A_B_naive + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON:0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL:0000788 + label: naive B cell + cell_phenotype: expression of CD20 and the absence of CD27 + cell_species: + id: NCBITAXON:9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905656 + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + collection_location: + id: + label: + biomaterial_provider: + cell_number: + cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + 
data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + - repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. + inclusion_exclusion_criteria: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITAXON:9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: + ancestry_population: + id: + label: + location_birth: + id: + label: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + sample: + - sample_id: TW01A_B_memory + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON:0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL:0000787 + label: memory B cell + cell_phenotype: expression of CD20 and CD27 + cell_species: + id: NCBITAXON:9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905655 + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905655_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + collection_location: + id: + label: + biomaterial_provider: + cell_number: + 
cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: Homo sapiens B and T cell repertoire - MZ twins + study_type: + id: + label: + study_description: The adaptive immune system's capability to protect the body + requires a highly diverse lymphocyte antigen receptor repertoire. However, the + influence of individual genetic and epigenetic differences on these repertoires + is not typically measured. By leveraging the unique characteristics of B, CD4+ + T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified + the impact of heritable factors on both the V(D)J recombination process and + thymic selection in the case of T cell receptors, and show that the repertoires + of both naive and antigen experienced cells are subject to biases resulting + from differences in recombination. We show that biases in V(D)J usage, as well + as biased N/P additions, contribute to significant variation in the CDR3 region. + Moreover, we show that the relative usage of V and J gene segments is chromosomally + biased, with approximately 1.5 times as many rearrangements originating from + a single chromosome. 
These data refine our understanding of the heritable mechanisms + affecting the repertoire, and show that biases are evident on a chromosome-wide + level. + inclusion_exclusion_criteria: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] + grants: + keywords_study: + - contains_ig + - contains_tr + subject: + subject_id: TW01A + synthetic: false + species: + id: NCBITAXON:9606 + label: Homo sapiens + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: + ancestry_population: + id: + label: + location_birth: + id: + label: + ethnicity: + race: + strain_name: + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: + disease_diagnosis: + id: + label: + disease_length: + disease_stage: + prior_therapies: + immunogen: + intervention: + medical_history: + sample: + - sample_id: TW01A_T_naive_CD4 + sample_processing_id: + sample_type: peripheral venous puncture + tissue: + id: UBERON:0000178 + label: blood + tissue_processing: Ficoll gradient + cell_subset: + id: CL:0000895 + label: naive thymus-derived CD4-positive, alpha-beta T cell + cell_phenotype: expression of CD8 and absence of CD4 and CD45RO + cell_species: + id: NCBITAXON:9606 + label: Homo sapiens + single_cell: false + cell_isolation: FACS + template_class: RNA + 
pcr_target: + - pcr_target_locus: TRB + forward_pcr_primer_target_location: + reverse_pcr_primer_target_location: + sequencing_platform: Illumina MiSeq + sequencing_files: + sequencing_data_id: SRR2905659 + file_type: fastq + filename: SRR2905659_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + anatomic_site: + disease_state_sample: + collection_time_point_relative: + collection_time_point_relative_unit: + id: + label: + collection_time_point_reference: + collection_location: + id: + label: + biomaterial_provider: + cell_number: + cells_per_reaction: + cell_storage: false + cell_quality: + cell_processing_protocol: + template_quality: + template_amount: + template_amount_unit: + id: + label: + library_generation_method: RT(oligo-dT)+PCR + library_generation_protocol: + library_generation_kit_version: + complete_sequences: partial + physical_linkage: none + sequencing_run_id: + total_reads_passing_qc_filter: + sequencing_facility: + sequencing_run_date: + sequencing_kit: + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + primary_annotation: true + software_versions: + paired_reads_assembly: + quality_thresholds: + primer_match_cutoffs: + collapsing_method: + data_processing_protocols: + data_processing_files: + germline_database: + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 + +GermlineSet: + - germline_set_id: OGRDB:G00007 + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: null + - role: data curation + degree: null + release_version: 1 + release_description: "" + release_date: "2021-11-24" + germline_set_name: CAST IGH + germline_set_ref: OGRDB:G00007.1 + pub_ids: [""] + species: 
+ id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + locus: IGH + allele_descriptions: + - allele_description_id: OGRDB:A00301 + allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: + release_version: 1 + release_date: "2021-11-24" + release_description: First release + label: IGHV-2DBF + sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aliases: + - watson_et_al:CAST_EiJ_IGHV5-3 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: rearranged_only + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + unaligned_sequence: 
GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + fwr1_start: 1 + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3' + curational_tags: + - allele_description_id: OGRDB:A00314 + 
allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: + release_version: 1 + release_date: "2021-11-24" + release_description: First release + label: IGHV-2ETO + sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aliases: + - watson_et_al:CAST_EiJ_IGHV8-2 + locus: IGH + chromosome: + sequence_type: V + functional: true + inference_type: rearranged_only + species: + id: NCBITAXON:10090 + label: Mus musculus + species_subgroup: CAST_EiJ + species_subgroup_type: strain + status: active + gene_designation: + subgroup_designation: + allele_designation: + gene_start: + gene_end: + utr_5_prime_start: + utr_5_prime_end: + leader_1_start: + leader_1_end: + leader_2_start: + leader_2_end: + v_rs_start: + v_rs_end: + v_gene_delineations: + - sequence_delineation_id: '1' + delineation_scheme: IMGT + unaligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC 
+ aligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + fwr1_start: 1 + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '13' + - '14' + - '15' + - '16' + - '17' + - '18' + - '19' + - '20' + - '21' + - '22' + - '23' + - '24' + - '25' + - '26' + - '27' + - '28' + - '29' + - '30' + - '31' + - '32' + - '33' + - '34' + - '35' + - '36' + - '37' + - '38' + - '39' + - '40' + - '41' + - '42' + - '43' + - '44' + - '45' + - '46' + - '47' + - '48' + - '49' + - '50' + - '51' + - '52' + - '53' + - '54' + - '55' + - '56' + - '57' + - '58' + - '59' + - '60' + - '61' + - '62' + - '63' + - '64' + - '65' + - '66' + - '67' + - '68' + - '69' + - '70' + - '71' + - '72' + - '73' + - '74' + - '75' + - '76' + - '77' + - '78' + - '79' + - '80' + - '81' + - '82' + - '83' + - '84' + - '85' + - '86' + - '87' + - '88' + - '89' + - '90' + - '91' + - '92' + - '93' + - '94' + - '95' + - '96' + - '97' + - '98' + - '99' + - '100' + - '101' + - '102' + - '103' + - '104' + unrearranged_support: [] + rearranged_support: [] + paralogs: [] + curation: 'Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2' + curational_tags: + curation: + +GenotypeSet: + - receptor_genotype_set_id: "1" + genotype_class_list: + - receptor_genotype_id: "1" + locus: IGH + documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - 
allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing diff --git a/tests/data/good_genotype_set.json b/tests/data/good_genotype_set.json new file mode 100644 index 000000000..abd24646c --- /dev/null +++ b/tests/data/good_genotype_set.json @@ -0,0 +1,38 @@ +{ + "GenotypeSet": [{ + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }] +} diff --git a/tests/data/good_germline_set.json b/tests/data/good_germline_set.json new file mode 100644 index 000000000..e74c590dc --- /dev/null +++ b/tests/data/good_germline_set.json @@ -0,0 +1,432 @@ +{ + "GermlineSet": [{ + "germline_set_id": "OGRDB:G00007", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_description": "", + "release_date": "2021-11-24", + "germline_set_name": "CAST IGH", + "germline_set_ref": "OGRDB:G00007.1", + "pub_ids": [""], + "species": { + "id": 
"NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "locus": "IGH", + "allele_descriptions": [ + { + "allele_description_id": "OGRDB:A00301", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2DBF", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV5-3" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + "utr_5_prime_end": null, + 
"leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "fwr1_start": 1, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + "104" + ] + } + ], + 
"unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV5-3", + "curational_tags": null + }, + { + "allele_description_id": "OGRDB:A00314", + "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], + "release_version": 1, + "release_date": "2021-11-24", + "release_description": "First release", + "label": "IGHV-2ETO", + "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aliases": [ + "watson_et_al:CAST_EiJ_IGHV8-2" + ], + "locus": "IGH", + "chromosome": null, + "sequence_type": "V", + "functional": true, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, + "species_subgroup": "CAST_EiJ", + "species_subgroup_type": "strain", + "status": "active", + "gene_designation": null, + "subgroup_designation": null, + "allele_designation": null, + "gene_start": null, + "gene_end": null, + "utr_5_prime_start": null, + 
"utr_5_prime_end": null, + "leader_1_start": null, + "leader_1_end": null, + "leader_2_start": null, + "leader_2_end": null, + "v_rs_start": null, + "v_rs_end": null, + "v_gene_delineations": [ + { + "sequence_delineation_id": "1", + "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "fwr1_start": 1, + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + "10", + "11", + "12", + "13", + "14", + "15", + "16", + "17", + "18", + "19", + "20", + "21", + "22", + "23", + "24", + "25", + "26", + "27", + "28", + "29", + "30", + "31", + "32", + "33", + "34", + "35", + "36", + "37", + "38", + "39", + "40", + "41", + "42", + "43", + "44", + "45", + "46", + "47", + "48", + "49", + "50", + "51", + "52", + "53", + "54", + "55", + "56", + "57", + "58", + "59", + "60", + "61", + "62", + "63", + "64", + "65", + "66", + "67", + "68", + "69", + "70", + "71", + "72", + "73", + "74", + "75", + "76", + "77", + "78", + "79", + "80", + "81", + "82", + "83", + "84", + "85", + "86", + "87", + "88", + "89", + "90", + "91", + "92", + "93", + "94", + "95", + "96", + "97", + "98", + "99", + "100", + "101", + "102", + "103", + 
"104" + ] + } + ], + "unrearranged_support": [], + "rearranged_support": [], + "paralogs": [], + "curation": "Imported to OGRDB with the following notes: watson_et_al: CAST_EiJ_IGHV8-2", + "curational_tags": null + } + ], + "curation": null + }] +} diff --git a/tests/data/good_rearrangement.tsv b/tests/data/good_rearrangement.tsv new file mode 100644 index 000000000..e8521767d --- /dev/null +++ b/tests/data/good_rearrangement.tsv @@ -0,0 +1,10 @@ +rearrangement_id rearrangement_set_id sequence_id sequence rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 
TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 +IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 diff --git a/tests/data/good_repertoire.yaml b/tests/data/good_repertoire.yaml new file mode 100644 index 000000000..6adaa2361 --- /dev/null +++ b/tests/data/good_repertoire.yaml @@ -0,0 +1,469 @@ +# +# Example metadata +# + +Repertoire: + - repertoire_id: 1841923116114776551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. 
Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + inclusion_exclusion_criteria: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_B_naive + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON:0000178" + label: "blood" + 
tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL:0000788" + label: "naive B cell" + cell_phenotype: "expression of CD20 and the absence of CD27" + cell_species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905656 + file_type: fastq + filename: SRR2905656_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905656_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905656_R3.fastq.gz + index_length: 8 + anatomic_site: null + disease_state_sample: null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + collection_location: + id: null + label: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - 
repertoire_id: 1602908186092376551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + inclusion_exclusion_criteria: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_B_memory + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON:0000178" + label: "blood" + tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL:0000787" + label: "memory B cell" + cell_phenotype: "expression of CD20 and CD27" + cell_species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: IGH + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905655 + file_type: fastq + filename: SRR2905655_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905655_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905655_R3.fastq.gz + index_length: 8 + anatomic_site: null + disease_state_sample: 
null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + collection_location: + id: null + label: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 3059369183532618216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 6623294219256599016-242ac11c-0001-012 + + - repertoire_id: 2366080924918616551-242ac11c-0001-012 + study: + study_id: PRJNA300878 + study_title: "Homo sapiens B and T cell repertoire - MZ twins" + study_type: + id: null + label: null + study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." + inclusion_exclusion_criteria: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] + grants: null + keywords_study: + - "contains_ig" + - "contains_tr" + subject: + subject_id: TW01A + synthetic: false + species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + sex: female + age_min: 27 + age_max: 27 + age_unit: + id: UO:0000036 + label: year + age_event: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null + ethnicity: null + race: null + strain_name: null + linked_subjects: TW01B + link_type: twin + diagnosis: + - study_group_description: null + disease_diagnosis: + id: null + label: null + disease_length: null + disease_stage: null + prior_therapies: null + immunogen: null + intervention: null + medical_history: null + + sample: + - sample_id: TW01A_T_naive_CD4 + sample_processing_id: null + sample_type: "peripheral venous puncture" + tissue: + id: "UBERON:0000178" + label: "blood" + tissue_processing: "Ficoll gradient" + cell_subset: + id: "CL:0000895" + label: "naive thymus-derived CD4-positive, alpha-beta T cell" + cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_species: + id: "NCBITAXON:9606" + label: "Homo sapiens" + single_cell: false + cell_isolation: FACS + template_class: RNA + pcr_target: + - pcr_target_locus: TRB + forward_pcr_primer_target_location: null + reverse_pcr_primer_target_location: null + sequencing_platform: "Illumina MiSeq" + sequencing_files: + sequencing_data_id: SRA:SRR2905659 + file_type: fastq + filename: SRR2905659_R1.fastq.gz + read_direction: forward + read_length: 300 + paired_filename: SRR2905659_R2.fastq.gz + paired_read_direction: reverse + paired_read_length: 300 + index_filename: SRR2905659_R3.fastq.gz + 
index_length: 8 + anatomic_site: null + disease_state_sample: null + collection_time_point_relative: null + collection_time_point_relative_unit: + id: null + label: null + collection_time_point_reference: null + collection_location: + id: null + label: null + biomaterial_provider: null + cell_number: null + cells_per_reaction: null + cell_storage: false + cell_quality: null + cell_processing_protocol: null + template_quality: null + template_amount: null + template_amount_unit: + id: null + label: null + library_generation_method: "RT(oligo-dT)+PCR" + library_generation_protocol: null + library_generation_kit_version: null + complete_sequences: "partial" + physical_linkage: "none" + sequencing_run_id: null + total_reads_passing_qc_filter: null + sequencing_facility: null + sequencing_run_date: null + sequencing_kit: null + data_processing: + - data_processing_id: 651223970338378216-242ac11b-0001-007 + primary_annotation: true + software_versions: null + paired_reads_assembly: null + quality_thresholds: null + primer_match_cutoffs: null + collapsing_method: null + data_processing_protocols: null + data_processing_files: null + germline_database: null + analysis_provenance_id: 4625424004665971176-242ac11c-0001-012 From 5120bef5712517897900b0950f53fd01f70d68a6 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Sat, 19 Oct 2024 17:56:31 -0500 Subject: [PATCH 57/59] update ubuntu --- docker/Dockerfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2f62cdac0..0b836ffd0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:24.04 LABEL maintainer="AIRR Community" @@ -8,6 +8,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install - python3-pip \ python3-sphinx \ python3-scipy \ + python3-venv \ libyaml-dev \ r-base \ r-base-dev \ @@ -21,6 +22,12 @@ RUN export DEBIAN_FRONTEND=noninteractive && apt-get update && apt-get install 
- libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \ nano +# Create a virtual environment +RUN python3 -m venv /airr-venv + +# Activate the virtual environment +ENV PATH="/airr-venv/bin:$PATH" + RUN pip3 install \ pandas \ biopython \ From 5e2cfe1b787f586b4972d9b8e8f9074a3bb93343 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Sat, 19 Oct 2024 17:59:33 -0500 Subject: [PATCH 58/59] sync tests for langs --- CONTRIBUTING.rst | 20 +- Makefile | 38 +- lang/R/inst/extdata/airr-schema-openapi3.yaml | 664 +++++++++++++----- .../tests/data-tests/extra_rearrangement.tsv | 2 + .../R/tests/data-tests/good_rearrangement.tsv | 18 +- .../data-tests/good_rearrangement.tsv.gz | Bin 0 -> 1106 bytes lang/R/tests/data-tests/good_repertoire.yaml | 2 +- lang/js/airr-schema-openapi3.yaml | 250 +++++-- lang/js/tests/data/bad_genotype_set.json | 2 +- lang/js/tests/data/bad_germline_set.json | 102 ++- lang/js/tests/data/bad_rearrangement.tsv | 20 +- lang/js/tests/data/bad_repertoire.yaml | 96 ++- lang/js/tests/data/good_combined_airr.json | 516 +++++++++++--- lang/js/tests/data/good_combined_airr.yaml | 372 +++++++--- lang/js/tests/data/good_genotype_set.json | 2 +- lang/js/tests/data/good_germline_set.json | 164 +++-- lang/js/tests/data/good_rearrangement.tsv.gz | Bin 0 -> 1106 bytes lang/js/tests/data/good_repertoire.yaml | 221 ++++-- lang/python/airr/interface.py | 11 +- .../airr/specs/airr-schema-openapi3.yaml | 664 +++++++++++++----- lang/python/tests/data/bad_rearrangement.tsv | 20 +- lang/python/tests/test_interface.py | 3 +- tests/data/bad_rearrangement.tsv | 20 +- tests/data/good_combined_airr.json | 92 +-- tests/data/good_combined_airr.yaml | 75 +- tests/data/good_rearrangement.tsv.gz | Bin 0 -> 1106 bytes tests/data/good_repertoire.yaml | 75 +- 27 files changed, 2507 insertions(+), 942 deletions(-) create mode 100644 lang/R/tests/data-tests/extra_rearrangement.tsv create mode 100644 lang/R/tests/data-tests/good_rearrangement.tsv.gz create mode 100644 
lang/js/tests/data/good_rearrangement.tsv.gz create mode 100644 tests/data/good_rearrangement.tsv.gz diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 971104323..d73fa45c8 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -212,9 +212,25 @@ software. For all cases, you will want a local copy of the github repository. git clone https://github.com/airr-community/airr-standards.git +A Makefile is provided to simplify tasks such as copying spec files to the lang +directories, copying test data, running tests and checks, and building documentation. +The default make target will display help information for all targets. + +.. code-block:: bash + + make + +If your local environment does not have the appropriate software, you can build +and use the docker container. You will need python3, R and npm to run tests for +all languages. + If you will be working in a docker container, you can pull down the airr-standards -image, which has all of the prerequisites installed, or you can use your own image. -The airr-standards image provides a python3 environment. +image from docker hub, which has all of the prerequisites installed, +or you can build your own image. Note: automated builds are not performed on docker +hub so the ``latest`` tag is often out of date. It is generally safe to build +your own container from your local source. +The airr-standards image installs the reference library for all languages and +builds the documentation. ..
code-block:: bash diff --git a/Makefile b/Makefile index a18207a95..e11636369 100644 --- a/Makefile +++ b/Makefile @@ -1,41 +1,46 @@ # helper commands for keeping the language directories in sync -# note: "help" MUST be the first target in the file, -# when the user types "make" they should get help info +# note: "help" MUST be the first target in the file, so +# when the user types "make" they get help info by default help: @echo "" @echo "Helper commands for AIRR Standards repository" + @echo "---------------------------------------------" @echo "" - @echo "make gen-v2 -- Generate OpenAPI V2 spec from the V3 spec" - @echo "make build-docs -- Build documentation" - @echo "make spec-copy -- Copy spec files to language directories" - @echo "make data-copy -- Copy test data files to language directories" - @echo "make checks -- Run consistency checks on spec files" - @echo "make tests -- Run all language test suites" - @echo "make python-tests -- Run Python test suite" - @echo "make r-tests -- Run R test suite" - @echo "make js-tests -- Run Javascript test suite" + @echo "make spec-copy -- Copy spec files to language directories" + @echo "make data-copy -- Copy test data files to language directories" + @echo "" + @echo "make docker-latest -- Build docker image with latest tag" + @echo "" + @echo "make checks -- Run consistency checks on spec files" + @echo "make tests -- Run all language test suites" + @echo "make python-tests -- Run Python test suite" + @echo "make r-tests -- Run R test suite" + @echo "make js-tests -- Run Javascript test suite" + @echo "" + @echo "make build-docs -- Build documentation" @echo "" - -gen-v2: - @echo "Not implemented" build-docs: sphinx-build -a -E -b html docs docs/_build/html +docker-latest: + @echo "Building latest docker image" + docker build -f docker/Dockerfile -t airrc/airr-standards:latest . 
+ spec-copy: @echo "Copying specs to language directories" cp specs/airr-schema.yaml lang/python/airr/specs cp specs/airr-schema-openapi3.yaml lang/python/airr/specs cp specs/airr-schema.yaml lang/R/inst/extdata cp specs/airr-schema-openapi3.yaml lang/R/inst/extdata -# cp specs/airr-schema.yaml lang/js/ -# cp specs/airr-schema-openapi3.yaml lang/js/ + cp specs/airr-schema-openapi3.yaml lang/js/ data-copy: @echo "Copying test data to language directories" cp tests/data/* lang/python/tests/data cp tests/data/* lang/R/tests/data-tests + cp tests/data/* lang/js/tests/data checks: @echo "Running consistency checks on spec files" @@ -53,3 +58,4 @@ r-tests: js-tests: @echo "Running Javascript test suite" + cd lang/js; npm test diff --git a/lang/R/inst/extdata/airr-schema-openapi3.yaml b/lang/R/inst/extdata/airr-schema-openapi3.yaml index d6c6d48e2..ff0e5753c 100644 --- a/lang/R/inst/extdata/airr-schema-openapi3.yaml +++ b/lang/R/inst/extdata/airr-schema-openapi3.yaml @@ -318,6 +318,8 @@ Attributes: - ontology - controlled_vocabulary - physical_quantity + - time_point + - time_interval - CURIE ontology: type: object @@ -339,6 +341,138 @@ Attributes: type: string description: Ontology name for the top node term + +# FileObject +FileObject: + type: object + properties: + format: + type: string + description: The file format for the file (e.g. json, tsv). + nullable: true + compression: + type: string + description: The type of compression (if any) used for the file (e.g. zip, gz, bz2). + nullable: true + filename: + type: string + description: The name of the file + nullable: true + checksum: + type: string + description: MD5 checksum of the file. + nullable: true + version: + type: string + description: Version stamp for the file (if any). + nullable: true + +# AIRR DataSet specification +# An AIRR DataSet consists of tags for types of AIRR Schema Objects and a set of files that hold data in the AIRR format +# appropriate for that AIRR schema object. 
+ +DataSet: + type: object + properties: + name: + type: string + description: A human readable name for the data set. + nullable: true + description: + type: string + description: A description of the data set. + nullable: true + repository_url: + type: string + description: The URL of the repository from which the data set came from (if the data came from a repository) + nullable: true + files: + type: object + description: A set of files based on data type, where the key for each object denotes the type of data and the accompanying array of files (and related file metadata) contain the data of that type. All top level AIRR objects (e.g. Repertoire, Rearrangement, Clone, Cell, Expression, Reactivity, Receptor, RepertoireGroup) are allowed. Custom data types are also permitted, so the manifest can include data that is outside of the AIRR specification. + nullable: true + properties: + Repertoire: + type: array + items: + $ref: '#/FileObject' + nullable: true + Rearrangement: + type: array + items: + $ref: '#/FileObject' + nullable: true + Clone: + type: array + items: + $ref: '#/FileObject' + nullable: true + Cell: + type: array + items: + $ref: '#/FileObject' + nullable: true + Expression: + type: array + items: + $ref: '#/FileObject' + nullable: true + Reactivity: + type: array + items: + $ref: '#/FileObject' + nullable: true + Receptor: + type: array + items: + $ref: '#/FileObject' + nullable: true + GermlineSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + GenotypeSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + RepertoireGroup: + type: array + items: + $ref: '#/FileObject' + nullable: true + ADCQuery: + type: array + items: + $ref: '#/FileObject' + nullable: true + + +# AIRR Manifest specification. +# An AIRR Manifest contains metadata for a group of accompanying files that are part of a set or coherent unit. 
+ +Manifest: + type: object + properties: + name: + type: string + description: A human readable name for the data manifest. + nullable: false + description: + type: string + description: A description of the purpose for this data manifest + nullable: true + created_at: + type: string + description: Time/data stamp when the dat manifest was created. + nullable: true + data_sets: + type: array + items: + $ref: '#/DataSet' + nullable: true + + # AIRR Data File # # A JSON data file that holds Repertoire metadata, data processing @@ -400,7 +534,13 @@ DataFile: nullable: false description: List of genotype sets items: - $ref: '#/GenotypeSet' + $ref: '#/GenotypeSet' + Manifest: + type: array + nullable: false + description: List of data set manifests + items: + $ref: '#/Manifest' # AIRR Info object, should be similar to openapi # should we point to an openapi schema? @@ -446,6 +586,10 @@ InfoObject: type: string nullable: true +# +# General objects +# + # A time point TimePoint: description: Time point at which an observation or other action was performed. @@ -460,14 +604,14 @@ TimePoint: adc-query-support: true value: type: number - nullable: true + nullable: false description: Value of the time point example: -5.0 x-airr: adc-query-support: true unit: $ref: '#/Ontology' - nullable: true + nullable: false description: Unit of the time point title: Unit of immunization schedule example: @@ -482,9 +626,98 @@ TimePoint: id: UO:0000003 label: time unit -# -# General objects -# +# A time range or interval +TimeInterval: + description: Time range or interval for a measurement, observation or action. 
+ type: object + properties: + min: + type: number + nullable: false + description: Lower/minimum value of the time interval + example: 5.0 + x-airr: + adc-query-support: true + max: + type: number + nullable: false + description: Upper/maximum value of the time interval + example: 10.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of the time interval + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# A physical quantity +PhysicalQuantity: + description: A physical quantity from a measurement or observation. + type: object + properties: + quantity: + type: number + nullable: false + description: Physical quantity + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of physical quantity + example: + id: UO:0000024 + label: nanogram + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + + +# A time quantity +TimeQuantity: + description: A time quantity + type: object + properties: + quantity: + type: number + nullable: false + description: Time quantity + example: 30.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of time + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + # Contributor record to describe invididuals and their contribution to a data set # @@ -1913,9 +2146,7 @@ Subject: - synthetic - species - sex - - age_min - - age_max - - age_unit + - age - age_event - ancestry_population - ethnicity @@ -1999,52 +2230,30 @@ Subject: subset: subject name: Sex format: controlled_vocabulary - age_min: - type: number - nullable: true - description: Specific age or 
lower boundary of age range. - title: Age minimum - example: 60 - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age minimum - age_max: - type: number + age: + $ref: '#/TimeInterval' nullable: true description: > - Upper boundary of age range or equal to age_min for specific age. - This field should only be null if age_min is null. - title: Age maximum - example: 80 - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age maximum - age_unit: - $ref: '#/Ontology' - nullable: true - description: Unit of age range - title: Age unit - example: - id: UO:0000036 - label: year + Age of subject expressed as a time interval. If singular time point then + min == max in the time interval. + examples: + - min: 50 + max: 50 + unit: + id: UO:0000036 + label: year + - min: 5 + max: 10 + unit: + id: UO:0000034 + label: week x-airr: miairr: important adc-query-support: true set: 1 subset: subject - name: Age unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + name: Age + format: time_interval age_event: type: string nullable: true @@ -2060,16 +2269,30 @@ Subject: set: 1 subset: subject name: Age event - age: - type: string + age_min: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_max: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_unit: + $ref: '#/Ontology' nullable: true x-airr: deprecated: true - deprecated-description: Split into two fields to specify as an age range. + deprecated-description: Combined into single age field. 
deprecated-replaced-by: - - age_min - - age_max - - age_unit + - age ancestry_population: $ref: '#/Ontology' nullable: true @@ -2207,6 +2430,25 @@ Diagnosis: set: 1 subset: diagnosis and intervention name: Study group description + diagnosis_timepoint: + $ref: '#/TimePoint' + nullable: true + description: Time point for the diagnosis + title: Diagnosis timepoint + example: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis timepoint + format: time_point + disease_diagnosis: $ref: '#/Ontology' nullable: true @@ -2228,11 +2470,15 @@ Diagnosis: id: DOID:4 label: disease disease_length: - type: string + $ref: '#/TimeQuantity' nullable: true description: Time duration between initial diagnosis and current intervention title: Length of disease - example: 23 months + example: + quantity: 23 + unit: + id: UO:0000035 + label: month x-airr: miairr: important adc-query-support: true @@ -2312,8 +2558,6 @@ Sample: - anatomic_site - disease_state_sample - collection_time_point_relative - - collection_time_point_relative_unit - - collection_time_point_reference - biomaterial_provider properties: sample_id: @@ -2388,49 +2632,40 @@ Sample: subset: sample name: Disease state of sample collection_time_point_relative: - type: number + $ref: '#/TimePoint' nullable: true - description: Time point at which sample was taken, relative to `Collection time event` + description: Time point at which sample was taken, relative to `label` event title: Sample collection time - example: 14 + example: + label: Primary vaccination + value: 14 + unit: + id: UO:0000033 + label: day x-airr: miairr: important adc-query-support: true set: 2 subset: sample name: Sample collection time + format: time_point collection_time_point_relative_unit: $ref: '#/Ontology' nullable: true - description: Unit of Sample collection time - title: Sample collection time unit - 
example: - id: UO:0000033 - label: day x-airr: - miairr: important - adc-query-support: true - set: 2 - subset: sample - name: Sample collection time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. + deprecated-replaced-by: + - collection_time_point_relative collection_time_point_reference: type: string nullable: true description: Event in the study schedule to which `Sample collection time` relates to - title: Collection time event - example: Primary vaccination x-airr: - miairr: important - adc-query-support: true - set: 2 - subset: sample - name: Collection time event + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. + deprecated-replaced-by: + - collection_time_point_relative collection_location: $ref: '#/Ontology' nullable: true @@ -2513,7 +2748,8 @@ CellProcessing: cell_phenotype: type: string nullable: true - description: List of cellular markers and their expression levels used to isolate the cell population + description: > + List of cellular markers and their expression levels used to isolate the cell population. title: Cell subset phenotype example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: @@ -2522,6 +2758,20 @@ CellProcessing: set: 3 subset: process (cell) name: Cell subset phenotype + cell_label: + type: string + nullable: true + description: > + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. 
+ title: Cell annotation + example: age-associated B cell + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell annotation cell_species: $ref: '#/Ontology' nullable: true @@ -2700,7 +2950,6 @@ NucleicAcidProcessing: - template_class - template_quality - template_amount - - template_amount_unit - library_generation_method - library_generation_protocol - library_generation_kit_version @@ -2737,37 +2986,30 @@ NucleicAcidProcessing: subset: process (nucleic acid) name: Target substrate quality template_amount: - type: number + $ref: '#/PhysicalQuantity' nullable: true description: Amount of template that went into the process title: Template amount - example: 1000 + example: + quantity: 1000 + unit: + id: UO:0000024 + label: nanogram x-airr: miairr: important adc-query-support: true set: 3 subset: process (nucleic acid) name: Template amount + format: physical_quantity template_amount_unit: $ref: '#/Ontology' nullable: true - description: Unit of template amount - title: Template amount time unit - example: - id: UO:0000024 - label: nanogram x-airr: - miairr: important - adc-query-support: true - set: 3 - subset: process (nucleic acid) - name: Template amount time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000002 - label: physical quantity + deprecated: true + deprecated-description: Field has been merged with template_amount. + deprecated-replaced-by: + - template_amount library_generation_method: type: string enum: @@ -4701,7 +4943,6 @@ Cell: type: object required: - cell_id - - rearrangements - repertoire_id - virtual_pairing properties: @@ -4709,40 +4950,15 @@ Cell: type: string nullable: false description: > - Identifier defining the cell of origin for the query sequence. + Identifier for the Cell object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. 
title: Cell index - example: W06_046_091 x-airr: identifier: true miairr: defined adc-query-support: true name: Cell index - rearrangements: - type: array - nullable: true - description: > - Array of sequence identifiers defined for the Rearrangement object - title: Cell-associated rearrangements - items: - type: string - example: [id1, id2] #empty vs NULL? - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated rearrangements - receptors: - type: array - nullable: true - description: > - Array of receptor identifiers defined for the Receptor object - title: Cell-associated receptors - items: - type: string - example: [id1, id2] #empty vs NULL? - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated receptors repertoire_id: type: string nullable: true @@ -4755,40 +4971,66 @@ Cell: data_processing_id: type: string nullable: true - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. title: Data processing for cell x-airr: miairr: defined adc-query-support: true name: Data processing for cell - expression_study_method: - type: string - enum: - - flow_cytometry - - single-cell_transcriptome - - null + receptors: + type: array nullable: true description: > - Keyword describing the methodology used to assess expression. This values for this field MUST - come from a controlled vocabulary. + Array of receptor identifiers defined for the Receptor objects associated with this cell + title: Cell-associated receptors + items: + type: string + example: [id1, id2] x-airr: miairr: defined adc-query-support: true - expression_raw_doi: + name: Cell-associated receptors + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population. 
+ title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: type: string nullable: true description: > - DOI of raw data set containing the current event + List of cellular markers and their expression levels used to isolate the cell population. + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: miairr: defined adc-query-support: true - expression_index: + name: Cell subset phenotype + cell_label: type: string nullable: true description: > - Index addressing the current event within the raw data set. + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. + title: Cell annotation + example: age-associated B cell x-airr: miairr: defined + adc-query-support: true + name: Cell annotation virtual_pairing: type: boolean nullable: true @@ -4800,11 +5042,10 @@ Cell: adc-query-support: true name: Virtual pairing -# The CellExpression object acts as a container to hold a single expression level measurement from +# The Expression object acts as a container to hold a single expression level measurement from # an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for -# a single repertoire. -CellExpression: +# data_processing_id. +Expression: type: object required: - expression_id @@ -4818,7 +5059,9 @@ CellExpression: expression_id: type: string description: > - Identifier of this expression property measurement. + Identifier for the Expression object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. 
title: Expression property measurement identifier nullable: false x-airr: @@ -4848,7 +5091,7 @@ CellExpression: name: Parental repertoire of cell data_processing_id: type: string - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. title: Data processing for cell nullable: true x-airr: @@ -4897,7 +5140,7 @@ CellExpression: name: Property value -# The Receptor object hold information about a receptor and its reactivity. +# The Receptor object holds information about a receptor (immunoglobulin or TCR) # Receptor: type: object @@ -4913,7 +5156,10 @@ Receptor: receptor_id: type: string nullable: false - description: ID of the current Receptor object, unique within the local repository. + description: > + Identifier for the Receptor object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. title: Receptor ID example: TCR-MM-012345 x-airr: @@ -4937,6 +5183,7 @@ Receptor: - TCR description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). 
x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_1_aa: type: string @@ -4959,6 +5206,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_1_aa originates example: IGH x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_2_aa: type: string @@ -4983,6 +5231,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_2_aa originates example: IGL x-airr: + format: controlled_vocabulary adc-query-support: true receptor_ref: type: array @@ -4994,17 +5243,12 @@ Receptor: example: ["IEDB_RECEPTOR:10"] x-airr: adc-query-support: true - reactivity_measurements: - type: array - nullable: true - description: Records of reactivity measurement - items: - $ref: '#/ReceptorReactivity' - -ReceptorReactivity: +Reactivity: type: object required: + - reactivity_id + - cell_id - ligand_type - antigen_type - antigen @@ -5013,6 +5257,43 @@ ReceptorReactivity: - reactivity_value - reactivity_unit properties: + reactivity_id: + type: string + nullable: false + description: > + Identifier for the Reactivity object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. + title: Reactivity ID + x-airr: + identifier: true + adc-query-support: true + cell_id: + type: string + nullable: false + description: > + Identifier of the Cell in the context of which the reactivity measurement was conducted. + title: Cell ID + x-airr: + adc-query-support: true + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. + title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this cell. 
+ title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell ligand_type: type: string nullable: false @@ -5022,8 +5303,11 @@ ReceptorReactivity: - protein - peptide - non-peptidic - description: Classification of ligand binding to receptor + description: Classification of ligand binding to the cell example: non-peptide + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen_type: type: string nullable: false @@ -5034,6 +5318,9 @@ ReceptorReactivity: description: > The type of antigen before processing by the immune system. example: protein + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen: $ref: '#/Ontology' nullable: false @@ -5057,6 +5344,7 @@ ReceptorReactivity: id: NCBITAXON:5843 label: Plasmodium falciparum NF54 x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5071,6 +5359,14 @@ ReceptorReactivity: type: integer nullable: true description: End position of the peptide within the reference protein sequence + peptide_sequence_aa: + type: string + nullable: true + description: > + The actual peptide sequence against which the receptor reactivity was measured. This field should be + used as a convenience for antigens of antigen_type `protein` or `peptide`. 
+ x-airr: + adc-query-support: true mhc_class: type: string nullable: true @@ -5081,6 +5377,9 @@ ReceptorReactivity: - null description: Class of MHC molecule, only present for MHC:x ligand types example: MHC-II + x-airr: + format: controlled_vocabulary + adc-query-support: true mhc_gene_1: $ref: '#/Ontology' nullable: true @@ -5090,6 +5389,7 @@ ReceptorReactivity: id: MRO:0000055 label: HLA-DRA x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5100,7 +5400,9 @@ ReceptorReactivity: type: string nullable: true description: Allele designation of the MHC alpha chain - example: HLA-DRA + example: HLA-DRA*01:01 + x-airr: + adc-query-support: true mhc_gene_2: $ref: '#/Ontology' nullable: true @@ -5110,6 +5412,7 @@ ReceptorReactivity: id: MRO:0000057 label: HLA-DRB1 x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5122,35 +5425,40 @@ ReceptorReactivity: description: > Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain example: HLA-DRB1*04:01 + x-airr: + adc-query-support: true reactivity_method: type: string nullable: false enum: - - SPR - - ITC - - ELISA - - cytometry - - biological_activity + - native_protein + - MHC_peptide_multimer description: The methodology used to assess expression (assay implemented in experiment) + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_readout: type: string nullable: false enum: - - binding_strength - - cytokine_release - - dissociation_constant_kd - - on_rate - - off_rate - - pathogen_inhibition + - fluorescence_intensity + - barcode_count description: Reactivity measurement read-out - example: cytokine release + example: barcode_count + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_value: type: number nullable: false description: The absolute (processed) value of the measurement - example: 162.26 + example: 162 + x-airr: + adc-query-support: true reactivity_unit: type: string 
nullable: false description: The unit of the measurement - example: pg/ml + example: read count + x-airr: + adc-query-support: true diff --git a/lang/R/tests/data-tests/extra_rearrangement.tsv b/lang/R/tests/data-tests/extra_rearrangement.tsv new file mode 100644 index 000000000..8bedb960f --- /dev/null +++ b/lang/R/tests/data-tests/extra_rearrangement.tsv @@ -0,0 +1,2 @@ +sequence_id sequence rev_comp productive v_call d_call j_call sequence_alignment germline_alignment junction junction junction_aa v_cigar d_cigar j_cigar +1 2 F F 5 6 7 8 9 10 11 12 13 14 15 not_in_header not_in diff --git a/lang/R/tests/data-tests/good_rearrangement.tsv b/lang/R/tests/data-tests/good_rearrangement.tsv index c233d8412..e8521767d 100644 --- a/lang/R/tests/data-tests/good_rearrangement.tsv +++ b/lang/R/tests/data-tests/good_rearrangement.tsv @@ -1,10 +1,10 @@ rearrangement_id rearrangement_set_id sequence_id sequence rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count -IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1247 -IVKNQEJ01AQVWS 
1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 4 -IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 1 275 1 317 280 289 11 20 293 334 6 47 4 3 92 -IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 2913 -IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 1 269 1 317 274 281 11 18 286 327 6 47 4 4 1 -IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1 -IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 1 275 1 317 280 289 11 20 293 334 6 47 4 3 30 -IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 1 275 1 317 281 290 11 20 294 335 6 47 5 3 4 -IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 +IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 diff --git a/lang/R/tests/data-tests/good_rearrangement.tsv.gz b/lang/R/tests/data-tests/good_rearrangement.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c838c584d5e10ba715dd1ebb7fbe1925d847ae23 GIT binary patch literal 1106 zcmV-Y1g-lYiwFpPH)>`817~kil^8C_U(5LsjD3)P1|fK78v5?Paa;Z9}W*y zW=UBl#WK6iitWuRC1p>z&#ZW#{k6-AMb<4t&xf1E-R*{K%DZ&8*sdP3wqvqhEejli zEVJ@7m`r2CukkVainz=8?B9wBB&b^N1#Q}Vzj{&>QIa$TQ{z&hC&=c6X@D0#y}4{ z5{Q^W4;|YcZWbILXS6+R2S_xSKu;5)xs|oEaHB%%$6yC+ zfcDs$*c}@^w_e(TW0GG_$%{b zxXvDd>+6U*n2YeBMrwLC`1tYK`YGo1ndzK|^6`kX2|OGgR+Uk+hagpswLny?DcPKT zk|`vNQg`;rE^snzfa3rht$|ZM;VkHcR-Lo&)6}2#&OR%=JFXWn7d;=VKX&weCv(w0 zbA#4#$zT*d<+yB^#puz1-DkpT{)+&Qe;)QT@g@_e>GFVs0k=GCf5OLoT1*c!{x_2(bya$vOUOmA2nNa#c zAEtbON4{EsB@aGK3EzfM%!X0C3aJ=+CsD;%_L2T!eAW0c8)Nx4#)9C`#aNXe^J_mQ 
Y152I%RG=m8H>?l;0K5(Ux-J(00KYL1`v3p{ literal 0 HcmV?d00001 diff --git a/lang/R/tests/data-tests/good_repertoire.yaml b/lang/R/tests/data-tests/good_repertoire.yaml index ce83f55fe..7ac0ae578 100644 --- a/lang/R/tests/data-tests/good_repertoire.yaml +++ b/lang/R/tests/data-tests/good_repertoire.yaml @@ -383,7 +383,7 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null - diagnosis_timepoint: + diagnosis_timepoint: label: Study enrollment value: 60 unit: diff --git a/lang/js/airr-schema-openapi3.yaml b/lang/js/airr-schema-openapi3.yaml index e833ff937..ff0e5753c 100644 --- a/lang/js/airr-schema-openapi3.yaml +++ b/lang/js/airr-schema-openapi3.yaml @@ -341,6 +341,138 @@ Attributes: type: string description: Ontology name for the top node term + +# FileObject +FileObject: + type: object + properties: + format: + type: string + description: The file format for the file (e.g. json, tsv). + nullable: true + compression: + type: string + description: The type of compression (if any) used for the file (e.g. zip, gz, bz2). + nullable: true + filename: + type: string + description: The name of the file + nullable: true + checksum: + type: string + description: MD5 checksum of the file. + nullable: true + version: + type: string + description: Version stamp for the file (if any). + nullable: true + +# AIRR DataSet specification +# An AIRR DataSet consists of tags for types of AIRR Schema Objects and a set of files that hold data in the AIRR format +# appropriate for that AIRR schema object. + +DataSet: + type: object + properties: + name: + type: string + description: A human readable name for the data set. + nullable: true + description: + type: string + description: A description of the data set. 
+ nullable: true + repository_url: + type: string + description: The URL of the repository from which the data set came from (if the data came from a repository) + nullable: true + files: + type: object + description: A set of files based on data type, where the key for each object denotes the type of data and the accompanying array of files (and related file metadata) contain the data of that type. All top level AIRR objects (e.g. Repertoire, Rearrangement, Clone, Cell, Expression, Reactivity, Receptor, RepertoireGroup) are allowed. Custom data types are also permitted, so the manifest can include data that is outside of the AIRR specification. + nullable: true + properties: + Repertoire: + type: array + items: + $ref: '#/FileObject' + nullable: true + Rearrangement: + type: array + items: + $ref: '#/FileObject' + nullable: true + Clone: + type: array + items: + $ref: '#/FileObject' + nullable: true + Cell: + type: array + items: + $ref: '#/FileObject' + nullable: true + Expression: + type: array + items: + $ref: '#/FileObject' + nullable: true + Reactivity: + type: array + items: + $ref: '#/FileObject' + nullable: true + Receptor: + type: array + items: + $ref: '#/FileObject' + nullable: true + GermlineSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + GenotypeSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + RepertoireGroup: + type: array + items: + $ref: '#/FileObject' + nullable: true + ADCQuery: + type: array + items: + $ref: '#/FileObject' + nullable: true + + +# AIRR Manifest specification. +# An AIRR Manifest contains metadata for a group of accompanying files that are part of a set or coherent unit. + +Manifest: + type: object + properties: + name: + type: string + description: A human readable name for the data manifest. 
+ nullable: false + description: + type: string + description: A description of the purpose for this data manifest + nullable: true + created_at: + type: string + description: Time/data stamp when the dat manifest was created. + nullable: true + data_sets: + type: array + items: + $ref: '#/DataSet' + nullable: true + + # AIRR Data File # # A JSON data file that holds Repertoire metadata, data processing @@ -402,7 +534,13 @@ DataFile: nullable: false description: List of genotype sets items: - $ref: '#/GenotypeSet' + $ref: '#/GenotypeSet' + Manifest: + type: array + nullable: false + description: List of data set manifests + items: + $ref: '#/Manifest' # AIRR Info object, should be similar to openapi # should we point to an openapi schema? @@ -2610,7 +2748,8 @@ CellProcessing: cell_phenotype: type: string nullable: true - description: List of cellular markers and their expression levels used to isolate the cell population + description: > + List of cellular markers and their expression levels used to isolate the cell population. title: Cell subset phenotype example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: @@ -2619,6 +2758,20 @@ CellProcessing: set: 3 subset: process (cell) name: Cell subset phenotype + cell_label: + type: string + nullable: true + description: > + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. 
+ title: Cell annotation + example: age-associated B cell + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell annotation cell_species: $ref: '#/Ontology' nullable: true @@ -4790,7 +4943,6 @@ Cell: type: object required: - cell_id - - rearrangements - repertoire_id - virtual_pairing properties: @@ -4807,32 +4959,6 @@ Cell: miairr: defined adc-query-support: true name: Cell index - rearrangements: - type: array - nullable: true - description: > - Array of sequence identifiers defined for the Rearrangement objects associated with this cell - title: Cell-associated rearrangements - items: - type: string - example: [id1, id2] - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated rearrangements - receptors: - type: array - nullable: true - description: > - Array of receptor identifiers defined for the Receptor objects associated with this cell - title: Cell-associated receptors - items: - type: string - example: [id1, id2] - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated receptors repertoire_id: type: string nullable: true @@ -4851,34 +4977,60 @@ Cell: miairr: defined adc-query-support: true name: Data processing for cell - expression_study_method: - type: string - enum: - - flow_cytometry - - single-cell_transcriptome - - null + receptors: + type: array nullable: true description: > - Keyword describing the methodology used to assess expression. This values for this field MUST - come from a controlled vocabulary. + Array of receptor identifiers defined for the Receptor objects associated with this cell + title: Cell-associated receptors + items: + type: string + example: [id1, id2] x-airr: miairr: defined adc-query-support: true - expression_raw_doi: + name: Cell-associated receptors + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population. 
+ title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: type: string nullable: true description: > - DOI of raw data set containing the current event + List of cellular markers and their expression levels used to isolate the cell population. + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: miairr: defined adc-query-support: true - expression_index: + name: Cell subset phenotype + cell_label: type: string nullable: true description: > - Index addressing the current event within the raw data set. + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. + title: Cell annotation + example: age-associated B cell x-airr: miairr: defined + adc-query-support: true + name: Cell annotation virtual_pairing: type: boolean nullable: true @@ -4890,10 +5042,10 @@ Cell: adc-query-support: true name: Virtual pairing -# The CellExpression object acts as a container to hold a single expression level measurement from +# The Expression object acts as a container to hold a single expression level measurement from # an experiment. Expression data is associated with a cell_id and the related repertoire_id and # data_processing_id. -CellExpression: +Expression: type: object required: - expression_id @@ -4907,7 +5059,7 @@ CellExpression: expression_id: type: string description: > - Identifier for the CellExpression object. This identifier must be unique within + Identifier for the Expression object. This identifier must be unique within a given study, but it is recommended that it be a universally unique record locator to enable database applications. 
title: Expression property measurement identifier @@ -5092,10 +5244,10 @@ Receptor: x-airr: adc-query-support: true -CellReactivity: +Reactivity: type: object required: - - cell_reactivity_id + - reactivity_id - cell_id - ligand_type - antigen_type @@ -5105,14 +5257,14 @@ CellReactivity: - reactivity_value - reactivity_unit properties: - cell_reactivity_id: + reactivity_id: type: string nullable: false description: > - Identifier for the CellReactivity object. This identifier must be unique within + Identifier for the Reactivity object. This identifier must be unique within a given study, but it is recommended that it be a universally unique record locator to enable database applications. - title: CellReactivity ID + title: Reactivity ID x-airr: identifier: true adc-query-support: true diff --git a/lang/js/tests/data/bad_genotype_set.json b/lang/js/tests/data/bad_genotype_set.json index c58a39027..01709d60a 100644 --- a/lang/js/tests/data/bad_genotype_set.json +++ b/lang/js/tests/data/bad_genotype_set.json @@ -41,4 +41,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/js/tests/data/bad_germline_set.json b/lang/js/tests/data/bad_germline_set.json index f221dcf9e..28531aabb 100644 --- a/lang/js/tests/data/bad_germline_set.json +++ b/lang/js/tests/data/bad_germline_set.json @@ -1,27 +1,71 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": 
"2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", - "pub_ids": "", - "species": ["Mouse"], + "pub_ids": [""], + "species": "Mouse", "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", "allele_descriptions": [ { "allele_description_id": "OGRDB:A00301", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", @@ -33,7 +77,7 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", + "inference_type": "rearranged_only", "species": "Mouse", "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", @@ -66,7 +110,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -182,11 +226,33 @@ }, { "allele_description_id": "OGRDB:A00314", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William 
Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", @@ -198,7 +264,7 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", + "inference_type": "rearranged_only", "species": "Mouse", "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", @@ -231,7 +297,7 @@ "fwr3_start": 196, "fwr3_end": 312, "cdr3_start": 313, - "alignment": [ + "alignment_labels": [ "1", "2", "3", @@ -348,4 +414,4 @@ ], "notes": "" }] -} \ No newline at end of file +} diff --git a/lang/js/tests/data/bad_rearrangement.tsv b/lang/js/tests/data/bad_rearrangement.tsv index d12fc79fe..9849c2c04 100644 --- a/lang/js/tests/data/bad_rearrangement.tsv +++ b/lang/js/tests/data/bad_rearrangement.tsv @@ -1,10 +1,12 @@ rearrangement_id rearrangement_set_id sequence_id wrong_name rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start 
j_germline_end np1_length np2_length duplicate_count -IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 -IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 -IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 -IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 -IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 -IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 -IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 -IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 -IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T yes IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 1 275 1 317 280 289 11 20 293 334 6 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA NA T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 1 269 1 317 274 281 11 18 286 327 6 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1 +IVKNQEJ01B0TT2 1 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 1 275 1 317 280 289 11 20 293 334 6 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 1 275 1 317 281 290 11 20 294 335 6 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 diff --git a/lang/js/tests/data/bad_repertoire.yaml b/lang/js/tests/data/bad_repertoire.yaml index a98e085d2..f35355e98 100644 --- a/lang/js/tests/data/bad_repertoire.yaml +++ b/lang/js/tests/data/bad_repertoire.yaml @@ -8,21 +8,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -32,7 +50,7 @@ Repertoire: cell_subset: "Naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -56,21 +74,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. 
These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -80,7 +116,7 @@ Repertoire: cell_subset: "Memory B cell" cell_phenotype: "expression of CD20 and CD27" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -104,21 +140,39 @@ Repertoire: study_id: PRJNA300878 study_title: "Homo sapiens B and T cell repertoire - MZ twins" study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + - role: "data curation" + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + pub_ids: ["PMID:27005435"] subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" sex: female age_min: 27 age_max: 27 age_unit: - id: UO_0000036 + id: UO:0000036 value: year linked_subjects: TW01B link_type: twin @@ -128,7 +182,7 @@ Repertoire: cell_subset: "Naive CD4+ T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" value: "Homo sapiens" single_cell: false cell_isolation: FACS diff --git a/lang/js/tests/data/good_combined_airr.json b/lang/js/tests/data/good_combined_airr.json index 00480023b..fbea02710 100644 --- a/lang/js/tests/data/good_combined_airr.json +++ b/lang/js/tests/data/good_combined_airr.json @@ -10,13 +10,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. 
Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", - "pub_ids": "PMID:27005435", - "collected_by": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. 
Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], "grants": null, "keywords_study": [ "contains_ig", @@ -27,18 +66,27 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO_0000036", - "label": "year" - }, + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } + }, "age_event": null, - "ancestry_population": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -47,6 +95,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": { "id": null, "label": null @@ -58,7 +114,65 @@ "intervention": null, "medical_history": null } - ] + ], + "genotype": { + "receptor_genotype_set": { + "receptor_genotype_set_id": "1", + "genotype_class_list": [ + { + "receptor_genotype_id": "1", + "locus": "IGH", + "documented_alleles": [ + { + "label": "IGHV1-69*01", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 1 + }, + { + "label": "IGHV1-69*02", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + "phasing": 2 + } + ], + "undocumented_alleles": [ + { + "allele_name": "IGHD3-1*01_S1234", + "sequence": "agtagtagtagt", + "phasing": 1 + } + ], + "deleted_genes": [ + { + "label": "IGHV3-30-3", + "germline_set_ref": "IMGT:Homo sapiens:2022.1.31", + 
"phasing": 1 + } + ], + "inference_process": "repertoire_sequencing" + } + ] + }, + "mhc_genotype_set": { + "mhc_genotype_set_id": "01847298-d0c2-11ee-bc66", + "mhc_genotype_list": [ + { + "mhc_genotype_id": "00be1c2e-d0c2-11ee-bfe7", + "mhc_class": "MHC-I", + "mhc_genotyping_method": "pcr_low_resolution", + "mhc_alleles": [ + { + "allele_designation": "01:01", + "gene": { + "id": "MRO-0000046", + "label": "HLA-A" + }, + "reference_set_ref": null + } + ] + } + ] + } + } }, "sample": [ { @@ -66,17 +180,18 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000788", + "id": "CL:0000788", "label": "naive B cell" }, "cell_phenotype": "expression of CD20 and the absence of CD27", + "cell_label": "naive B cell", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -103,11 +218,10 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { + "collection_location": { "id": null, "label": null }, - "collection_time_point_reference": null, "biomaterial_provider": null, "cell_number": null, "cells_per_reaction": null, @@ -116,10 +230,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, @@ -158,13 +268,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. 
By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", - "pub_ids": "PMID:27005435", - "collected_by": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. 
Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], "grants": null, "keywords_study": [ "contains_ig", @@ -175,18 +324,27 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO_0000036", - "label": "year" + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } }, "age_event": null, - "ancestry_population": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -195,6 +353,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": { "id": null, "label": null @@ -214,17 +380,18 @@ "sample_processing_id": null, "sample_type": "peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000787", + "id": "CL:0000787", "label": "memory B cell" }, "cell_phenotype": "expression of CD20 and CD27", + "cell_label": "memory B cell", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -251,11 +418,10 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { + "collection_location": { "id": null, 
"label": null }, - "collection_time_point_reference": null, "biomaterial_provider": null, "cell_number": null, "cells_per_reaction": null, @@ -264,10 +430,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, @@ -306,13 +468,52 @@ "label": null }, "study_description": "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level.", - "study_contact": "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X", "inclusion_exclusion_criteria": null, - "lab_name": "Mark M. 
Davis", - "lab_address": "Stanford University", - "submitted_by": "Florian Rubelt", - "pub_ids": "PMID:27005435", - "collected_by": null, + "contributors": [ + { + "contributor_id": "1", + "name": "Florian Rubelt", + "orcid_id": { + "id": null, + "label": null + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + }, + { + "contributor_id": "2", + "name": "Mark M. Davis", + "orcid_id": { + "id": "ORCID:0000-0001-6868-657X", + "label": "Mark Davis" + }, + "affiliation": { + "id": "ROR:00f54p054", + "label": "Stanford University" + }, + "affiliation_department": "Department of Microbiology and Immunology, Stanford University School of Medicine", + "contributions": [ + { + "role": "supervision", + "degree": null + } + ] + } + ], + "pub_ids": ["PMID:27005435"], "grants": null, "keywords_study": [ "contains_ig", @@ -323,18 +524,27 @@ "subject_id": "TW01A", "synthetic": false, "species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO_0000036", - "label": "year" + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } }, "age_event": null, - "ancestry_population": null, + "ancestry_population": { + "id": null, + "label": null + }, + "location_birth": { + "id": null, + "label": null + }, "ethnicity": null, "race": null, "strain_name": null, @@ -343,6 +553,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": { "id": null, "label": null @@ -362,17 +580,18 @@ "sample_processing_id": null, "sample_type": 
"peripheral venous puncture", "tissue": { - "id": "UBERON_0000178", + "id": "UBERON:0000178", "label": "blood" }, "tissue_processing": "Ficoll gradient", "cell_subset": { - "id": "CL_0000895", + "id": "CL:0000895", "label": "naive thymus-derived CD4-positive, alpha-beta T cell" }, "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", + "cell_label": "naive CD4 aB T cell", "cell_species": { - "id": "NCBITaxon_9606", + "id": "NCBITAXON:9606", "label": "Homo sapiens" }, "single_cell": false, @@ -399,11 +618,10 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { + "collection_location": { "id": null, "label": null }, - "collection_time_point_reference": null, "biomaterial_provider": null, "cell_number": null, "cells_per_reaction": null, @@ -412,10 +630,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, @@ -446,19 +660,44 @@ } ], + "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": "Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", - "pub_ids": "", - "species": { "id": 
"NCBITAXON:10090", "label": "Mus musculus" }, + "pub_ids": [""], + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -466,15 +705,37 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -482,8 +743,11 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -504,18 +768,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", @@ -632,15 +898,37 @@ { "allele_description_id": 
"OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "3", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department": null, + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -648,8 +936,11 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + 
"inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -670,18 +961,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "unaligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "aligned_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", @@ -798,7 +1091,6 @@ ], "curation": null }], - "GenotypeSet": [{ "receptor_genotype_set_id": "1", "genotype_class_list": [ @@ -835,4 +1127,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/js/tests/data/good_combined_airr.yaml b/lang/js/tests/data/good_combined_airr.yaml index 5479c0540..b04fcb42e 100644 --- a/lang/js/tests/data/good_combined_airr.yaml +++ b/lang/js/tests/data/good_combined_airr.yaml @@ -21,13 +21,35 @@ Repertoire: a single chromosome. 
These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt - pub_ids: PMID:27005435 - collected_by: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] grants: keywords_study: - contains_ig @@ -36,16 +58,22 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: + id: + label: + location_birth: + id: + label: ethnicity: race: strain_name: @@ -53,6 +81,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -62,20 +96,55 @@ Repertoire: immunogen: intervention: medical_history: + genotype: + receptor_genotype_set: + receptor_genotype_set_id: "1" + genotype_class_list: + - receptor_genotype_id: "1" + locus: IGH + 
documented_alleles: + - label: IGHV1-69*01 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + - label: IGHV1-69*02 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 2 + undocumented_alleles: + - allele_name: IGHD3-1*01_S1234 + sequence: agtagtagtagt + phasing: 1 + deleted_genes: + - label: IGHV3-30-3 + germline_set_ref: IMGT:Homo sapiens:2022.1.31 + phasing: 1 + inference_process: repertoire_sequencing + mhc_genotype_set: + mhc_genotype_set_id: 01847298-d0c2-11ee-bc66 + mhc_genotype_list: + - mhc_genotype_id: 00be1c2e-d0c2-11ee-bfe7 + mhc_class: MHC-I + mhc_genotyping_method: pcr_low_resolution + mhc_alleles: + - allele_designation: "01:01" + gene: + id: MRO-0000046 + label: HLA-A + reference_set_ref: sample: - sample_id: TW01A_B_naive sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000788 + id: CL:0000788 label: naive B cell cell_phenotype: expression of CD20 and the absence of CD27 + cell_label: naive B cell cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -97,10 +166,9 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: + collection_location: id: label: - collection_time_point_reference: biomaterial_provider: cell_number: cells_per_reaction: @@ -109,9 +177,6 @@ Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: library_generation_kit_version: @@ -156,13 +221,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. 
Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt - pub_ids: PMID:27005435 - collected_by: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] grants: keywords_study: - contains_ig @@ -171,16 +258,22 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: + id: + label: + location_birth: + id: + label: ethnicity: race: strain_name: @@ -188,6 +281,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -202,15 +301,16 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000787 + id: CL:0000787 label: memory B cell cell_phenotype: expression of CD20 and CD27 + cell_label: memory B cell cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 
label: Homo sapiens single_cell: false cell_isolation: FACS @@ -232,10 +332,9 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: + collection_location: id: label: - collection_time_point_reference: biomaterial_provider: cell_number: cells_per_reaction: @@ -244,9 +343,6 @@ Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: library_generation_kit_version: @@ -291,13 +387,35 @@ Repertoire: a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level. - study_contact: Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X inclusion_exclusion_criteria: - lab_name: Mark M. Davis - lab_address: Stanford University - submitted_by: Florian Rubelt - pub_ids: PMID:27005435 - collected_by: + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: + label: + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: + - role: "data curation" + degree: + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: + pub_ids: ["PMID:27005435"] grants: keywords_study: - contains_ig @@ -306,16 +424,22 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: + id: + label: + location_birth: + id: + label: ethnicity: race: strain_name: @@ -323,6 +447,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -337,15 +467,16 @@ Repertoire: sample_processing_id: sample_type: peripheral venous puncture tissue: - id: UBERON_0000178 + id: UBERON:0000178 label: blood tissue_processing: Ficoll gradient cell_subset: - id: CL_0000895 + id: CL:0000895 label: naive thymus-derived CD4-positive, alpha-beta T cell cell_phenotype: expression of CD8 and absence of CD4 and CD45RO + cell_label: naive CD4 aB T cell cell_species: - id: NCBITaxon_9606 + id: NCBITAXON:9606 label: Homo sapiens single_cell: false cell_isolation: FACS @@ -367,10 +498,9 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: + collection_location: id: label: - collection_time_point_reference: biomaterial_provider: cell_number: cells_per_reaction: @@ -379,9 +509,6 @@ Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: 
library_generation_kit_version: @@ -407,16 +534,27 @@ Repertoire: GermlineSet: - germline_set_id: OGRDB:G00007 - author: William Lees - lab_name: '' - lab_address: Birkbeck College, University of London, Malet Street, London - acknowledgements: [] + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: null + - role: data curation + degree: null release_version: 1 - release_description: '' - release_date: '2021-11-24' + release_description: "" + release_date: "2021-11-24" germline_set_name: CAST IGH germline_set_ref: OGRDB:G00007.1 - pub_ids: '' + pub_ids: [""] species: id: NCBITAXON:10090 label: Mus musculus @@ -426,22 +564,34 @@ GermlineSet: allele_descriptions: - allele_description_id: OGRDB:A00301 allele_description_ref: OGRDB:Mouse_IGH:IGHV-2DBF - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: release_version: 1 - release_date: 24-Nov-2021 + release_date: "2021-11-24" release_description: First release label: IGHV-2DBF sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA - coding_sequence: 
GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + coding_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA aliases: - watson_et_al:CAST_EiJ_IGHV5-3 locus: IGH chromosome: sequence_type: V functional: true - inference_type: "Rearranged only" + inference_type: rearranged_only species: id: NCBITAXON:10090 label: Mus musculus @@ -464,18 +614,20 @@ GermlineSet: v_gene_delineations: - sequence_delineation_id: '1' delineation_scheme: IMGT + unaligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA + aligned_sequence: GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: - '1' 
- '2' - '3' @@ -587,22 +739,34 @@ GermlineSet: curational_tags: - allele_description_id: OGRDB:A00314 allele_description_ref: OGRDB:Mouse_IGH:IGHV-2ETO - maintainer: William Lees - acknowledgements: [] - lab_address: Birkbeck College, University of London, Malet Street, London + acknowledgements: + - contributor_id: "3" + name: William Lees + orcid_id: + id: ORCID:0000-0001-9834-6840 + label: William Lees + affiliation: + id: ROR:02mb95055 + label: Birkbeck, University of London + affiliation_department: + contributions: + - role: investigation + degree: + - role: data curation + degree: release_version: 1 - release_date: 24-Nov-2021 + release_date: "2021-11-24" release_description: First release label: IGHV-2ETO sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC - coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + coding_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC aliases: - watson_et_al:CAST_EiJ_IGHV8-2 locus: IGH chromosome: sequence_type: V functional: true - inference_type: "Rearranged only" + inference_type: rearranged_only species: id: NCBITAXON:10090 label: Mus musculus @@ -625,18 +789,20 @@ GermlineSet: v_gene_delineations: - sequence_delineation_id: '1' 
delineation_scheme: IMGT + unaligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC + aligned_sequence: CAAGTTACTCTAAAAGAGTCTGGCCCTG...GGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGA............GCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGT......GGGATGATGATAAGTACTATAACCCATCCCTGA...AGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC fwr1_start: 1 - fwr1_end: 78 - cdr1_start: 79 - cdr1_end: 114 - fwr2_start: 115 - fwr2_end: 165 - cdr2_start: 166 - cdr2_end: 195 - fwr3_start: 196 - fwr3_end: 312 - cdr3_start: 313 - alignment: + fwr1_end: 75 + cdr1_start: 76 + cdr1_end: 110 + fwr2_start: 111 + fwr2_end: 150 + cdr2_start: 151 + cdr2_end: 160 + fwr3_start: 161 + fwr3_end: 294 + cdr3_start: 295 + alignment_labels: - '1' - '2' - '3' @@ -749,9 +915,9 @@ GermlineSet: curation: GenotypeSet: - - receptor_genotype_set_id: '1' + - receptor_genotype_set_id: "1" genotype_class_list: - - receptor_genotype_id: '1' + - receptor_genotype_id: "1" locus: IGH documented_alleles: - label: IGHV1-69*01 diff --git a/lang/js/tests/data/good_genotype_set.json b/lang/js/tests/data/good_genotype_set.json index ba10f56e9..abd24646c 100644 --- a/lang/js/tests/data/good_genotype_set.json +++ b/lang/js/tests/data/good_genotype_set.json @@ -35,4 +35,4 @@ } ] }] -} \ No newline at end of file +} diff --git a/lang/js/tests/data/good_germline_set.json b/lang/js/tests/data/good_germline_set.json index d36d19ad4..e74c590dc 100644 --- a/lang/js/tests/data/good_germline_set.json +++ b/lang/js/tests/data/good_germline_set.json @@ -1,17 +1,41 @@ { "GermlineSet": [{ "germline_set_id": "OGRDB:G00007", - "author": "William Lees", - "lab_name": "", - "lab_address": 
"Birkbeck College, University of London, Malet Street, London", - "acknowledgements": [], + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, "release_description": "", "release_date": "2021-11-24", "germline_set_name": "CAST IGH", "germline_set_ref": "OGRDB:G00007.1", - "pub_ids": "", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "pub_ids": [""], + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "locus": "IGH", @@ -19,15 +43,37 @@ { "allele_description_id": "OGRDB:A00301", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2DBF", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2DBF", - "sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGATGGTAGTGGCACCTACTATCTGGACTCCTTGAAGAGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", - "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "coding_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "aliases": [ "watson_et_al:CAST_EiJ_IGHV5-3" ], @@ -35,8 +81,11 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -57,18 +106,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", @@ -185,15 +236,37 @@ { "allele_description_id": "OGRDB:A00314", "allele_description_ref": "OGRDB:Mouse_IGH:IGHV-2ETO", - "maintainer": "William Lees", - "acknowledgements": [], - "lab_address": "Birkbeck College, University of London, Malet Street, London", + "acknowledgements": [ + { + "contributor_id": "1", + "name": "William Lees", + "orcid_id": { + "id": "ORCID:0000-0001-9834-6840", + "label": "William Lees" + }, + "affiliation": { + "id": "ROR:02mb95055", + "label": "Birkbeck, University of London" + }, + "affiliation_department":"", + "contributions": [ + { + "role": "investigation", + "degree": null + }, + { + "role": "data curation", + "degree": null + } + ] + } + ], "release_version": 1, - "release_date": "24-Nov-2021", + "release_date": "2021-11-24", "release_description": "First release", "label": "IGHV-2ETO", "sequence": 
"CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", - "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCT...GGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGC......ACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGAT.........GATGATAAGTACTATAACCCATCCCTGAAG...AGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", + "coding_sequence": "CAAGTTACTCTAAAAGAGTCTGGCCCTGGGATATTGAAGCCCTCACAGACCCTCAGTCTGACTTGTTCTTTCTCTGGGTTTTCACTGAGCACTACTAATATGGGTGTAGGCTGGATTCGTCAGCCTTCAGGGAAGGGTCTGGAGTGGCTGGCACACATTTGGTGGGATGATGATAAGTACTATAACCCATCCCTGAAGAGCCGGCTAACAATCTCCAAGGATACCTCCAGAAACCAGGTATTCCTCAAGATCACCAGTGTGGACACTGCAGATACTGCCACTTACTACTGTGCTC", "aliases": [ "watson_et_al:CAST_EiJ_IGHV8-2" ], @@ -201,8 +274,11 @@ "chromosome": null, "sequence_type": "V", "functional": true, - "inference_type": "Rearranged only", - "species": { "id": "NCBITAXON:10090", "label": "Mus musculus" }, + "inference_type": "rearranged_only", + "species": { + "id": "NCBITAXON:10090", + "label": "Mus musculus" + }, "species_subgroup": "CAST_EiJ", "species_subgroup_type": "strain", "status": "active", @@ -223,18 +299,20 @@ { "sequence_delineation_id": "1", "delineation_scheme": "IMGT", + "aligned_sequence": "GAAGTGAAGCTGGTGGAGTCTGAGGGA...GGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTC............AGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", + "unaligned_sequence": 
"GAAGTGAAGCTGGTGGAGTCTGAGGGAGGCTTAGTGCAGCCTGGAAGTTCCATGAAACTCTCCTGCACAGCCTCTGGATTCACTTTCAGTGACTATTACATGGCTTGGGTCCGCCAGGTTCCAGAAAAGGGTCTAGAATGGGTTGCAAACATTAATTATGAT......GGTAGTGGCACCTACTATCTGGACTCCTTGAAG...AGCCGTTTCATCATCTCGAGAGACAATGCAAAGAACATTCTATACCTGCAAATGAGCAGTCTGAAGTCTGAGGACACAGCCACGTATTACTGTGCAA", "fwr1_start": 1, - "fwr1_end": 78, - "cdr1_start": 79, - "cdr1_end": 114, - "fwr2_start": 115, - "fwr2_end": 165, - "cdr2_start": 166, - "cdr2_end": 195, - "fwr3_start": 196, - "fwr3_end": 312, - "cdr3_start": 313, - "alignment": [ + "fwr1_end": 75, + "cdr1_start": 76, + "cdr1_end": 110, + "fwr2_start": 111, + "fwr2_end": 150, + "cdr2_start": 151, + "cdr2_end": 160, + "fwr3_start": 161, + "fwr3_end": 294, + "cdr3_start": 295, + "alignment_labels": [ "1", "2", "3", diff --git a/lang/js/tests/data/good_rearrangement.tsv.gz b/lang/js/tests/data/good_rearrangement.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c838c584d5e10ba715dd1ebb7fbe1925d847ae23 GIT binary patch literal 1106 zcmV-Y1g-lYiwFpPH)>`817~kil^8C_U(5LsjD3)P1|fK78v5?Paa;Z9}W*y zW=UBl#WK6iitWuRC1p>z&#ZW#{k6-AMb<4t&xf1E-R*{K%DZ&8*sdP3wqvqhEejli zEVJ@7m`r2CukkVainz=8?B9wBB&b^N1#Q}Vzj{&>QIa$TQ{z&hC&=c6X@D0#y}4{ z5{Q^W4;|YcZWbILXS6+R2S_xSKu;5)xs|oEaHB%%$6yC+ zfcDs$*c}@^w_e(TW0GG_$%{b zxXvDd>+6U*n2YeBMrwLC`1tYK`YGo1ndzK|^6`kX2|OGgR+Uk+hagpswLny?DcPKT zk|`vNQg`;rE^snzfa3rht$|ZM;VkHcR-Lo&)6}2#&OR%=JFXWn7d;=VKX&weCv(w0 zbA#4#$zT*d<+yB^#puz1-DkpT{)+&Qe;)QT@g@_e>GFVs0k=GCf5OLoT1*c!{x_2(bya$vOUOmA2nNa#c zAEtbON4{EsB@aGK3EzfM%!X0C3aJ=+CsD;%_L2T!eAW0c8)Nx4#)9C`#aNXe^J_mQ Y152I%RG=m8H>?l;0K5(Ux-J(00KYL1`v3p{ literal 0 HcmV?d00001 diff --git a/lang/js/tests/data/good_repertoire.yaml b/lang/js/tests/data/good_repertoire.yaml index e53106c30..7ac0ae578 100644 --- a/lang/js/tests/data/good_repertoire.yaml +++ b/lang/js/tests/data/good_repertoire.yaml @@ -11,31 +11,59 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a 
highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. 
Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null - keywords_study: + keywords_study: - "contains_ig" - "contains_tr" subject: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -43,6 +71,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -58,15 +92,16 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000788" + id: "CL:0000788" label: "naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" + cell_label: "naive B cell" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -90,10 +125,9 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: + collection_location: id: null label: null - collection_time_point_reference: null biomaterial_provider: null cell_number: null cells_per_reaction: null @@ -102,9 +136,6 @@ Repertoire: cell_processing_protocol: null template_quality: null 
template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: "RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null @@ -136,13 +167,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. 
Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null keywords_study: - "contains_ig" @@ -151,16 +204,22 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -168,6 +227,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -183,15 +248,16 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000787" + id: "CL:0000787" label: "memory B cell" cell_phenotype: "expression of CD20 and CD27" + cell_label: "memory B cell" 
cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -215,10 +281,9 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: + collection_location: id: null label: null - collection_time_point_reference: null biomaterial_provider: null cell_number: null cells_per_reaction: null @@ -227,9 +292,6 @@ Repertoire: cell_processing_protocol: null template_quality: null template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: "RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null @@ -261,13 +323,35 @@ Repertoire: id: null label: null study_description: "The adaptive immune system's capability to protect the body requires a highly diverse lymphocyte antigen receptor repertoire. However, the influence of individual genetic and epigenetic differences on these repertoires is not typically measured. By leveraging the unique characteristics of B, CD4+ T, and CD8+ T lymphocyte subsets isolated from monozygotic twins, we have quantified the impact of heritable factors on both the V(D)J recombination process and thymic selection in the case of T cell receptors, and show that the repertoires of both naive and antigen experienced cells are subject to biases resulting from differences in recombination. We show that biases in V(D)J usage, as well as biased N/P additions, contribute to significant variation in the CDR3 region. Moreover, we show that the relative usage of V and J gene segments is chromosomally biased, with approximately 1.5 times as many rearrangements originating from a single chromosome. These data refine our understanding of the heritable mechanisms affecting the repertoire, and show that biases are evident on a chromosome-wide level." - study_contact: "Mark M. 
Davis, mmdavis@stanford.edu, ORCID:0000-0001-6868-657X" inclusion_exclusion_criteria: null - lab_name: "Mark M. Davis" - lab_address: "Stanford University" - submitted_by: "Florian Rubelt" - pub_ids: "PMID:27005435" - collected_by: null + contributors: + - contributor_id: "1" + name: "Florian Rubelt" + orcid_id: + id: null + label: null + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "investigation" + degree: null + - role: "data curation" + degree: null + - contributor_id: "2" + name: "Mark M. Davis" + orcid_id: + id: "ORCID:0000-0001-6868-657X" + label: "Mark Davis" + affiliation: + id: "ROR:00f54p054" + label: "Stanford University" + affiliation_department: "Department of Microbiology and Immunology, Stanford University School of Medicine" + contributions: + - role: "supervision" + degree: null + pub_ids: ["PMID:27005435"] grants: null keywords_study: - "contains_ig" @@ -276,16 +360,22 @@ Repertoire: subject_id: TW01A synthetic: false species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO_0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null - ancestry_population: null + ancestry_population: + id: null + label: null + location_birth: + id: null + label: null ethnicity: null race: null strain_name: null @@ -293,6 +383,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -308,15 +404,16 @@ Repertoire: sample_processing_id: null sample_type: "peripheral venous puncture" tissue: - id: "UBERON_0000178" + id: "UBERON:0000178" label: "blood" tissue_processing: "Ficoll gradient" cell_subset: - id: "CL_0000895" + id: 
"CL:0000895" label: "naive thymus-derived CD4-positive, alpha-beta T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_label: "naive CD4+ aB T cell" cell_species: - id: "NCBITaxon_9606" + id: "NCBITAXON:9606" label: "Homo sapiens" single_cell: false cell_isolation: FACS @@ -340,10 +437,9 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: + collection_location: id: null label: null - collection_time_point_reference: null biomaterial_provider: null cell_number: null cells_per_reaction: null @@ -352,9 +448,6 @@ Repertoire: cell_processing_protocol: null template_quality: null template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: "RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null diff --git a/lang/python/airr/interface.py b/lang/python/airr/interface.py index b58811dc7..4f45e0d60 100644 --- a/lang/python/airr/interface.py +++ b/lang/python/airr/interface.py @@ -100,9 +100,14 @@ def load_rearrangement(filename, validate=False, debug=False): # TODO: test pandas.DataFrame.read_csv with converters argument as an alterative schema = RearrangementSchema - df = pd.read_csv(filename, sep='\t', header=0, index_col=None, - dtype=schema.pandas_types(), true_values=schema.true_values, - false_values=schema.false_values) + try: + df = pd.read_csv(filename, sep='\t', header=0, index_col=None, + dtype=schema.pandas_types(), true_values=schema.true_values, + false_values=schema.false_values) + except Exception as e: + sys.stderr.write('Error occurred while loading AIRR rearrangement file: %s\n' % e) + return None + # added to use RearrangementReader without modifying it: buffer = StringIO() # create an empty buffer df.to_csv(buffer, sep='\t', index=False) # fill buffer diff --git a/lang/python/airr/specs/airr-schema-openapi3.yaml b/lang/python/airr/specs/airr-schema-openapi3.yaml index 
d6c6d48e2..ff0e5753c 100644 --- a/lang/python/airr/specs/airr-schema-openapi3.yaml +++ b/lang/python/airr/specs/airr-schema-openapi3.yaml @@ -318,6 +318,8 @@ Attributes: - ontology - controlled_vocabulary - physical_quantity + - time_point + - time_interval - CURIE ontology: type: object @@ -339,6 +341,138 @@ Attributes: type: string description: Ontology name for the top node term + +# FileObject +FileObject: + type: object + properties: + format: + type: string + description: The file format for the file (e.g. json, tsv). + nullable: true + compression: + type: string + description: The type of compression (if any) used for the file (e.g. zip, gz, bz2). + nullable: true + filename: + type: string + description: The name of the file + nullable: true + checksum: + type: string + description: MD5 checksum of the file. + nullable: true + version: + type: string + description: Version stamp for the file (if any). + nullable: true + +# AIRR DataSet specification +# An AIRR DataSet consists of tags for types of AIRR Schema Objects and a set of files that hold data in the AIRR format +# appropriate for that AIRR schema object. + +DataSet: + type: object + properties: + name: + type: string + description: A human readable name for the data set. + nullable: true + description: + type: string + description: A description of the data set. + nullable: true + repository_url: + type: string + description: The URL of the repository from which the data set came from (if the data came from a repository) + nullable: true + files: + type: object + description: A set of files based on data type, where the key for each object denotes the type of data and the accompanying array of files (and related file metadata) contain the data of that type. All top level AIRR objects (e.g. Repertoire, Rearrangement, Clone, Cell, Expression, Reactivity, Receptor, RepertoireGroup) are allowed. 
Custom data types are also permitted, so the manifest can include data that is outside of the AIRR specification. + nullable: true + properties: + Repertoire: + type: array + items: + $ref: '#/FileObject' + nullable: true + Rearrangement: + type: array + items: + $ref: '#/FileObject' + nullable: true + Clone: + type: array + items: + $ref: '#/FileObject' + nullable: true + Cell: + type: array + items: + $ref: '#/FileObject' + nullable: true + Expression: + type: array + items: + $ref: '#/FileObject' + nullable: true + Reactivity: + type: array + items: + $ref: '#/FileObject' + nullable: true + Receptor: + type: array + items: + $ref: '#/FileObject' + nullable: true + GermlineSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + GenotypeSet: + type: array + items: + $ref: '#/FileObject' + nullable: true + RepertoireGroup: + type: array + items: + $ref: '#/FileObject' + nullable: true + ADCQuery: + type: array + items: + $ref: '#/FileObject' + nullable: true + + +# AIRR Manifest specification. +# An AIRR Manifest contains metadata for a group of accompanying files that are part of a set or coherent unit. + +Manifest: + type: object + properties: + name: + type: string + description: A human readable name for the data manifest. + nullable: false + description: + type: string + description: A description of the purpose for this data manifest + nullable: true + created_at: + type: string + description: Time/data stamp when the dat manifest was created. 
+ nullable: true + data_sets: + type: array + items: + $ref: '#/DataSet' + nullable: true + + # AIRR Data File # # A JSON data file that holds Repertoire metadata, data processing @@ -400,7 +534,13 @@ DataFile: nullable: false description: List of genotype sets items: - $ref: '#/GenotypeSet' + $ref: '#/GenotypeSet' + Manifest: + type: array + nullable: false + description: List of data set manifests + items: + $ref: '#/Manifest' # AIRR Info object, should be similar to openapi # should we point to an openapi schema? @@ -446,6 +586,10 @@ InfoObject: type: string nullable: true +# +# General objects +# + # A time point TimePoint: description: Time point at which an observation or other action was performed. @@ -460,14 +604,14 @@ TimePoint: adc-query-support: true value: type: number - nullable: true + nullable: false description: Value of the time point example: -5.0 x-airr: adc-query-support: true unit: $ref: '#/Ontology' - nullable: true + nullable: false description: Unit of the time point title: Unit of immunization schedule example: @@ -482,9 +626,98 @@ TimePoint: id: UO:0000003 label: time unit -# -# General objects -# +# A time range or interval +TimeInterval: + description: Time range or interval for a measurement, observation or action. + type: object + properties: + min: + type: number + nullable: false + description: Lower/minimum value of the time interval + example: 5.0 + x-airr: + adc-query-support: true + max: + type: number + nullable: false + description: Upper/maximum value of the time interval + example: 10.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of the time interval + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + +# A physical quantity +PhysicalQuantity: + description: A physical quantity from a measurement or observation. 
+ type: object + properties: + quantity: + type: number + nullable: false + description: Physical quantity + example: -5.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of physical quantity + example: + id: UO:0000024 + label: nanogram + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000002 + label: physical quantity + + +# A time quantity +TimeQuantity: + description: A time quantity + type: object + properties: + quantity: + type: number + nullable: false + description: Time quantity + example: 30.0 + x-airr: + adc-query-support: true + unit: + $ref: '#/Ontology' + nullable: false + description: Unit of time + example: + id: UO:0000033 + label: day + x-airr: + adc-query-support: true + format: ontology + ontology: + draft: false + top_node: + id: UO:0000003 + label: time unit + # Contributor record to describe invididuals and their contribution to a data set # @@ -1913,9 +2146,7 @@ Subject: - synthetic - species - sex - - age_min - - age_max - - age_unit + - age - age_event - ancestry_population - ethnicity @@ -1999,52 +2230,30 @@ Subject: subset: subject name: Sex format: controlled_vocabulary - age_min: - type: number - nullable: true - description: Specific age or lower boundary of age range. - title: Age minimum - example: 60 - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age minimum - age_max: - type: number + age: + $ref: '#/TimeInterval' nullable: true description: > - Upper boundary of age range or equal to age_min for specific age. - This field should only be null if age_min is null. 
- title: Age maximum - example: 80 - x-airr: - miairr: important - adc-query-support: true - set: 1 - subset: subject - name: Age maximum - age_unit: - $ref: '#/Ontology' - nullable: true - description: Unit of age range - title: Age unit - example: - id: UO:0000036 - label: year + Age of subject expressed as a time interval. If singular time point then + min == max in the time interval. + examples: + - min: 50 + max: 50 + unit: + id: UO:0000036 + label: year + - min: 5 + max: 10 + unit: + id: UO:0000034 + label: week x-airr: miairr: important adc-query-support: true set: 1 subset: subject - name: Age unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + name: Age + format: time_interval age_event: type: string nullable: true @@ -2060,16 +2269,30 @@ Subject: set: 1 subset: subject name: Age event - age: - type: string + age_min: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_max: + type: number + nullable: true + x-airr: + deprecated: true + deprecated-description: Combined into single age field. + deprecated-replaced-by: + - age + age_unit: + $ref: '#/Ontology' nullable: true x-airr: deprecated: true - deprecated-description: Split into two fields to specify as an age range. + deprecated-description: Combined into single age field. 
deprecated-replaced-by: - - age_min - - age_max - - age_unit + - age ancestry_population: $ref: '#/Ontology' nullable: true @@ -2207,6 +2430,25 @@ Diagnosis: set: 1 subset: diagnosis and intervention name: Study group description + diagnosis_timepoint: + $ref: '#/TimePoint' + nullable: true + description: Time point for the diagnosis + title: Diagnosis timepoint + example: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day + x-airr: + miairr: important + adc-query-support: true + set: 1 + subset: diagnosis and intervention + name: Diagnosis timepoint + format: time_point + disease_diagnosis: $ref: '#/Ontology' nullable: true @@ -2228,11 +2470,15 @@ Diagnosis: id: DOID:4 label: disease disease_length: - type: string + $ref: '#/TimeQuantity' nullable: true description: Time duration between initial diagnosis and current intervention title: Length of disease - example: 23 months + example: + quantity: 23 + unit: + id: UO:0000035 + label: month x-airr: miairr: important adc-query-support: true @@ -2312,8 +2558,6 @@ Sample: - anatomic_site - disease_state_sample - collection_time_point_relative - - collection_time_point_relative_unit - - collection_time_point_reference - biomaterial_provider properties: sample_id: @@ -2388,49 +2632,40 @@ Sample: subset: sample name: Disease state of sample collection_time_point_relative: - type: number + $ref: '#/TimePoint' nullable: true - description: Time point at which sample was taken, relative to `Collection time event` + description: Time point at which sample was taken, relative to `label` event title: Sample collection time - example: 14 + example: + label: Primary vaccination + value: 14 + unit: + id: UO:0000033 + label: day x-airr: miairr: important adc-query-support: true set: 2 subset: sample name: Sample collection time + format: time_point collection_time_point_relative_unit: $ref: '#/Ontology' nullable: true - description: Unit of Sample collection time - title: Sample collection time unit - 
example: - id: UO:0000033 - label: day x-airr: - miairr: important - adc-query-support: true - set: 2 - subset: sample - name: Sample collection time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000003 - label: time unit + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. + deprecated-replaced-by: + - collection_time_point_relative collection_time_point_reference: type: string nullable: true description: Event in the study schedule to which `Sample collection time` relates to - title: Collection time event - example: Primary vaccination x-airr: - miairr: important - adc-query-support: true - set: 2 - subset: sample - name: Collection time event + deprecated: true + deprecated-description: Field has been merged with collection_time_point_relative. + deprecated-replaced-by: + - collection_time_point_relative collection_location: $ref: '#/Ontology' nullable: true @@ -2513,7 +2748,8 @@ CellProcessing: cell_phenotype: type: string nullable: true - description: List of cellular markers and their expression levels used to isolate the cell population + description: > + List of cellular markers and their expression levels used to isolate the cell population. title: Cell subset phenotype example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: @@ -2522,6 +2758,20 @@ CellProcessing: set: 3 subset: process (cell) name: Cell subset phenotype + cell_label: + type: string + nullable: true + description: > + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. 
+ title: Cell annotation + example: age-associated B cell + x-airr: + miairr: defined + adc-query-support: true + set: 3 + subset: process (cell) + name: Cell annotation cell_species: $ref: '#/Ontology' nullable: true @@ -2700,7 +2950,6 @@ NucleicAcidProcessing: - template_class - template_quality - template_amount - - template_amount_unit - library_generation_method - library_generation_protocol - library_generation_kit_version @@ -2737,37 +2986,30 @@ NucleicAcidProcessing: subset: process (nucleic acid) name: Target substrate quality template_amount: - type: number + $ref: '#/PhysicalQuantity' nullable: true description: Amount of template that went into the process title: Template amount - example: 1000 + example: + quantity: 1000 + unit: + id: UO:0000024 + label: nanogram x-airr: miairr: important adc-query-support: true set: 3 subset: process (nucleic acid) name: Template amount + format: physical_quantity template_amount_unit: $ref: '#/Ontology' nullable: true - description: Unit of template amount - title: Template amount time unit - example: - id: UO:0000024 - label: nanogram x-airr: - miairr: important - adc-query-support: true - set: 3 - subset: process (nucleic acid) - name: Template amount time unit - format: ontology - ontology: - draft: false - top_node: - id: UO:0000002 - label: physical quantity + deprecated: true + deprecated-description: Field has been merged with template_amount. + deprecated-replaced-by: + - template_amount library_generation_method: type: string enum: @@ -4701,7 +4943,6 @@ Cell: type: object required: - cell_id - - rearrangements - repertoire_id - virtual_pairing properties: @@ -4709,40 +4950,15 @@ Cell: type: string nullable: false description: > - Identifier defining the cell of origin for the query sequence. + Identifier for the Cell object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. 
title: Cell index - example: W06_046_091 x-airr: identifier: true miairr: defined adc-query-support: true name: Cell index - rearrangements: - type: array - nullable: true - description: > - Array of sequence identifiers defined for the Rearrangement object - title: Cell-associated rearrangements - items: - type: string - example: [id1, id2] #empty vs NULL? - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated rearrangements - receptors: - type: array - nullable: true - description: > - Array of receptor identifiers defined for the Receptor object - title: Cell-associated receptors - items: - type: string - example: [id1, id2] #empty vs NULL? - x-airr: - miairr: defined - adc-query-support: true - name: Cell-associated receptors repertoire_id: type: string nullable: true @@ -4755,40 +4971,66 @@ Cell: data_processing_id: type: string nullable: true - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. title: Data processing for cell x-airr: miairr: defined adc-query-support: true name: Data processing for cell - expression_study_method: - type: string - enum: - - flow_cytometry - - single-cell_transcriptome - - null + receptors: + type: array nullable: true description: > - Keyword describing the methodology used to assess expression. This values for this field MUST - come from a controlled vocabulary. + Array of receptor identifiers defined for the Receptor objects associated with this cell + title: Cell-associated receptors + items: + type: string + example: [id1, id2] x-airr: miairr: defined adc-query-support: true - expression_raw_doi: + name: Cell-associated receptors + cell_subset: + $ref: '#/Ontology' + nullable: true + description: Commonly-used designation of isolated cell population. 
+ title: Cell subset + example: + id: CL:0000972 + label: class switched memory B cell + x-airr: + miairr: important + adc-query-support: true + name: Cell subset + format: ontology + ontology: + draft: false + top_node: + id: CL:0000542 + label: lymphocyte + cell_phenotype: type: string nullable: true description: > - DOI of raw data set containing the current event + List of cellular markers and their expression levels used to isolate the cell population. + title: Cell subset phenotype + example: CD19+ CD38+ CD27+ IgM- IgD- x-airr: miairr: defined adc-query-support: true - expression_index: + name: Cell subset phenotype + cell_label: type: string nullable: true description: > - Index addressing the current event within the raw data set. + Free text cell type annotation. Primarily used for annotating cell types that are not + provided in the Cell Ontology. + title: Cell annotation + example: age-associated B cell x-airr: miairr: defined + adc-query-support: true + name: Cell annotation virtual_pairing: type: boolean nullable: true @@ -4800,11 +5042,10 @@ Cell: adc-query-support: true name: Virtual pairing -# The CellExpression object acts as a container to hold a single expression level measurement from +# The Expression object acts as a container to hold a single expression level measurement from # an experiment. Expression data is associated with a cell_id and the related repertoire_id and -# data_processing_id as cell_id is not guaranteed to be unique outside the data processing for -# a single repertoire. -CellExpression: +# data_processing_id. +Expression: type: object required: - expression_id @@ -4818,7 +5059,9 @@ CellExpression: expression_id: type: string description: > - Identifier of this expression property measurement. + Identifier for the Expression object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. 
title: Expression property measurement identifier nullable: false x-airr: @@ -4848,7 +5091,7 @@ CellExpression: name: Parental repertoire of cell data_processing_id: type: string - description: Identifier of the data processing object in the repertoire metadata for this clone. + description: Identifier of the data processing object in the repertoire metadata for this cell. title: Data processing for cell nullable: true x-airr: @@ -4897,7 +5140,7 @@ CellExpression: name: Property value -# The Receptor object hold information about a receptor and its reactivity. +# The Receptor object holds information about a receptor (immunoglobulin or TCR) # Receptor: type: object @@ -4913,7 +5156,10 @@ Receptor: receptor_id: type: string nullable: false - description: ID of the current Receptor object, unique within the local repository. + description: > + Identifier for the Receptor object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. title: Receptor ID example: TCR-MM-012345 x-airr: @@ -4937,6 +5183,7 @@ Receptor: - TCR description: The top-level receptor type, either Immunoglobulin (Ig) or T Cell Receptor (TCR). 
x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_1_aa: type: string @@ -4959,6 +5206,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_1_aa originates example: IGH x-airr: + format: controlled_vocabulary adc-query-support: true receptor_variable_domain_2_aa: type: string @@ -4983,6 +5231,7 @@ Receptor: description: Locus from which the variable domain in receptor_variable_domain_2_aa originates example: IGL x-airr: + format: controlled_vocabulary adc-query-support: true receptor_ref: type: array @@ -4994,17 +5243,12 @@ Receptor: example: ["IEDB_RECEPTOR:10"] x-airr: adc-query-support: true - reactivity_measurements: - type: array - nullable: true - description: Records of reactivity measurement - items: - $ref: '#/ReceptorReactivity' - -ReceptorReactivity: +Reactivity: type: object required: + - reactivity_id + - cell_id - ligand_type - antigen_type - antigen @@ -5013,6 +5257,43 @@ ReceptorReactivity: - reactivity_value - reactivity_unit properties: + reactivity_id: + type: string + nullable: false + description: > + Identifier for the Reactivity object. This identifier must be unique within + a given study, but it is recommended that it be a universally unique record locator + to enable database applications. + title: Reactivity ID + x-airr: + identifier: true + adc-query-support: true + cell_id: + type: string + nullable: false + description: > + Identifier of the Cell in the context of which the reactivity measurement was conducted. + title: Cell ID + x-airr: + adc-query-support: true + repertoire_id: + type: string + description: Identifier for the associated repertoire in study metadata. + title: Parental repertoire of cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Parental repertoire of cell + data_processing_id: + type: string + description: Identifier of the data processing object in the repertoire metadata for this cell. 
+ title: Data processing for cell + nullable: true + x-airr: + miairr: defined + adc-query-support: true + name: Data processing for cell ligand_type: type: string nullable: false @@ -5022,8 +5303,11 @@ ReceptorReactivity: - protein - peptide - non-peptidic - description: Classification of ligand binding to receptor + description: Classification of ligand binding to the cell example: non-peptide + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen_type: type: string nullable: false @@ -5034,6 +5318,9 @@ ReceptorReactivity: description: > The type of antigen before processing by the immune system. example: protein + x-airr: + format: controlled_vocabulary + adc-query-support: true antigen: $ref: '#/Ontology' nullable: false @@ -5057,6 +5344,7 @@ ReceptorReactivity: id: NCBITAXON:5843 label: Plasmodium falciparum NF54 x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5071,6 +5359,14 @@ ReceptorReactivity: type: integer nullable: true description: End position of the peptide within the reference protein sequence + peptide_sequence_aa: + type: string + nullable: true + description: > + The actual peptide sequence against which the receptor reactivity was measured. This field should be + used as a convenience for antigens of antigen_type `protein` or `peptide`. 
+ x-airr: + adc-query-support: true mhc_class: type: string nullable: true @@ -5081,6 +5377,9 @@ ReceptorReactivity: - null description: Class of MHC molecule, only present for MHC:x ligand types example: MHC-II + x-airr: + format: controlled_vocabulary + adc-query-support: true mhc_gene_1: $ref: '#/Ontology' nullable: true @@ -5090,6 +5389,7 @@ ReceptorReactivity: id: MRO:0000055 label: HLA-DRA x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5100,7 +5400,9 @@ ReceptorReactivity: type: string nullable: true description: Allele designation of the MHC alpha chain - example: HLA-DRA + example: HLA-DRA*01:01 + x-airr: + adc-query-support: true mhc_gene_2: $ref: '#/Ontology' nullable: true @@ -5110,6 +5412,7 @@ ReceptorReactivity: id: MRO:0000057 label: HLA-DRB1 x-airr: + adc-query-support: true format: ontology ontology: draft: true @@ -5122,35 +5425,40 @@ ReceptorReactivity: description: > Allele designation of the MHC class II beta chain or the invariant beta2-microglobin chain example: HLA-DRB1*04:01 + x-airr: + adc-query-support: true reactivity_method: type: string nullable: false enum: - - SPR - - ITC - - ELISA - - cytometry - - biological_activity + - native_protein + - MHC_peptide_multimer description: The methodology used to assess expression (assay implemented in experiment) + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_readout: type: string nullable: false enum: - - binding_strength - - cytokine_release - - dissociation_constant_kd - - on_rate - - off_rate - - pathogen_inhibition + - fluorescence_intensity + - barcode_count description: Reactivity measurement read-out - example: cytokine release + example: barcode_count + x-airr: + adc-query-support: true + format: controlled_vocabulary reactivity_value: type: number nullable: false description: The absolute (processed) value of the measurement - example: 162.26 + example: 162 + x-airr: + adc-query-support: true reactivity_unit: type: string 
nullable: false description: The unit of the measurement - example: pg/ml + example: read count + x-airr: + adc-query-support: true diff --git a/lang/python/tests/data/bad_rearrangement.tsv b/lang/python/tests/data/bad_rearrangement.tsv index d12fc79fe..9849c2c04 100644 --- a/lang/python/tests/data/bad_rearrangement.tsv +++ b/lang/python/tests/data/bad_rearrangement.tsv @@ -1,10 +1,12 @@ rearrangement_id rearrangement_set_id sequence_id wrong_name rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count -IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 -IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 
83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 -IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 -IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 -IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 -IVKNQEJ01APN5N 1 IVKNQEJ01APN5N 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 -IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 -IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 -IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T yes IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 1 275 1 317 280 289 11 20 293 334 6 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA NA T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 1 269 1 317 274 281 11 18 286 327 6 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1 +IVKNQEJ01B0TT2 1 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 1 275 1 317 280 289 11 20 293 334 6 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 1 275 1 317 281 290 11 20 294 335 6 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 diff --git a/lang/python/tests/test_interface.py b/lang/python/tests/test_interface.py index 0585cac0d..1aefa4c2b 100644 --- 
a/lang/python/tests/test_interface.py +++ b/lang/python/tests/test_interface.py @@ -58,7 +58,8 @@ def test_load_rearrangement(self): # Bad data result = airr.load_rearrangement(self.rearrangement_bad) - self.assertTupleEqual(result.shape, self.shape_bad, 'load(): bad data failed') + self.assertFalse(result, 'load(): bad data failed') + #self.assertTupleEqual(result.shape, self.shape_bad, 'load(): bad data failed') # @unittest.skip('-> read_rearrangement(): skipped\n') def test_read_rearrangement(self): diff --git a/tests/data/bad_rearrangement.tsv b/tests/data/bad_rearrangement.tsv index d12fc79fe..9849c2c04 100644 --- a/tests/data/bad_rearrangement.tsv +++ b/tests/data/bad_rearrangement.tsv @@ -1,10 +1,12 @@ rearrangement_id rearrangement_set_id sequence_id wrong_name rev_comp productive sequence_alignment germline_alignment v_call d_call j_call c_call junction junction_length junction_aa v_score d_score j_score c_score v_cigar d_cigar j_cigar c_cigar v_identity v_evalue d_identity d_evalue j_identity j_evalue v_sequence_start v_sequence_end v_germline_start v_germline_end d_sequence_start d_sequence_end d_germline_start d_germline_end j_sequence_start j_sequence_end j_germline_start j_germline_end np1_length np2_length duplicate_count -IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1247 -IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 4 -IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 0 275 0 317 279 289 10 20 292 334 5 47 4 3 92 -IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 0 275 0 317 279 287 10 18 291 333 5 47 4 4 2913 -IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 0 269 0 317 273 281 10 18 285 327 5 47 4 4 1 -IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 0 275 0 317 279 287 10 18 291 333 5 47 4 4 1 -IVKNQEJ01B0TT2 1 IVKNQEJ01B0TT2 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 0 275 0 317 279 289 10 20 292 334 5 47 4 3 30 -IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 0 275 0 317 280 290 10 20 293 335 5 47 5 3 4 -IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 0 267 0 315 273 281 10 18 285 327 5 47 6 4 12 +IVKNQEJ01BVGQ6 1 IVKNQEJ01BVGQ6 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T yes IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 75.8 22N1S275= 11N280S8= 6N292S32=1X9= 1 1E-122 1 2.7 0.9762 6E-18 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1247 +IVKNQEJ01AQVWS 1 IVKNQEJ01AQVWS 
GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCAACTACAACCCCTCCCTCAAGAGTCGAGTCACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 420 16.4 83.8 22N1S156=1X10=1X17=1X89= 11N280S8= 6N292S42= 0.9891 8E-120 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 4 +IVKNQEJ01AOYFZ 1 IVKNQEJ01AOYFZ GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 83.8 22N1S275= 11N280S10= 6N293S42= 1 1E-122 1 0.17 1 2E-20 1 275 1 317 280 289 11 20 293 334 6 47 4 3 92 +IVKNQEJ01EI5S4 1 IVKNQEJ01EI5S4 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA NA T IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 430 16.4 83.8 22N1S275= 11N280S8= 6N292S42= 1 1E-122 1 2.7 1 2E-20 1 275 1 317 280 287 11 18 292 333 6 47 4 4 2913 +IVKNQEJ01DGRRI 1 IVKNQEJ01DGRRI 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCCCCCAGGGAAGGGTCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T T IGHV4-34*09 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 389 16.4 83.8 22N1S23=2X85=1X15=1X1=1X3=1X2=1X1=1X5=1X6=1X118= 11N274S8= 6N286S42= 0.9628 2E-110 1 2.6 1 2E-20 1 269 1 317 274 281 11 18 286 327 6 47 4 4 1 +IVKNQEJ01APN5N 1 IVKNQEJ01APN5N GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTAG 36 CASGVAGTFDY* 430 16.4 67.9 22N1S275= 11N280S8= 6N292S10=1X21=1X9= 1 1E-122 1 2.7 0.9524 1E-15 1 275 1 317 280 287 11 18 292 333 6 47 4 4 1 +IVKNQEJ01B0TT2 1 GGCCCAGGACTGGTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGTAACTTTTGACTACTGG 37 CASGVAGNF*LLX 430 20.4 75.8 22N1S275= 11N280S10= 6N293S32=1X9= 1 1E-122 1 0.17 0.9762 6E-18 1 275 1 317 280 289 11 20 293 334 6 47 4 3 30 +IVKNQEJ01AIS74 1 IVKNQEJ01AIS74 
GGCGCAGGACTGTTGAAGCCTTCACAGACCCTGTCCCTCACCTGCACTGTCTCTGGTGGCTCCATCAGCAGTGGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACCGTCTCCTCA T F IGHV4-31*03 IGHD6-19*01 IGHJ4*02 TGTGCGAGGCGGGGTGGCTGGTAACTTTTGACTACTGG 38 CARRGGW*LLTTG 424 20.4 83.8 22N1S3=1X8=1X262= 11N281S10= 6N294S42= 0.9927 9E-121 1 0.17 1 2E-20 1 275 1 317 281 290 11 20 294 335 6 47 5 3 4 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 +IVKNQEJ01AJ44V 1 IVKNQEJ01AJ44V 
GGCCCAGGACTGGTGAAGCCTTCGGAGACCCTGTCCCTCACCTGCGCTGTCTATGGTGGGTCCTTCAGTGGTTACTACTGGAGCTGGATCCGCCAGCACCCAGGGAAGGGCCTGGAGTGGATTGGGTACATCTATTACAGTGGGAGCACCTACTACAACCCGTCCCTCAAGAGTCGAGTTACCATATCAGTAGACACGTCTAAGAACCAGTTCTCCCTGAAGCTGAGCTCTGTGACTGCCGCGGACACGGCCGTGTATTACTGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGGGGCCAGGGAACCCTGGTCACTGTCTCCTCA T T IGHV4-59*06 IGHD1-7*01,IGHD6-19*01 IGHJ4*02 TGTGCGAGCGGGGTGGCTGGAACTTTTGACTACTGG 36 CASGVAGTFDYW 386 16.4 75.8 22N1S45=1X5=2X6=1X3=1X5=1X22=1X4=1X1=1X1=1X165= 11N274S8= 6N286S32=1X9= 0.9625 2E-109 1 2.6 0.9762 5E-18 1 267 1 315 274 281 11 18 286 327 6 47 6 4 12 diff --git a/tests/data/good_combined_airr.json b/tests/data/good_combined_airr.json index 0ef2106ae..fbea02710 100644 --- a/tests/data/good_combined_airr.json +++ b/tests/data/good_combined_airr.json @@ -70,12 +70,14 @@ "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO:0000036", - "label": "year" - }, + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } + }, "age_event": null, "ancestry_population": { "id": null, @@ -93,6 +95,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": { "id": null, "label": null @@ -179,6 +189,7 @@ "label": "naive B cell" }, "cell_phenotype": "expression of CD20 and the absence of CD27", + "cell_label": "naive B cell", "cell_species": { "id": "NCBITAXON:9606", "label": "Homo sapiens" @@ -207,11 +218,6 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { - "id": null, - "label": null - }, - "collection_time_point_reference": null, "collection_location": { "id": null, "label": null @@ -224,10 +230,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": 
null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, @@ -326,11 +328,13 @@ "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO:0000036", - "label": "year" + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } }, "age_event": null, "ancestry_population": { @@ -349,6 +353,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": { "id": null, "label": null @@ -377,6 +389,7 @@ "label": "memory B cell" }, "cell_phenotype": "expression of CD20 and CD27", + "cell_label": "memory B cell", "cell_species": { "id": "NCBITAXON:9606", "label": "Homo sapiens" @@ -405,11 +418,6 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { - "id": null, - "label": null - }, - "collection_time_point_reference": null, "collection_location": { "id": null, "label": null @@ -422,10 +430,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, @@ -524,11 +528,13 @@ "label": "Homo sapiens" }, "sex": "female", - "age_min": 27, - "age_max": 27, - "age_unit": { - "id": "UO:0000036", - "label": "year" + "age": { + "min": 27, + "max": 27, + "unit": { + "id": "UO:0000036", + "label": "year" + } }, "age_event": null, "ancestry_population": { @@ -547,6 +553,14 @@ "diagnosis": [ { "study_group_description": null, + "diagnosis_timepoint": { + "label": "Study enrollment", + "value": 60, + "unit": { + "id": "UO:0000033", + "label": "day" + } + }, "disease_diagnosis": 
{ "id": null, "label": null @@ -575,6 +589,7 @@ "label": "naive thymus-derived CD4-positive, alpha-beta T cell" }, "cell_phenotype": "expression of CD8 and absence of CD4 and CD45RO", + "cell_label": "naive CD4 aB T cell", "cell_species": { "id": "NCBITAXON:9606", "label": "Homo sapiens" @@ -603,11 +618,6 @@ "anatomic_site": null, "disease_state_sample": null, "collection_time_point_relative": null, - "collection_time_point_relative_unit": { - "id": null, - "label": null - }, - "collection_time_point_reference": null, "collection_location": { "id": null, "label": null @@ -620,10 +630,6 @@ "cell_processing_protocol": null, "template_quality": null, "template_amount": null, - "template_amount_unit": { - "id": null, - "label": null - }, "library_generation_method": "RT(oligo-dT)+PCR", "library_generation_protocol": null, "library_generation_kit_version": null, diff --git a/tests/data/good_combined_airr.yaml b/tests/data/good_combined_airr.yaml index 2c9ab547c..b04fcb42e 100644 --- a/tests/data/good_combined_airr.yaml +++ b/tests/data/good_combined_airr.yaml @@ -61,11 +61,12 @@ Repertoire: id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: id: @@ -80,6 +81,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -135,6 +142,7 @@ Repertoire: id: CL:0000788 label: naive B cell cell_phenotype: expression of CD20 and the absence of CD27 + cell_label: naive B cell cell_species: id: NCBITAXON:9606 label: Homo sapiens @@ -158,10 +166,6 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: - id: - label: - collection_time_point_reference: collection_location: id: label: @@ -173,9 +177,6 @@ 
Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: library_generation_kit_version: @@ -260,11 +261,12 @@ Repertoire: id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: id: @@ -279,6 +281,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -300,6 +308,7 @@ Repertoire: id: CL:0000787 label: memory B cell cell_phenotype: expression of CD20 and CD27 + cell_label: memory B cell cell_species: id: NCBITAXON:9606 label: Homo sapiens @@ -323,10 +332,6 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: - id: - label: - collection_time_point_reference: collection_location: id: label: @@ -338,9 +343,6 @@ Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: library_generation_kit_version: @@ -425,11 +427,12 @@ Repertoire: id: NCBITAXON:9606 label: Homo sapiens sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: ancestry_population: id: @@ -444,6 +447,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: label: @@ -465,6 +474,7 @@ Repertoire: id: CL:0000895 label: naive thymus-derived CD4-positive, alpha-beta T cell cell_phenotype: expression of CD8 and absence of CD4 and CD45RO + 
cell_label: naive CD4 aB T cell cell_species: id: NCBITAXON:9606 label: Homo sapiens @@ -488,10 +498,6 @@ Repertoire: anatomic_site: disease_state_sample: collection_time_point_relative: - collection_time_point_relative_unit: - id: - label: - collection_time_point_reference: collection_location: id: label: @@ -503,9 +509,6 @@ Repertoire: cell_processing_protocol: template_quality: template_amount: - template_amount_unit: - id: - label: library_generation_method: RT(oligo-dT)+PCR library_generation_protocol: library_generation_kit_version: diff --git a/tests/data/good_rearrangement.tsv.gz b/tests/data/good_rearrangement.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..c838c584d5e10ba715dd1ebb7fbe1925d847ae23 GIT binary patch literal 1106 zcmV-Y1g-lYiwFpPH)>`817~kil^8C_U(5LsjD3)P1|fK78v5?Paa;Z9}W*y zW=UBl#WK6iitWuRC1p>z&#ZW#{k6-AMb<4t&xf1E-R*{K%DZ&8*sdP3wqvqhEejli zEVJ@7m`r2CukkVainz=8?B9wBB&b^N1#Q}Vzj{&>QIa$TQ{z&hC&=c6X@D0#y}4{ z5{Q^W4;|YcZWbILXS6+R2S_xSKu;5)xs|oEaHB%%$6yC+ zfcDs$*c}@^w_e(TW0GG_$%{b zxXvDd>+6U*n2YeBMrwLC`1tYK`YGo1ndzK|^6`kX2|OGgR+Uk+hagpswLny?DcPKT zk|`vNQg`;rE^snzfa3rht$|ZM;VkHcR-Lo&)6}2#&OR%=JFXWn7d;=VKX&weCv(w0 zbA#4#$zT*d<+yB^#puz1-DkpT{)+&Qe;)QT@g@_e>GFVs0k=GCf5OLoT1*c!{x_2(bya$vOUOmA2nNa#c zAEtbON4{EsB@aGK3EzfM%!X0C3aJ=+CsD;%_L2T!eAW0c8)Nx4#)9C`#aNXe^J_mQ Y152I%RG=m8H>?l;0K5(Ux-J(00KYL1`v3p{ literal 0 HcmV?d00001 diff --git a/tests/data/good_repertoire.yaml b/tests/data/good_repertoire.yaml index 6adaa2361..7ac0ae578 100644 --- a/tests/data/good_repertoire.yaml +++ b/tests/data/good_repertoire.yaml @@ -51,11 +51,12 @@ Repertoire: id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null ancestry_population: id: null @@ -70,6 +71,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + 
unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -92,6 +99,7 @@ Repertoire: id: "CL:0000788" label: "naive B cell" cell_phenotype: "expression of CD20 and the absence of CD27" + cell_label: "naive B cell" cell_species: id: "NCBITAXON:9606" label: "Homo sapiens" @@ -117,10 +125,6 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: - id: null - label: null - collection_time_point_reference: null collection_location: id: null label: null @@ -132,9 +136,6 @@ Repertoire: cell_processing_protocol: null template_quality: null template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: "RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null @@ -206,11 +207,12 @@ Repertoire: id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null ancestry_population: id: null @@ -225,6 +227,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -247,6 +255,7 @@ Repertoire: id: "CL:0000787" label: "memory B cell" cell_phenotype: "expression of CD20 and CD27" + cell_label: "memory B cell" cell_species: id: "NCBITAXON:9606" label: "Homo sapiens" @@ -272,10 +281,6 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: - id: null - label: null - collection_time_point_reference: null collection_location: id: null label: null @@ -287,9 +292,6 @@ Repertoire: cell_processing_protocol: null template_quality: null template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: 
"RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null @@ -361,11 +363,12 @@ Repertoire: id: "NCBITAXON:9606" label: "Homo sapiens" sex: female - age_min: 27 - age_max: 27 - age_unit: - id: UO:0000036 - label: year + age: + min: 27 + max: 27 + unit: + id: UO:0000036 + label: year age_event: null ancestry_population: id: null @@ -380,6 +383,12 @@ Repertoire: link_type: twin diagnosis: - study_group_description: null + diagnosis_timepoint: + label: Study enrollment + value: 60 + unit: + id: UO:0000033 + label: day disease_diagnosis: id: null label: null @@ -402,6 +411,7 @@ Repertoire: id: "CL:0000895" label: "naive thymus-derived CD4-positive, alpha-beta T cell" cell_phenotype: "expression of CD8 and absence of CD4 and CD45RO" + cell_label: "naive CD4+ aB T cell" cell_species: id: "NCBITAXON:9606" label: "Homo sapiens" @@ -427,10 +437,6 @@ Repertoire: anatomic_site: null disease_state_sample: null collection_time_point_relative: null - collection_time_point_relative_unit: - id: null - label: null - collection_time_point_reference: null collection_location: id: null label: null @@ -442,9 +448,6 @@ Repertoire: cell_processing_protocol: null template_quality: null template_amount: null - template_amount_unit: - id: null - label: null library_generation_method: "RT(oligo-dT)+PCR" library_generation_protocol: null library_generation_kit_version: null From 5c6f22807ccf4fa714d95724cfe2f6af2ad66f44 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Sat, 19 Oct 2024 18:02:45 -0500 Subject: [PATCH 59/59] remove old test file --- lang/R/tests/data-tests/warning_repertoire.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 lang/R/tests/data-tests/warning_repertoire.json diff --git a/lang/R/tests/data-tests/warning_repertoire.json b/lang/R/tests/data-tests/warning_repertoire.json deleted file mode 100644 index 30699a4c6..000000000 --- a/lang/R/tests/data-tests/warning_repertoire.json +++ /dev/null @@ -1 +0,0 @@ -{"Info":{"title":"AIRR 
Data Commons API for VDJServer Community Data Portal","description":"VDJServer ADC API response for repertoire query","version":"1.3","contact":{"name":"VDJServer","url":"http://vdjserver.org/","email":"vdjserver@utsouthwestern.edu"}},"Repertoire":[{"repertoire_id":"1329892364225474070-242ac113-0001-012","study":{"study_id":"PRJNA593622","study_title":"Determinants governing T cell receptor α/β-chain pairing in repertoire formation of identical twins","study_type":{"id":"NCIT:C16084","label":"Observational Study"},"study_description":"The T cell repertoire in each individual includes T cell receptors (TCRs) of enormous sequence diversity through the pairing of diverse TCR α- and β-chains, each generated by somatic recombination of paralogous gene segments. Whether the TCR repertoire contributes to susceptibility to infectious or autoimmune diseases in concert with disease-associated major histocompatibility complex (MHC) polymorphisms is unknown. Due to a lack in high-throughput technologies to sequence TCR α-β pairs, current studies on whether the TCR repertoire is shaped by host genetics have so far relied only on single-chain analysis. Using a high-throughput single T cell sequencing technology, we obtained the largest paired TCRαβ dataset so far, comprising 965,523 clonotypes from 15 healthy individuals including 6 monozygotic twin pairs. Public TCR α- and, to a lesser extent, TCR β-chain sequences were common in all individuals. In contrast, sharing of entirely identical TCRαβ amino acid sequences was very infrequent in unrelated individuals, but highly increased in twins, in particular in CD4 memory T cells. Based on nucleotide sequence identity, a subset of these shared clonotypes appeared to be the progeny of T cells that had been generated during fetal development and had persisted for more than 50 y. 
Additional shared TCRαβ in twins were encoded by different nucleotide sequences, implying that genetic determinants impose structural constraints on thymic selection that favor the selection of TCR α-β pairs with entire sequence identities.\n","inclusion_exclusion_criteria":" ","lab_name":"Jörg J Goronzy","lab_address":"Stanford University School of Medicine","submitted_by":"Scott Christley, scott.christley@utsouthwestern.edu","collected_by":"Hidetaka Tanno, hidetakatanno@utexas.edu","grants":"This work was supported by NIH Grants U19 AI057266 (to G.G. and J.J.G.) and R01 AI129191 (to J.J.G.) and US Defense Threat Reduction Agency Grant HDTRA1-12-C-0105 (to G.G.). H.T. was supported by University of Texas Health Innovation for Cancer Prevention Research Training Program Postdoctoral Fellowship (Cancer Prevention and Research Institute of Texas Grant RP160015), Japan Society for the Promotion of Science Postdoctoral Fellowships for Research Abroad, and Uehara Memorial Foundation Research Fellowship.","pub_ids":"PMID:31879353","keywords_study":["contains_tcr","contains_paired_chain"],"vdjserver_uuid":"1400363782577197546-242ac113-0001-012"},"subject":{"subject_id":"A1","synthetic":false,"species":{"id":"NCBITaxon:9606","label":"Homo sapiens"},"sex":"female","age_min":61,"age_max":61,"age_unit":{"id":"UO:0000036","label":"year"},"linked_subjects":"A2","link_type":"twin","diagnosis":[{"disease_diagnosis":{}}],"mhc":["HLA-A*30:02","HLA-A*31:01","HLA-B*35:02","HLA-B*38:01","HLA-C*04:01","HLA-C*12:03","HLA-DRB1*04:02","HLA-DRB1*04:03","HLA-DRB4*01:03","HLA-DQB1*03:02","HLA-DQB1*03:05"],"vdjserver_uuid":"4743918918142914070-242ac113-0001-012"},"sample":[{"sample_id":"A1_CD4_naive_TRB","tissue":{"id":"UBERON:0013756","label":"venous blood"},"biomaterial_provider":"Stanford University, CA","tissue_processing":"Peripheral blood mononuclear cells (PBMCs) were isolated by density centrifugation using Ficoll media at a density of 1.077 
g/mL.","cell_subset":{"id":"CL:0000895","label":"naive thymus-derived CD4-positive, alpha-beta T cell"},"cell_phenotype":"CD4+CD45RA+CCR7+","cell_species":{},"single_cell":false,"cell_storage":true,"cell_isolation":"magnetic-bead–based negative EasySep selection reagents","template_class":"RNA","library_generation_method":"RT(oligo-dT)+PCR","pcr_target":[{"pcr_target_locus":"TRB"}],"complete_sequences":"partial","physical_linkage":"hetero_head-head","sequencing_run_id":"SRR10600326","sequencing_platform":"Illumina MiSeq","sequencing_files":{"file_type":"fastq","filename":"SRR10600326.sra_1.fastq.gz","read_length":300},"vdjserver_uuid":"4055006163864514070-242ac113-0001-012"},{"sample_id":"A1_CD4_naive_TRA","tissue":{"id":"UBERON:0013756","label":"venous blood"},"biomaterial_provider":"Stanford University, CA","tissue_processing":"Peripheral blood mononuclear cells (PBMCs) were isolated by density centrifugation using Ficoll media at a density of 1.077 g/mL.","cell_subset":{"id":"CL:0000895","label":"naive thymus-derived CD4-positive, alpha-beta T cell"},"cell_phenotype":"CD4+CD45RA+CCR7+","cell_species":{},"single_cell":false,"cell_storage":true,"cell_isolation":"magnetic-bead–based negative EasySep selection reagents","template_class":"RNA","library_generation_method":"RT(oligo-dT)+PCR","pcr_target":[{"pcr_target_locus":"TRA"}],"complete_sequences":"partial","physical_linkage":"hetero_head-head","sequencing_run_id":"SRR10600326","sequencing_platform":"Illumina MiSeq","sequencing_files":{"file_type":"fastq","filename":"SRR10600326.sra_2.fastq.gz","read_length":300},"vdjserver_uuid":"3987789925682114070-242ac113-0001-012"}],"data_processing":[{"data_processing_id":"65112922-e976-40d9-9dff-6b581acc745f-007","primary_annotation":true,"software_versions":"IgBlast 1.14","data_processing_files":["SRR10600326.sra_1.igblast.airr.tsv.gz","SRR10600326.sra_2.igblast.airr.tsv.gz"],"germline_database":"VDJServer IMGT 
2019.01.23","vdjserver_uuid":"2248499969493954070-242ac113-0001-012"}]}]} \ No newline at end of file