From 74e2733d40f00a11280236ba18a20ced39d2c40a Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 11 Feb 2022 16:14:16 -0800 Subject: [PATCH 01/13] Change counts As per #543 --- specs/airr-schema.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index dd55fbc1f..d7937a82b 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -3760,10 +3760,10 @@ Clone: junction_end: type: integer description: Junction region end position in the alignment (1-based closed interval). - sequence_count: + umi_count: type: integer description: Number of Rearrangement records (sequences) included in this clone. - clone_abundance: + clone_count: type: integer description: Non-normalized absolute count of the number of members (immune cells) in this clone. seed_id: From c4b6e8a0dd8e8ee4d6d32b789183af0e191cce7c Mon Sep 17 00:00:00 2001 From: Jason Vander Heiden Date: Mon, 21 Feb 2022 10:37:35 -0800 Subject: [PATCH 02/13] Update `_count` field language and add `umi_count` to Rearrangement. --- specs/airr-schema.yaml | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index d7937a82b..177867508 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -3557,15 +3557,14 @@ Rearrangement: consensus_count: type: integer description: > - Number of reads contributing to the (UMI) consensus for this sequence. + Number of reads contributing to the UMI consensus or contig assembly for this sequence. For example, the sum of the number of reads for all UMIs that contribute to the query sequence. duplicate_count: type: integer description: > Copy number or number of duplicate observations for the query sequence. - For example, the number of UMIs sharing an identical sequence or the number - of identical observations of this sequence absent UMIs. + For example, the number of identical reads observed for this sequence. title: Read count example: 123 x-airr: @@ -3574,6 +3573,12 @@ Rearrangement: set: 6 subset: data (processed sequence) name: Read count + umi_count: + type: integer + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. cell_id: type: string description: > @@ -3762,10 +3767,11 @@ Clone: description: Junction region end position in the alignment (1-based closed interval). umi_count: type: integer - description: Number of Rearrangement records (sequences) included in this clone. + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. clone_count: type: integer - description: Non-normalized absolute count of the number of members (immune cells) in this clone. + description: Number of sequences (Rearrangement records) observed in this clone. seed_id: type: string description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. From 88facd7e6e9c63424f2199234521560c913de3cd Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Mon, 21 Feb 2022 19:37:43 -0600 Subject: [PATCH 03/13] change description back to original --- specs/airr-schema.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index 177867508..c270639ec 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -3771,7 +3771,7 @@ Clone: Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. clone_count: type: integer - description: Number of sequences (Rearrangement records) observed in this clone. + description: Non-normalized absolute count of the number of members (immune cells) in this clone. seed_id: type: string description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. From a9d7c1bba8cf1dd1586f6d4225e6444d4a119d41 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 22 Feb 2022 13:20:19 -0600 Subject: [PATCH 04/13] update description --- specs/airr-schema.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index c270639ec..fd1d66bb2 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -3771,7 +3771,11 @@ Clone: Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. clone_count: type: integer - description: Non-normalized absolute count of the number of members (immune cells) in this clone. + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + or it may be a more sophisticated analysis calculation intertwined with an experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. seed_id: type: string description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. From dd606fc651ca25eb595e56d6feb318a22f49e1c4 Mon Sep 17 00:00:00 2001 From: Scott Christley Date: Tue, 22 Feb 2022 13:23:04 -0600 Subject: [PATCH 05/13] update description --- specs/airr-schema.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specs/airr-schema.yaml b/specs/airr-schema.yaml index fd1d66bb2..665df89c4 100644 --- a/specs/airr-schema.yaml +++ b/specs/airr-schema.yaml @@ -3774,7 +3774,8 @@ Clone: description: > Absolute count of the size (number of members) of this clone in the repertoire. This could simply be the number of sequences (Rearrangement records) observed in this clone, - or it may be a more sophisticated analysis calculation intertwined with an experimental protocol. + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. seed_id: type: string From 67ff6df1c9a93911928147ff273ffd61ef837d49 Mon Sep 17 00:00:00 2001 From: bcorrie Date: Fri, 22 Apr 2022 16:41:03 +0000 Subject: [PATCH 06/13] Sync schemas --- lang/python/airr/specs/airr-schema.yaml | 27 +++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/lang/python/airr/specs/airr-schema.yaml b/lang/python/airr/specs/airr-schema.yaml index e167b74fb..665df89c4 100644 --- a/lang/python/airr/specs/airr-schema.yaml +++ b/lang/python/airr/specs/airr-schema.yaml @@ -556,7 +556,7 @@ SequenceDelineationV: type: string description: one string for each codon in the fields v_start to cdr3_start indicating the label of that codon according to the numbering of the delineation scheme -# The Gene Description +# Description of a putative or confirmed Ig receptor gene/allele AlleleDescription: discriminator: AIRR description: Details of a putative or confirmed Ig receptor gene/allele inferred from one or more observations @@ -3557,15 +3557,14 @@ Rearrangement: consensus_count: type: integer description: > - Number of reads contributing to the (UMI) consensus for this sequence. + Number of reads contributing to the UMI consensus or contig assembly for this sequence. For example, the sum of the number of reads for all UMIs that contribute to the query sequence. duplicate_count: type: integer description: > Copy number or number of duplicate observations for the query sequence. - For example, the number of UMIs sharing an identical sequence or the number - of identical observations of this sequence absent UMIs. + For example, the number of identical reads observed for this sequence. title: Read count example: 123 x-airr: @@ -3574,6 +3573,12 @@ Rearrangement: set: 6 subset: data (processed sequence) name: Read count + umi_count: + type: integer + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. cell_id: type: string description: > @@ -3760,12 +3765,18 @@ Clone: junction_end: type: integer description: Junction region end position in the alignment (1-based closed interval). - sequence_count: + umi_count: type: integer - description: Number of Rearrangement records (sequences) included in this clone. - clone_abundance: + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: type: integer - description: Non-normalized absolute count of the number of members (immune cells) in this clone. + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. seed_id: type: string description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. From 56165246fff7b47e02b3e09bc8674726c82ad7cc Mon Sep 17 00:00:00 2001 From: bcorrie Date: Fri, 22 Apr 2022 16:43:44 +0000 Subject: [PATCH 07/13] Sync spec --- lang/R/inst/extdata/airr-schema.yaml | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/lang/R/inst/extdata/airr-schema.yaml b/lang/R/inst/extdata/airr-schema.yaml index dd55fbc1f..665df89c4 100644 --- a/lang/R/inst/extdata/airr-schema.yaml +++ b/lang/R/inst/extdata/airr-schema.yaml @@ -3557,15 +3557,14 @@ Rearrangement: consensus_count: type: integer description: > - Number of reads contributing to the (UMI) consensus for this sequence. + Number of reads contributing to the UMI consensus or contig assembly for this sequence. For example, the sum of the number of reads for all UMIs that contribute to the query sequence. duplicate_count: type: integer description: > Copy number or number of duplicate observations for the query sequence. - For example, the number of UMIs sharing an identical sequence or the number - of identical observations of this sequence absent UMIs. + For example, the number of identical reads observed for this sequence. title: Read count example: 123 x-airr: @@ -3574,6 +3573,12 @@ Rearrangement: set: 6 subset: data (processed sequence) name: Read count + umi_count: + type: integer + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. cell_id: type: string description: > @@ -3760,12 +3765,18 @@ Clone: junction_end: type: integer description: Junction region end position in the alignment (1-based closed interval). - sequence_count: + umi_count: type: integer - description: Number of Rearrangement records (sequences) included in this clone. - clone_abundance: + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: type: integer - description: Non-normalized absolute count of the number of members (immune cells) in this clone. + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. seed_id: type: string description: sequence_id of the seed sequence. Empty string (or null) if there is no seed sequence. From 595adf51746218a08babb1e9bf2e4e2835db8d8c Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 09:48:23 -0700 Subject: [PATCH 08/13] Sync with AIRR v2 spec --- specs/airr-schema-openapi3.yaml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 00209c58b..8ca26c332 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3962,14 +3962,18 @@ Clone: type: integer nullable: true description: Junction region end position in the alignment (1-based closed interval). - sequence_count: + umi_count: type: integer - nullable: true - description: Number of Rearrangement records (sequences) included in this clone. - clone_abundance: + description: > + Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. + clone_count: type: integer - nullable: true - description: Non-normalized absolute count of the number of members (immune cells) in this clone. + description: > + Absolute count of the size (number of members) of this clone in the repertoire. + This could simply be the number of sequences (Rearrangement records) observed in this clone, + the number of distinct cell barcodes (unique cell_id values), + or a more sophisticated calculation appropriate to the experimental protocol. + Absolute count is provided versus a frequency so that downstream analysis tools can perform their own normalization. seed_id: type: string nullable: true From 667d6ef2bbdbb531eb8a2758c264e5659b7ac67e Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 09:57:08 -0700 Subject: [PATCH 09/13] Update to be synced with v2 spec As required by consistency checks. --- specs/airr-schema-openapi3.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 8ca26c332..dd7947831 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3734,25 +3734,29 @@ Rearrangement: where frame 1 is relative to the first codon of D gene reference sequence. consensus_count: type: integer - nullable: true description: > - Number of reads contributing to the (UMI) consensus for this sequence. + Number of reads contributing to the UMI consensus or contig assembly for this sequence. For example, the sum of the number of reads for all UMIs that contribute to the query sequence. duplicate_count: type: integer - nullable: true description: > Copy number or number of duplicate observations for the query sequence. - For example, the number of UMIs sharing an identical sequence or the number - of identical observations of this sequence absent UMIs. + For example, the number of identical reads observed for this sequence. title: Read count example: 123 x-airr: miairr: important + nullable: true set: 6 subset: data (processed sequence) name: Read count + umi_count: + type: integer + description: > + Number of distinct UMIs represented by this sequence. + For example, the total number of UMIs that contribute to + the contig assembly for the query sequence. cell_id: type: string nullable: true From 481de11e26c921af2e35a2d5fd3064d67dd1e4c2 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 10:05:26 -0700 Subject: [PATCH 10/13] Update nullable attribute --- specs/airr-schema-openapi3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index dd7947831..9213fe783 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3740,6 +3740,7 @@ Rearrangement: the query sequence. duplicate_count: type: integer + nullable: true description: > Copy number or number of duplicate observations for the query sequence. For example, the number of identical reads observed for this sequence. @@ -3747,7 +3748,6 @@ Rearrangement: example: 123 x-airr: miairr: important - nullable: true set: 6 subset: data (processed sequence) name: Read count From 6579a8634ef69eeeefebc752b9e9a12ed2a582f8 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 10:10:36 -0700 Subject: [PATCH 11/13] Removed spaces Really it checks white space!!! 8-) --- specs/airr-schema-openapi3.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 9213fe783..eb8f7e6fb 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3756,7 +3756,7 @@ Rearrangement: description: > Number of distinct UMIs represented by this sequence. For example, the total number of UMIs that contribute to - the contig assembly for the query sequence. + the contig assembly for the query sequence. cell_id: type: string nullable: true From cf6fad5663b2b8ccb695c89406d3b0c369b16b67 Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 10:14:54 -0700 Subject: [PATCH 12/13] Adding nullable Trying to get consistency to pass, does it really require the nullable property? --- specs/airr-schema-openapi3.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index eb8f7e6fb..389c9b381 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3734,6 +3734,7 @@ Rearrangement: where frame 1 is relative to the first codon of D gene reference sequence. consensus_count: type: integer + nullable: true description: > Number of reads contributing to the UMI consensus or contig assembly for this sequence. For example, the sum of the number of reads for all UMIs that contribute to @@ -3753,6 +3754,7 @@ Rearrangement: name: Read count umi_count: type: integer + nullable: true description: > Number of distinct UMIs represented by this sequence. For example, the total number of UMIs that contribute to From a750ac19c933495c14e6171404bf700e433cc03e Mon Sep 17 00:00:00 2001 From: Brian Corrie Date: Fri, 22 Apr 2022 10:16:25 -0700 Subject: [PATCH 13/13] More nullable... --- specs/airr-schema-openapi3.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/specs/airr-schema-openapi3.yaml b/specs/airr-schema-openapi3.yaml index 389c9b381..6db4b69d3 100644 --- a/specs/airr-schema-openapi3.yaml +++ b/specs/airr-schema-openapi3.yaml @@ -3970,10 +3970,12 @@ Clone: description: Junction region end position in the alignment (1-based closed interval). umi_count: type: integer + nullable: true description: > Number of distinct UMIs observed across all sequences (Rearrangement records) in this clone. clone_count: type: integer + nullable: true description: > Absolute count of the size (number of members) of this clone in the repertoire. This could simply be the number of sequences (Rearrangement records) observed in this clone,