From 339ceb63ec178cd0809bd1a9d774bcc7532f4495 Mon Sep 17 00:00:00 2001 From: Andrew Rambaut Date: Mon, 25 Oct 2021 17:24:53 +0100 Subject: [PATCH 1/5] Updated to better match agreed VTG definition --- constellations/definitions/cAY.4.2.json | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/constellations/definitions/cAY.4.2.json b/constellations/definitions/cAY.4.2.json index 814c3c4..fccc4d5 100644 --- a/constellations/definitions/cAY.4.2.json +++ b/constellations/definitions/cAY.4.2.json @@ -19,21 +19,10 @@ "sites": [ "nuc:T17040C", "spike:A222V", - "nuc:C25614T", "spike:Y145H" ], - "intermediate": [ - "del:22029:6:0.961207", - "nuc:G210T:0.978338", - "nuc:C241T:0.978338", - "spike:T95I:0.978338", - "spike:G142D:0.978338", - "spike:D950N:0.978338" - ], "rules": { - "min_alt": 3, - "max_ref": 1, - "spike:Y145H": "not ref", - "spike:A222V": "not ref" + "min_alt": 2, + "max_ref": 0 } } From f51ba7a07a3a3f9ee6929245b8c0256dca2b76b5 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Mon, 25 Oct 2021 19:18:35 +0100 Subject: [PATCH 2/5] add to readme for #32 --- README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/README.md b/README.md index cbf15f3..396c6bb 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,32 @@ Here we include files that define: - genomically interesting regions e.g. RBD sites which interact with ACE2 In addition we include a JSON file describing the reference sequence and the coordinates of genes/proteins. Mutations can therefore be described with respect to the amino acid position within these features. 
+ +### Definitions + +At a minimum, JSON files must contain the following: + +- `label` : a unique string to represent the constellation +- `sites` : a list of defining mutations +- `rules` : the rules used by scorpio to classify whether a sequence belongs to a constellation + +The general format of a mutation code is: `gene`:[`ref`]`coordinates`[`alt`] where `gene` is a gene code (or `nuc` for the genomic nucleotide sequence), `ref` is the nucleotide or amino acid in the reference, `alt` is the specific nucleotide or amino acid for the mutant. Either of `ref` or `alt` can be missing if no specific state is required. See https://github.com/cov-lineages/scorpio for more definitions. + +Rules can either specify [min|max]_[ref|alt|ambig|oth] OR the call required at a mutation e.g. "N:S235F": (not )[ref|alt|ambig|oth] + +#### Optional fields + +- `description` : human readable string of information +- `sources` : reference material for the definition where appropriate +- `type` : at present all definitions are of type `variant` although we conceived of having constellations investigating e.g. furin cleavage sites +- `variant` : for all constellations of type variant, information which exists because it is a variant. e.g. + - `mrca_lineage` : the pangolin-lineage of the MRCA of the constellation + - `PHE_label` : PHE label where appropriate + - `WHO_label` : WHO label where appropriate + - `lineage_name` and `parent_lineage` are used together to allow scorpio to handle parent/child constellations. Without them, a parent would be favoured over a child lineage as having all the defining sites instead of possibly missing a few. The `parent_lineage` says that the constellation should only be called if the parent has also been called and should be favoured over the parent if it has enough support. The `lineage_name` should be in the same format. Note there are constellations which include multiple lineages, for which this would not work. 
+ - `incompatible_lineage_calls` : used by `pangolin` usually in the context of a parent/child relationship. This tells pangolin that if `scorpio` called the given (parent) lineage and `pangolin` called the incompatible (child) lineage, the `scorpio` lineage should override. The default behaviour in `pangolin` is to allow children of a constellation called by `scorpio` as we want to allow a VOC/VUI to continue to evolve and have descendant lineages. However in these cases where we have a constellation for a child lineage, we care about lineage calls meeting a very specific definition so we want the `scorpio` call to override. + - `tags` : any other names given to this constellation + - `Pango_lineages` : unused, human-readable list of relevant pango-lineages + - `representative_genome` : may be used in future, a genome representing the mutations +- `ancestral_sites` : definitions created with `scorpio define` may use an outgroup to partition sites into those that are defining and those that belonged to the outgroup and predate this new variant. These sites are included when classifying (including `pangolin`) but not when running `scorpio haplotype` +- `intermediate_sites` : definitions created with `scorpio define` use a default threshold of 98% when defining mutation sites. e.g. a mutation has to be present in 98% of defining sequences to be included. If it appears below this threshold in more than 25% of defining sequences, we list it as an intermediate site. 
These are for reference for a manual curation step From 8136758ca2c695272763046a537867d46cd3bfed Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 26 Oct 2021 10:39:20 +0100 Subject: [PATCH 3/5] suggested lower min_alt for AY.4 in response to https://github.com/cov-lineages/scorpio/issues/32 and AY.4.2 calling problems --- constellations/definitions/cAY.4.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constellations/definitions/cAY.4.json b/constellations/definitions/cAY.4.json index 8282827..e18fc39 100644 --- a/constellations/definitions/cAY.4.json +++ b/constellations/definitions/cAY.4.json @@ -62,7 +62,7 @@ "spike:G142D:0.976009" ], "rules": { - "min_alt": 31, + "min_alt": 26, "max_ref": 3, "orf1a:A2529V": "alt" } From c2753b2c97e75c9f1dc5ef930172fbbdfdc42541 Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 26 Oct 2021 14:53:41 +0100 Subject: [PATCH 4/5] update AY.4 definition to exclude ancestral sites --- constellations/definitions/cAY.4.json | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/constellations/definitions/cAY.4.json b/constellations/definitions/cAY.4.json index e18fc39..ad83230 100644 --- a/constellations/definitions/cAY.4.json +++ b/constellations/definitions/cAY.4.json @@ -23,8 +23,6 @@ "del:28271:1", "del:22029:6", "del:28248:6", - "nuc:C241T", - "nuc:C3037T", "orf1a:A1306S", "orf1a:P2046L", "orf1a:P2287S", @@ -38,31 +36,18 @@ "orf1b:G662S", "orf1b:P1000L", "orf1b:A1918V", - "spike:T19R", - "spike:L452R", - "spike:T478K", - "spike:D614G", - "spike:P681R", - "spike:D950N", - "orf3a:S26L", - "m:I82T", - "orf7a:V82A", - "orf7a:T120I", "nuc:C27874T", - "n:D63G", - "n:R203M", "n:G215C", - "n:D377Y", "nuc:G29742T" ], "intermediate": [ - "nuc:G210T:0.976009", - "nuc:T17040C:0.976009", - "spike:T95I:0.976009", - "spike:G142D:0.976009" + "nuc:G210T:0.961862", + "nuc:C241T:0.972018", + "nuc:T17040C:0.468120", + "spike:T95I:0.967111" ], "rules": { - "min_alt": 26, + 
"min_alt": 12, "max_ref": 3, "orf1a:A2529V": "alt" } From a52edfd0f3ef1a34b990cd6fac66e94241af856c Mon Sep 17 00:00:00 2001 From: Rachel Colquhoun Date: Tue, 26 Oct 2021 21:16:32 +0100 Subject: [PATCH 5/5] update version --- constellations/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constellations/__init__.py b/constellations/__init__.py index 82d5e25..4fede65 100644 --- a/constellations/__init__.py +++ b/constellations/__init__.py @@ -1,3 +1,3 @@ _program = "constellations" -__version__ = "v0.0.20" +__version__ = "v0.0.21"