diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 840300aa7b..682eba561d 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -893,7 +893,6 @@ def _add_row(row, family_id, row_type): individual_data_types={i.individual_id: i.data_types for i in individuals}, add_row=_add_row, variant_json_fields=['clinvar', 'variantId'], - saved_variant_annotations={'tags': ArrayAgg('varianttag__variant_tag_type__name', distinct=True)}, mme_values={'variant_ids': ArrayAgg('matchmakersubmissiongenes__saved_variant__saved_variant_json__variantId')}, include_metadata=True, include_mondo=True, diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index f37031f63b..55ba602a9a 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -616,7 +616,7 @@ def test_anvil_export(self, mock_google_authenticated, mock_zip): '1_248367227_HG00731', 'HG00731', 'HG00731', 'RP11', 'Known', 'paternal', 'Homozygous', 'GRCh37', '1', '248367227', 'TC', 'T', '-', '-', '-', '-', '-', '-', '-'], discovery_file) self.assertIn([ - '21_3343353_NA19675_1', 'NA19675_1', 'NA19675', 'RP11', 'Known', 'de novo', + '21_3343353_NA19675_1', 'NA19675_1', 'NA19675', 'RP11', 'Candidate', 'de novo', 'Heterozygous', 'GRCh37', '21', '3343353', 'GAGA', 'G', 'c.375_377delTCT', 'p.Leu126del', 'ENST00000258436', '-', '-', '-', '-'], discovery_file) self.assertIn([ @@ -993,7 +993,7 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): ]) self.assertIn([ 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', - 'RP11', 'ENST00000258436', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Known', + 'RP11', 'ENST00000258436', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', '', ], genetic_findings_file) @@ -1006,12 +1006,12 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): self.assertIn([ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', '', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', - 'Broad_NA20889_1_249045487', '', 'Known', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', + 'Broad_NA20889_1_249045487', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', ], genetic_findings_file) self.assertIn([ 'Broad_NA20889_1_249045487', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '249045487', 'A', 'G', '', - 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Known', + 'OR4G11P', '', '', '', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_248367227', '', 'Candidate', 'IRIDA syndrome', 'MONDO:0008788', 'Autosomal dominant', 'Full', '', '', 'SR-ES', '', ], genetic_findings_file) @@ -1225,7 +1225,7 @@ def test_variant_metadata(self): 'family_history': 'Yes', 'gene': 'OR4G11P', 'gene_id': 'ENSG00000240361', - 'gene_known_for_phenotype': 'Known', + 'gene_known_for_phenotype': 'Candidate', 'genetic_findings_id': 'NA20889_1_248367227', 'hgvsc': 'c.3955G>A', 'hgvsp': 'c.1586-17C>G', @@ -1253,7 +1253,7 @@ def test_variant_metadata(self): 'family_history': 'Yes', 'gene': None, 'gene_id': None, - 'gene_known_for_phenotype': 'Known', + 'gene_known_for_phenotype': 'Candidate', 'genetic_findings_id': 'NA20889_1_249045487', 'participant_id': 'NA20889', 'pos': 249045487, diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 62b682bc93..c1b94f1e08 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -37,8 +37,8 @@ "num_saved_variants": 2, "solve_status": "Unsolved", "sample_id": "NA20889", - "gene_known_for_phenotype-1": "Known", - "gene_known_for_phenotype-2": "Known", + "gene_known_for_phenotype-1": "Candidate", + "gene_known_for_phenotype-2": "Candidate", "variant_inheritance-1": "unknown", "variant_inheritance-2": "unknown", 'genetic_findings_id-1': 'NA20889_1_248367227', @@ -105,6 +105,8 @@ 'allele_balance_or_heteroplasmy_percentage-2': None, 'notes-1': None, 'notes-2': None, + 'tags-1': ['Tier 1 - Novel gene and phenotype'], + 'tags-2': ['Tier 1 - Novel gene and phenotype'], } EXPECTED_SAMPLE_METADATA_ROW = { "dbgap_submission": "No", @@ -147,6 +149,7 @@ 'alt-1': 'T', 'chrom-1': '1', 'gene_known_for_phenotype-1': 'Candidate', + 'tags-1': ['Tier 1 - Novel gene and phenotype'], 'pos-1': 248367227, 'end-1': None, 'ref-1': 'TC', diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 4ba6094f0e..e7dc41a648 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -126,7 +126,7 @@ def parse_anvil_metadata( variant_json_fields: Iterable[str] = None, post_process_variant: Callable[[dict, list[dict]], dict] = None, include_no_individual_families: bool = False, omit_airtable: bool = False, include_metadata: bool = False, include_discovery_sample_id: bool = False, include_mondo: bool = False, include_parent_mnvs: bool = False, - proband_only_variants: bool = False, saved_variant_annotations: dict = None): + proband_only_variants: bool = False): individual_samples = individual_samples or (_get_loaded_before_date_project_individual_samples(projects, max_loaded_date) \ if max_loaded_date else _get_all_project_individual_samples(projects)) @@ -147,7 +147,6 @@ def parse_anvil_metadata( saved_variants_by_family = _get_parsed_saved_discovery_variants_by_family( list(family_data_by_id.keys()), variant_filter=variant_filter, variant_json_fields=variant_json_fields, - saved_variant_annotations=saved_variant_annotations, ) condition_map = _get_condition_map(family_data_by_id.values()) @@ -285,21 +284,14 @@ def _post_process_variant_metadata(v, gene_variants, include_parent_mnvs=False): def _get_parsed_saved_discovery_variants_by_family( - families: Iterable[Family], variant_filter: dict, variant_json_fields: list[str], saved_variant_annotations: dict, + families: Iterable[Family], variant_filter: dict, variant_json_fields: list[str], ): tag_types = VariantTagType.objects.filter(project__isnull=True, category=DISCOVERY_CATEGORY) - annotations = { - 'gene_known_for_phenotype': Case(When( - Q(family__post_discovery_omim_numbers__len=0, family__mondo_id__isnull=True), - then=Value('Candidate')), default=Value('Known') - ), - **(saved_variant_annotations or {}), - } project_saved_variants = SavedVariant.objects.filter( varianttag__variant_tag_type__in=tag_types, family__id__in=families, **(variant_filter or {}), - ).order_by('created_date').distinct().annotate(**annotations) + ).order_by('created_date').distinct().annotate(tags=ArrayAgg('varianttag__variant_tag_type__name', distinct=True)) variants = [] gene_ids = set() @@ -321,8 +313,9 @@ def _get_parsed_saved_discovery_variants_by_family( 'hgvsc': (main_transcript.get('hgvsc') or '').split(':')[-1], 'hgvsp': (main_transcript.get('hgvsp') or '').split(':')[-1], 'seqr_chosen_consequence': main_transcript.get('majorConsequence'), + 'gene_known_for_phenotype': 'Known' if 'Known gene for phenotype' in variant.tags else 'Candidate', **{k: variant_json.get(k) for k in ['genotypes', 'svType', 'svName', 'end'] + (variant_json_fields or [])}, - **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', *annotations.keys()]}, + **{k: getattr(variant, k) for k in ['family_id', 'ref', 'alt', 'tags']}, }) genes_by_id = get_genes(gene_ids)