diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 7f2b231ff1..1c522b8bf8 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -1369,6 +1369,14 @@ "gene_id": "ENSG00000227232", "tpm": 9.1 } +}, { + "model": "seqr.rnaseqtpm", + "pk": 6, + "fields": { + "sample": 150, + "gene_id": "ENSG00000233653", + "tpm": 1.03 + } }, { "model": "seqr.rnaseqspliceoutlier", diff --git a/seqr/management/tests/load_rna_seq_tpm_tests.py b/seqr/management/tests/load_rna_seq_tpm_tests.py index 757b08946b..4cf853e466 100644 --- a/seqr/management/tests/load_rna_seq_tpm_tests.py +++ b/seqr/management/tests/load_rna_seq_tpm_tests.py @@ -69,7 +69,7 @@ def test_command(self, mock_open, mock_logger, mock_utils_logger, mock_gzip_open self.assertEqual(new_sample.tissue_type, 'WB') models = RnaSeqTpm.objects.all() - self.assertEqual(models.count(), 4) + self.assertEqual(models.count(), 5) self.assertSetEqual({model.sample for model in models}, set(existing_rna_samples)) self.assertEqual(models.filter(sample=existing_sample, gene_id='ENSG00000240361').count(), 0) self.assertEqual(models.get(sample=new_sample, gene_id='ENSG00000233750').tpm, 6.04) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 01a6b72511..289da43037 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -713,7 +713,7 @@ def test_kibana_proxy(self): 'skipped_samples': 'NA19675_D3, NA20878', 'sample_tissue_type': 'M', 'num_parsed_samples': 4, - 'initial_model_count': 3, + 'initial_model_count': 4, 'deleted_count': 1, 'extra_warnings': [ 'Skipped data loading for the following 1 sample(s) due to mismatched tissue type: NA19678 (fibroblasts, muscle)', diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index c42e33c7cd..16ff7772f7 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -82,6 +82,7 @@ def get_variant_key(xpos=None, ref=None, alt=None, genomeVersion=None, **kwargs) def _saved_variant_genes_transcripts(variants): + family_genes = defaultdict(set) gene_ids = set() transcript_ids = set() for variant in variants: @@ -91,6 +92,8 @@ def _saved_variant_genes_transcripts(variants): for gene_id, transcripts in var.get('transcripts', {}).items(): gene_ids.add(gene_id) transcript_ids.update([t['transcriptId'] for t in transcripts if t.get('transcriptId')]) + for family_guid in var['familyGuids']: + family_genes[family_guid].update(var.get('transcripts', {}).keys()) genes = get_genes_for_variants(gene_ids) for gene in genes.values(): @@ -104,7 +107,7 @@ def _saved_variant_genes_transcripts(variants): ) } - return genes, transcripts + return genes, transcripts, family_genes def _add_locus_lists(projects, genes, add_list_detail=False, user=None): @@ -152,12 +155,17 @@ def get_phenotype_prioritization(family_guids, gene_ids=None): return data_by_individual_gene -def _add_family_has_rna_tpm(families_by_guid, gene_ids): +def _get_family_has_rna_tpm(family_genes, gene_ids): tpm_family_genes = RnaSeqTpm.objects.filter( - sample__individual__family__guid__in=families_by_guid.keys(), gene_id__in=gene_ids, + sample__individual__family__guid__in=family_genes.keys(), gene_id__in=gene_ids, ).values('sample__individual__family__guid').annotate(genes=ArrayAgg('gene_id', distinct=True)) + family_tpms = {} for agg in tpm_family_genes: - families_by_guid[agg['sample__individual__family__guid']]['tpmGenes'] = agg['genes'] + family_guid = agg['sample__individual__family__guid'] + genes = [gene for gene in agg['genes'] if gene in family_genes[family_guid]] + if genes: + family_tpms[family_guid] = {'tpmGenes': genes} + return family_tpms def _add_discovery_tags(variants, discovery_tags): @@ -188,11 +196,9 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a response = get_json_for_saved_variants_with_tags(saved_variants, add_details=True) variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants + genes, transcripts, family_genes = _saved_variant_genes_transcripts(variants) - loaded_family_guids = set() - for variant in variants: - loaded_family_guids.update(variant['familyGuids']) - projects = Project.objects.filter(family__guid__in=loaded_family_guids).distinct() + projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct() project = list(projects)[0] if len(projects) == 1 else None discovery_tags = None @@ -201,7 +207,6 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a discovery_tags, discovery_response = get_json_for_discovery_tags(response['savedVariantsByGuid'].values(), request.user) response.update(discovery_response) - genes, transcripts = _saved_variant_genes_transcripts(variants) response['transcriptsById'] = transcripts response['locusListsByGuid'] = _add_locus_lists( projects, genes, add_list_detail=add_locus_list_detail, user=request.user) @@ -223,6 +228,13 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a matchmakersubmissiongenes__saved_variant__guid__in=response['savedVariantsByGuid'].keys())) response['mmeSubmissionsByGuid'] = {s['submissionGuid']: s for s in submissions} + rna_tpm = None + if include_individual_gene_scores: + present_family_genes = {k: v for k, v in family_genes.items() if v} + response['rnaSeqData'] = _get_rna_seq_outliers(genes.keys(), present_family_genes.keys()) + rna_tpm = _get_family_has_rna_tpm(present_family_genes, genes.keys()) + response['phenotypeGeneScores'] = get_phenotype_prioritization(present_family_genes.keys(), gene_ids=genes.keys()) + if add_all_context or request.GET.get(LOAD_PROJECT_TAG_TYPES_CONTEXT_PARAM) == 'true': project_fields = {'projectGuid': 'guid'} if include_project_name: @@ -233,18 +245,14 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a add_project_tag_types(response['projectsByGuid']) if add_all_context or request.GET.get(LOAD_FAMILY_CONTEXT_PARAM) == 'true': - families = Family.objects.filter(guid__in=loaded_family_guids) + families = Family.objects.filter(guid__in=family_genes.keys()) add_families_context( response, families, project_guid=project.guid if project else None, user=request.user, is_analyst=is_analyst, has_case_review_perm=bool(project) and has_case_review_permissions(project, request.user), include_igv=include_igv, ) - if include_individual_gene_scores: - response['rnaSeqData'] = _get_rna_seq_outliers(genes.keys(), loaded_family_guids) - families_by_guid = response.get('familiesByGuid') - if families_by_guid: - _add_family_has_rna_tpm(families_by_guid, genes.keys()) - - response['phenotypeGeneScores'] = get_phenotype_prioritization(loaded_family_guids, gene_ids=genes.keys()) + if rna_tpm: + for family_guid, data in rna_tpm.items(): + response['familiesByGuid'][family_guid].update(data) return response