Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Saved variant timeout #3510

Merged
merged 5 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions seqr/fixtures/1kg_project.json
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,14 @@
"gene_id": "ENSG00000227232",
"tpm": 9.1
}
}, {
"model": "seqr.rnaseqtpm",
"pk": 6,
"fields": {
"sample": 150,
"gene_id": "ENSG00000233653",
"tpm": 1.03
}
},
{
"model": "seqr.rnaseqspliceoutlier",
Expand Down
2 changes: 1 addition & 1 deletion seqr/management/tests/load_rna_seq_tpm_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_command(self, mock_open, mock_logger, mock_utils_logger, mock_gzip_open
self.assertEqual(new_sample.tissue_type, 'WB')

models = RnaSeqTpm.objects.all()
self.assertEqual(models.count(), 4)
self.assertEqual(models.count(), 5)
self.assertSetEqual({model.sample for model in models}, set(existing_rna_samples))
self.assertEqual(models.filter(sample=existing_sample, gene_id='ENSG00000240361').count(), 0)
self.assertEqual(models.get(sample=new_sample, gene_id='ENSG00000233750').tpm, 6.04)
Expand Down
2 changes: 1 addition & 1 deletion seqr/views/apis/data_manager_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ def test_kibana_proxy(self):
'skipped_samples': 'NA19675_D3, NA20878',
'sample_tissue_type': 'M',
'num_parsed_samples': 4,
'initial_model_count': 3,
'initial_model_count': 4,
'deleted_count': 1,
'extra_warnings': [
'Skipped data loading for the following 1 sample(s) due to mismatched tissue type: NA19678 (fibroblasts, muscle)',
Expand Down
42 changes: 25 additions & 17 deletions seqr/views/utils/variant_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def get_variant_key(xpos=None, ref=None, alt=None, genomeVersion=None, **kwargs)


def _saved_variant_genes_transcripts(variants):
family_genes = defaultdict(set)
gene_ids = set()
transcript_ids = set()
for variant in variants:
Expand All @@ -91,6 +92,8 @@ def _saved_variant_genes_transcripts(variants):
for gene_id, transcripts in var.get('transcripts', {}).items():
gene_ids.add(gene_id)
transcript_ids.update([t['transcriptId'] for t in transcripts if t.get('transcriptId')])
for family_guid in var['familyGuids']:
family_genes[family_guid].update(var.get('transcripts', {}).keys())
Comment on lines +95 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to include the families who have no transcripts?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes overall, as this is used for the the list of families for filtering which projects to return etc. However, I can add an extra filter before we get the family list for rna data, as theres no reason to include rna data for families with no genes

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added this below


genes = get_genes_for_variants(gene_ids)
for gene in genes.values():
Expand All @@ -104,7 +107,7 @@ def _saved_variant_genes_transcripts(variants):
)
}

return genes, transcripts
return genes, transcripts, family_genes


def _add_locus_lists(projects, genes, add_list_detail=False, user=None):
Expand Down Expand Up @@ -152,12 +155,17 @@ def get_phenotype_prioritization(family_guids, gene_ids=None):
return data_by_individual_gene


def _add_family_has_rna_tpm(families_by_guid, gene_ids):
def _get_family_has_rna_tpm(family_genes, gene_ids):
tpm_family_genes = RnaSeqTpm.objects.filter(
sample__individual__family__guid__in=families_by_guid.keys(), gene_id__in=gene_ids,
sample__individual__family__guid__in=family_genes.keys(), gene_id__in=gene_ids,
).values('sample__individual__family__guid').annotate(genes=ArrayAgg('gene_id', distinct=True))
family_tpms = {}
for agg in tpm_family_genes:
families_by_guid[agg['sample__individual__family__guid']]['tpmGenes'] = agg['genes']
family_guid = agg['sample__individual__family__guid']
genes = [gene for gene in agg['genes'] if gene in family_genes[family_guid]]
if genes:
family_tpms[family_guid] = {'tpmGenes': genes}
return family_tpms


def _add_discovery_tags(variants, discovery_tags):
Expand Down Expand Up @@ -188,11 +196,9 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a
response = get_json_for_saved_variants_with_tags(saved_variants, add_details=True)

variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants
genes, transcripts, family_genes = _saved_variant_genes_transcripts(variants)

loaded_family_guids = set()
for variant in variants:
loaded_family_guids.update(variant['familyGuids'])
projects = Project.objects.filter(family__guid__in=loaded_family_guids).distinct()
projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct()
project = list(projects)[0] if len(projects) == 1 else None

discovery_tags = None
Expand All @@ -201,7 +207,6 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a
discovery_tags, discovery_response = get_json_for_discovery_tags(response['savedVariantsByGuid'].values(), request.user)
response.update(discovery_response)

genes, transcripts = _saved_variant_genes_transcripts(variants)
response['transcriptsById'] = transcripts
response['locusListsByGuid'] = _add_locus_lists(
projects, genes, add_list_detail=add_locus_list_detail, user=request.user)
Expand All @@ -223,6 +228,13 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a
matchmakersubmissiongenes__saved_variant__guid__in=response['savedVariantsByGuid'].keys()))
response['mmeSubmissionsByGuid'] = {s['submissionGuid']: s for s in submissions}

rna_tpm = None
if include_individual_gene_scores:
present_family_genes = {k: v for k, v in family_genes.items() if v}
response['rnaSeqData'] = _get_rna_seq_outliers(genes.keys(), present_family_genes.keys())
rna_tpm = _get_family_has_rna_tpm(present_family_genes, genes.keys())
response['phenotypeGeneScores'] = get_phenotype_prioritization(present_family_genes.keys(), gene_ids=genes.keys())

if add_all_context or request.GET.get(LOAD_PROJECT_TAG_TYPES_CONTEXT_PARAM) == 'true':
project_fields = {'projectGuid': 'guid'}
if include_project_name:
Expand All @@ -233,18 +245,14 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a
add_project_tag_types(response['projectsByGuid'])

if add_all_context or request.GET.get(LOAD_FAMILY_CONTEXT_PARAM) == 'true':
families = Family.objects.filter(guid__in=loaded_family_guids)
families = Family.objects.filter(guid__in=family_genes.keys())
add_families_context(
response, families, project_guid=project.guid if project else None, user=request.user, is_analyst=is_analyst,
has_case_review_perm=bool(project) and has_case_review_permissions(project, request.user), include_igv=include_igv,
)

if include_individual_gene_scores:
response['rnaSeqData'] = _get_rna_seq_outliers(genes.keys(), loaded_family_guids)
families_by_guid = response.get('familiesByGuid')
if families_by_guid:
_add_family_has_rna_tpm(families_by_guid, genes.keys())

response['phenotypeGeneScores'] = get_phenotype_prioritization(loaded_family_guids, gene_ids=genes.keys())
if rna_tpm:
for family_guid, data in rna_tpm.items():
response['familiesByGuid'][family_guid].update(data)

return response
Loading