diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index ac4fb3cb27..972b33dd15 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -43,6 +43,7 @@ class BaseHailTableQuery(object): GENOTYPE_FIELDS = {} COMPUTED_GENOTYPE_FIELDS = {} + GENOTYPE_OVERRIDE_FIELDS = {} GENOTYPE_QUERY_FIELDS = {} QUALITY_FILTER_FORMAT = {} POPULATIONS = {} @@ -92,7 +93,7 @@ def _format_population_config(cls, pop_config): base_pop_config.pop('sort', None) return base_pop_config - def annotation_fields(self): + def annotation_fields(self, include_genotype_overrides=True): annotation_fields = { GENOTYPES_FIELD: lambda r: r.family_entries.flatmap(lambda x: x).filter( lambda gt: hl.is_defined(gt.individualGuid) @@ -100,7 +101,8 @@ def annotation_fields(self): 'sampleId', 'sampleType', 'individualGuid', 'familyGuid', numAlt=hl.if_else(hl.is_defined(x[0].GT), x[0].GT.n_alt_alleles(), self.MISSING_NUM_ALT), **{k: x[0][field] for k, field in self.GENOTYPE_FIELDS.items()}, - **{_to_camel_case(k): v(x[0], k, r) for k, v in self.COMPUTED_GENOTYPE_FIELDS.items()}, + **{_to_camel_case(k): v(x[0], k, r) for k, v in self.COMPUTED_GENOTYPE_FIELDS.items() + if include_genotype_overrides or k not in self.GENOTYPE_OVERRIDE_FIELDS}, )), 'populations': lambda r: hl.struct(**{ population: self.population_expression(r, population) for population in self.POPULATIONS.keys() @@ -1029,7 +1031,7 @@ def lookup_variant(self, variant_id, sample_data=None): ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) - annotation_fields = self.annotation_fields() + annotation_fields = self.annotation_fields(include_genotype_overrides=False) entry_annotations = {k: annotation_fields[k] for k in [FAMILY_GUID_FIELD, GENOTYPES_FIELD]} annotation_fields.update({ FAMILY_GUID_FIELD: lambda ht: hl.empty_array(hl.tstr), @@ -1037,7 +1039,7 @@ def lookup_variant(self, variant_id, sample_data=None): 'genotypeFilters': lambda ht: hl.str(''), }) - formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=bool(sample_data)) + formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=False) variants = formatted.aggregate(hl.agg.take(formatted.row, 1)) if not variants: diff --git a/hail_search/queries/sv.py b/hail_search/queries/sv.py index 526cc52319..bc887410f6 100644 --- a/hail_search/queries/sv.py +++ b/hail_search/queries/sv.py @@ -76,7 +76,7 @@ def _parse_annotations(self, annotations, *args, **kwargs): def _get_family_passes_quality_filter(self, quality_filter, parsed_annotations=None, **kwargs): passes_quality = super()._get_family_passes_quality_filter(quality_filter) - if not parsed_annotations[NEW_SV_FIELD]: + if not (parsed_annotations or {}).get(NEW_SV_FIELD): return passes_quality entries_has_new_call = lambda entries: entries.any(lambda x: x.concordance.new_call) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 2bc9337fbe..c02e1c0935 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -662,6 +662,11 @@ async def test_variant_lookup(self): resp_json = await resp.json() self.assertDictEqual(resp_json, {**SV_VARIANT4, 'familyGuids': [], 'genotypes': {}, 'genotypeFilters': ''}) + async with self.client.request('POST', '/lookup', json={**body, 'sample_data': SV_WGS_SAMPLE_DATA['SV_WGS']}) as resp: + self.assertEqual(resp.status, 200) + resp_json = await resp.json() + self.assertDictEqual(resp_json, SV_VARIANT4) + body.update({'variant_id': 'suffix_140608_DUP', 'data_type': 'SV_WES'}) async with self.client.request('POST', '/lookup', json=body) as resp: self.assertEqual(resp.status, 200) @@ -670,6 +675,16 @@ async def test_variant_lookup(self): **GCNV_VARIANT4, 'numExon': 8, 'end': 38736268, 'familyGuids': [], 'genotypes': {}, 'genotypeFilters': '', }) + async with self.client.request('POST', '/lookup', json={**body, 'sample_data': EXPECTED_SAMPLE_DATA['SV_WES']}) as resp: + self.assertEqual(resp.status, 200) + resp_json = await resp.json() + self.assertDictEqual(resp_json, { + **GCNV_VARIANT4, 'numExon': 8, 'end': 38736268, 'genotypes': { + individual: {k: v for k, v in genotype.items() if k not in {'start', 'end', 'numExon', 'geneIds'}} + for individual, genotype in GCNV_VARIANT4['genotypes'].items() + } + }) + async def test_frequency_filter(self): sv_callset_filter = {'sv_callset': {'af': 0.05}} await self._assert_expected_search( diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index c4b700e2ed..f9ac9d4cbf 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -74,14 +74,44 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id return response_json['results'] -def hail_variant_lookup(user, variant_id, samples=None, **kwargs): +def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, sample_type=None, **kwargs): + data_type = dataset_type.replace('_only', '') + is_sv = data_type == Sample.DATASET_TYPE_SV_CALLS + if is_sv: + if not sample_type: + from seqr.utils.search.utils import InvalidSearchException + raise InvalidSearchException('Sample type must be specified to look up a structural variant') + data_type = f'{data_type}_{sample_type}' + body = { 'variant_id': variant_id, + 'data_type': data_type, **kwargs, } + sample_data = None if samples: - body['sample_data'] = _get_sample_data(samples)[Sample.DATASET_TYPE_VARIANT_CALLS] - return _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) + sample_data = _get_sample_data(samples) + body['sample_data'] = sample_data.pop(data_type) + variant = _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) + variants = [variant] + + if is_sv and sample_data and variant['svType'] in {'DEL', 'DUP'}: + start = variant['pos'] + end = variant['end'] + offset = 0.2 + if variant.get('endChrom'): + start -= 50 + end += 50 + offset = None + del body['variant_id'] + body.update({ + 'sample_data': sample_data, + 'intervals': [_format_interval(chrom=variant['chrom'], start=start, end=end, offset=offset)], + 'annotations': {'structural': [variant['svType'], f"gCNV_{variant['svType']}"]} + }) + variants += _execute_search(body, user)['results'] + + return variants def _format_search_body(samples, genome_version, num_results, search): diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index b8332ecb2e..3c72f4bb57 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -12,9 +12,15 @@ from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, \ FAMILY_2_ALL_SAMPLE_DATA, ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, FAMILY_2_VARIANT_SAMPLE_DATA, \ - FAMILY_2_MITO_SAMPLE_DATA, EXPECTED_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, MULTI_PROJECT_SAMPLE_DATA + FAMILY_2_MITO_SAMPLE_DATA, EXPECTED_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, MULTI_PROJECT_SAMPLE_DATA, \ + GCNV_VARIANT4, SV_VARIANT2 MOCK_HOST = 'http://test-hail-host' +SV_WGS_SAMPLE_DATA = [{ + 'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', + 'affected': 'A', 'sample_id': 'NA21234', +}] + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST) class HailSearchUtilsTests(SearchTestHelper, TestCase): @@ -28,11 +34,12 @@ def setUp(self): 'results': HAIL_BACKEND_VARIANTS, 'total': 5, }) - def _test_minimal_search_call(self, expected_search_body=None, **kwargs): + def _test_minimal_search_call(self, expected_search_body=None, call_offset=-1, url_path='search', **kwargs): expected_search = expected_search_body or get_hail_search_body(genome_version='GRCh37', **kwargs) - executed_request = responses.calls[-1].request + executed_request = responses.calls[call_offset].request self.assertEqual(executed_request.headers.get('From'), 'test_user@broadinstitute.org') + self.assertEqual(executed_request.url.split('/')[-1], url_path) self.assertDictEqual(json.loads(executed_request.body), expected_search) def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None, @@ -149,8 +156,7 @@ def test_query_variants(self): query_variants(self.results_model, user=self.user) sv_sample_data = { 'SV_WES': FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL'], - 'SV_WGS': [{'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', - 'project_guid': 'R0004_non_analyst_project', 'affected': 'A', 'sample_id': 'NA21234'}], + 'SV_WGS': SV_WGS_SAMPLE_DATA, } self._test_expected_search_call(search_fields=['annotations'], dataset_type='SV', sample_data=sv_sample_data) @@ -208,34 +214,59 @@ def test_get_variant_query_gene_counts(self): gene_counts = get_variant_query_gene_counts(self.results_model, self.user) self.assertDictEqual(gene_counts, GENE_COUNTS) self.assert_cached_results({'gene_aggs': gene_counts}) - self._test_expected_search_call(sort=None) + self._test_expected_search_call(url_path='gene_counts', sort=None) @responses.activate def test_variant_lookup(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=VARIANT_LOOKUP_VARIANT) variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', foo='bar') - self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - self._test_minimal_search_call(expected_search_body={ - 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'foo': 'bar', + self.assertListEqual(variant, [VARIANT_LOOKUP_VARIANT]) + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'foo': 'bar', 'data_type': 'SNV_INDEL', }) variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families) - self._test_minimal_search_call(expected_search_body={ + self._test_minimal_search_call(url_path='lookup', expected_search_body={ 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', - 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'], + 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'], 'data_type': 'SNV_INDEL', }) with self.assertRaises(InvalidSearchException) as cm: - variant_lookup(self.user, 'prefix_123_DEL') - self.assertEqual(str(cm.exception), 'Invalid variant prefix_123_DEL') + variant_lookup(self.user, 'suffix_140608_DUP') + self.assertEqual(str(cm.exception), 'Sample type must be specified to look up a structural variant') + + responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=GCNV_VARIANT4) + variant_lookup(self.user, 'suffix_140608_DUP', sample_type='WES') + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': 'suffix_140608_DUP', 'genome_version': 'GRCh38', 'data_type': 'SV_WES', + }) + + sv_families = Family.objects.filter(id__in=[2, 14]) + variant_lookup(self.user, 'suffix_140608_DUP', sample_type='WES', families=sv_families) + self._test_minimal_search_call(url_path='lookup', call_offset=-2, expected_search_body={ + 'variant_id': 'suffix_140608_DUP', 'genome_version': 'GRCh38', 'data_type': 'SV_WES', + 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SV_WES'] + }) + self._test_minimal_search_call(expected_search_body={ + 'genome_version': 'GRCh38', 'data_type': 'SV_WES', 'annotations': {'structural': ['DEL', 'gCNV_DEL']}, + 'intervals': ['17:38718997-38738487'], 'sample_data': {'SV_WGS': SV_WGS_SAMPLE_DATA}, + }) + + # No second lookup call is made for non DELs/DUPs + responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=SV_VARIANT2) + variant_lookup(self.user, 'cohort_2911.chr1.final_cleanup_INS_chr1_160', sample_type='WGS', families=sv_families) + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': 'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'genome_version': 'GRCh38', 'data_type': 'SV_WGS', + 'sample_data': SV_WGS_SAMPLE_DATA + }) responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=404) with self.assertRaises(HTTPError) as cm: variant_lookup(self.user, '1-10439-AC-A') self.assertEqual(cm.exception.response.status_code, 404) self.assertEqual(str(cm.exception), 'Variant not present in seqr') - self._test_minimal_search_call(expected_search_body={ - 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38' + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38', 'data_type': 'SNV_INDEL', }) @responses.activate diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index ab649e0834..cde3599ea7 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -55,20 +55,18 @@ def test_variant_lookup(self, mock_variant_lookup): mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38') self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38') + mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38', + dataset_type='SNV_INDEL_only') cache_key = 'variant_lookup_results__1-10439-AC-A__38__test_user' self.assert_cached_results(variant, cache_key=cache_key) variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families) self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh37', samples=mock.ANY) + mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh37', samples=mock.ANY, + dataset_type='SNV_INDEL_only') expected_samples = {s for s in self.search_samples if s.guid not in NON_SNP_INDEL_SAMPLES} self.assertSetEqual(set(mock_variant_lookup.call_args.kwargs['samples']), expected_samples) - with self.assertRaises(InvalidSearchException) as cm: - variant_lookup(self.user, '100-10439-AC-A') - self.assertEqual(str(cm.exception), 'Invalid variant 100-10439-AC-A') - mock_variant_lookup.reset_mock() self.set_cache(variant) cached_variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38') diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index b69a760b40..c9cc5753fe 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -161,15 +161,14 @@ def variant_lookup(user, variant_id, families=None, genome_version=None, **kwarg return variant parsed_variant_id = _parse_variant_id(variant_id) - if not parsed_variant_id: - raise InvalidSearchException(f'Invalid variant {variant_id}') + dataset_type = DATASET_TYPE_SNP_INDEL_ONLY if parsed_variant_id else Sample.DATASET_TYPE_SV_CALLS if families: - samples, _ = _get_families_search_data(families, dataset_type=DATASET_TYPE_SNP_INDEL_ONLY) + samples, _ = _get_families_search_data(families, dataset_type=dataset_type) kwargs['samples'] = samples lookup_func = backend_specific_call(_raise_search_error('Hail backend is disabled'), hail_variant_lookup) - variant = lookup_func(user, parsed_variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], **kwargs) + variant = lookup_func(user, parsed_variant_id or variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], dataset_type=dataset_type, **kwargs) safe_redis_set_json(cache_key, variant, expire=timedelta(weeks=2)) return variant diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 02976a0e45..035b133029 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -541,12 +541,12 @@ def variant_lookup_handler(request): kwargs.get('genome_version', GENOME_VERSION_GRCh38), request.user, ) - variant = variant_lookup(request.user, families=families, **kwargs) - saved_variants, _ = _get_saved_variant_models([variant], families) if families else (None, None) + variants = variant_lookup(request.user, families=families, **kwargs) + saved_variants, _ = _get_saved_variant_models(variants, families) if families else (None, None) response = get_variants_response( - request, saved_variants=saved_variants, response_variants=[variant], + request, saved_variants=saved_variants, response_variants=variants, add_all_context=include_genotypes, add_locus_list_detail=include_genotypes, ) - response['variant'] = variant + response['variants'] = variants return create_json_response(response) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 6a1a2081b0..e93d221300 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -761,7 +761,7 @@ def _assert_expected_single_variant_results_context(self, response_json, omit_fi @mock.patch('seqr.views.apis.variant_search_api.variant_lookup') def test_variant_lookup(self, mock_variant_lookup): - mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT + mock_variant_lookup.return_value = [VARIANT_LOOKUP_VARIANT] url = f'{reverse(variant_lookup_handler)}?variantId=1-10439-AC-A&genomeVersion=38' self.check_require_login(url) @@ -776,15 +776,15 @@ def test_variant_lookup(self, mock_variant_lookup): 'rnaSeqData': {}, 'savedVariantsByGuid': {}, 'transcriptsById': {}, - 'variant': VARIANT_LOOKUP_VARIANT, + 'variants': [VARIANT_LOOKUP_VARIANT], } self.assertDictEqual(response.json(), expected_body) mock_variant_lookup.assert_called_with(self.no_access_user, variant_id='1-10439-AC-A', genome_version='38', families=None) variant = {**VARIANTS[0], 'familyGuids': [], 'genotypes': {}} - mock_variant_lookup.return_value = variant + mock_variant_lookup.return_value = [variant] expected_body.update({ - 'variant': variant, + 'variants': [variant], 'genesById': {'ENSG00000227232': EXPECTED_GENE, 'ENSG00000268903': EXPECTED_GENE}, 'transcriptsById': EXPECTED_SEARCH_RESPONSE['transcriptsById'], }) @@ -794,11 +794,11 @@ def test_variant_lookup(self, mock_variant_lookup): self.assertDictEqual(response.json(), expected_body) self.login_collaborator() - mock_variant_lookup.return_value = SINGLE_FAMILY_VARIANT + mock_variant_lookup.return_value = [SINGLE_FAMILY_VARIANT] response = self.client.get(f'{url.replace("38", "37")}&include_genotypes=true') self.assertEqual(response.status_code, 200) self._assert_expected_single_variant_results_context( - response.json(), variant=SINGLE_FAMILY_VARIANT, omit_fields={'searchedVariants'}, + response.json(), variants=[SINGLE_FAMILY_VARIANT], omit_fields={'searchedVariants'}, ) mock_variant_lookup.assert_called_with( self.collaborator_user, variant_id='1-10439-AC-A', genome_version='37', families=mock.ANY, diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 3b646ee5b2..07236bcd84 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -31,10 +31,10 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const VariantDisplay = ({ variant }) => (variant ? : null) +const VariantDisplay = ({ variants }) => (variants || []).map(variant => ) VariantDisplay.propTypes = { - variant: PropTypes.object, + variants: PropTypes.arrayOf(PropTypes.object), } const onSubmit = updateQueryParams => (data) => { diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 040f6dffef..15b0a619ce 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -296,8 +296,13 @@ const VARIANT_LINKS = [ }, ] +const getSampleType = (genotypes) => { + const sampleTypes = [...new Set(Object.values(genotypes || {}).map(({ sampleType }) => sampleType).filter(s => s))] + return sampleTypes.length === 1 ? sampleTypes[0] : '' +} + const variantSearchLinks = (variant, mainTranscript, genesById, user, elasticsearchEnabled) => { - const { chrom, endChrom, pos, end, ref, alt, genomeVersion, svType, variantId, transcripts } = variant + const { chrom, endChrom, pos, end, ref, alt, genomeVersion, genotypes, svType, variantId, transcripts } = variant const mainGene = genesById[mainTranscript.geneId] let genes @@ -331,7 +336,7 @@ const variantSearchLinks = (variant, mainTranscript, genesById, user, elasticsea const linkVariant = { genes, variations, hgvsc, ...variant } - const seqrSearchLink = (elasticsearchEnabled || svType) ? ( + const seqrSearchLink = elasticsearchEnabled ? ( ) : ( seqr