"""Shared fixtures and a request-body builder for hail search tests."""
from copy import deepcopy


# Stand-alone sample record for family F000003_3. The same dict object is
# embedded (by reference, not copied) in EXPECTED_SAMPLE_DATA['VARIANTS'].
FAMILY_3_SAMPLE = {
    'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3',
    'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M',
}
# Default sample data used by get_hail_search_body: lists of sample records
# grouped under the top-level keys that `omit_sample_type` filters on.
EXPECTED_SAMPLE_DATA = {
    'VARIANTS': [
        {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
        {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
        {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
        FAMILY_3_SAMPLE,
    ], 'SV_WES': [
        {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
        {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
        {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}
    ],
}
# Same records as EXPECTED_SAMPLE_DATA['VARIANTS'] with the 'affected' value of
# the first three overridden. Deep-copied so the overrides below do not leak
# into EXPECTED_SAMPLE_DATA or FAMILY_3_SAMPLE.
CUSTOM_AFFECTED_SAMPLE_DATA = {'VARIANTS': deepcopy(EXPECTED_SAMPLE_DATA['VARIANTS'])}
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][0]['affected'] = 'N'
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][1]['affected'] = 'A'
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][2]['affected'] = 'U'

# Sample data containing only the two family F000001_1 records.
FAMILY_1_SAMPLE_DATA = {
    'VARIANTS': [
        {'sample_id': 'NA19675', 'individual_guid': 'I000001_na19675', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M'},
        {'sample_id': 'NA19678', 'individual_guid': 'I000002_na19678', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
    ],
}

# Deep copy of EXPECTED_SAMPLE_DATA extended with a 'MITO' group and one extra
# 'VARIANTS' record (FAMILY_5_SAMPLE); the copy keeps EXPECTED_SAMPLE_DATA intact.
ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA)
ALL_AFFECTED_SAMPLE_DATA['MITO'] = [
    {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
]
FAMILY_5_SAMPLE = {
    'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M',
}
ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE)


def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_sample_type=None, **search_body):
    """Build a hail search request body as a plain dict.

    Args:
        genome_version: Value stored under ``'genome_version'``.
        num_results: Value stored under ``'num_results'``.
        sample_data: Mapping stored under ``'sample_data'``. Any falsy value
            (``None`` or an empty mapping) falls back to ``EXPECTED_SAMPLE_DATA``.
        omit_sample_type: When truthy, the top-level key of ``sample_data`` to
            leave out of the body. The source mapping itself is not modified.
        **search_body: Any further fields to include in the body as-is.

    Returns:
        A new dict holding the three fixed fields followed by ``search_body``.
        The sample data is not copied: the returned body references the
        caller's mapping or the module-level fixture (or, when a type is
        omitted, a new mapping that still shares the per-type lists), so
        callers should treat it as read-only.
    """
    sample_data = sample_data or EXPECTED_SAMPLE_DATA
    if omit_sample_type:
        # Filter into a fresh dict so the shared fixture keeps all of its keys.
        sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type}

    # ``search_body`` is always a dict (it collects **kwargs), so spreading it
    # once here is sufficient; no follow-up ``update`` is needed.
    return {
        'sample_data': sample_data,
        'genome_version': genome_version,
        'num_results': num_results,
        **search_body,
    }
hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, FAMILY_3_SAMPLE, \ + ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA MOCK_HOST = 'http://test-hail-host' -FAMILY_3_SAMPLE = { - 'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3', - 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M', -} -EXPECTED_SAMPLE_DATA = { - 'VARIANTS': [ - {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, - {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, - FAMILY_3_SAMPLE, - ], 'SV_WES': [ - {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, - {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'} - ], -} -CUSTOM_AFFECTED_SAMPLE_DATA = {'VARIANTS': deepcopy(EXPECTED_SAMPLE_DATA['VARIANTS'])} -CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][0]['affected'] = 'N' -CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][1]['affected'] = 'A' -CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][2]['affected'] = 'U' - -FAMILY_1_SAMPLE_DATA = { - 'VARIANTS': [ - {'sample_id': 'NA19675', 'individual_guid': 'I000001_na19675', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M'}, - {'sample_id': 'NA19678', 'individual_guid': 'I000002_na19678', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 
'M'}, - ], -} - -ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA) -ALL_AFFECTED_SAMPLE_DATA['MITO'] = [ - {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, -] -FAMILY_5_SAMPLE = { - 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M', -} -ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE) - @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST) class HailSearchUtilsTests(SearchTestHelper, TestCase): @@ -62,17 +27,8 @@ def setUp(self): 'results': PARSED_VARIANTS, 'total': 5, }) - def _test_minimal_search_call(self, search_body, num_results=100, sample_data=None, omit_sample_type=None): - sample_data = sample_data or EXPECTED_SAMPLE_DATA - if omit_sample_type: - sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type} - - expected_search = { - 'sample_data': sample_data, - 'genome_version': 'GRCh37', - 'num_results': num_results, - } - expected_search.update(search_body) + def _test_minimal_search_call(self, **kwargs): + expected_search = get_hail_search_body(genome_version='GRCh37', **kwargs) executed_request = responses.calls[-1].request self.assertEqual(executed_request.headers.get('From'), 'test_user@broadinstitute.org') @@ -101,7 +57,7 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval } expected_search.update({field: self.search_model.search[field] for field in search_fields or []}) - self._test_minimal_search_call(expected_search, **kwargs) + self._test_minimal_search_call(**expected_search, **kwargs) @responses.activate def test_query_variants(self): @@ -217,14 +173,14 @@ def test_get_variant_query_gene_counts(self): def test_get_single_variant(self): variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) 
self.assertDictEqual(variant, PARSED_VARIANTS[0]) - self._test_minimal_search_call({ - 'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [], - }, num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES') + self._test_minimal_search_call( + variant_ids=[['2', 103343353, 'GAGA', 'G']], variant_keys=[], + num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES') get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) - self._test_minimal_search_call({ - 'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'], - }, num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS') + self._test_minimal_search_call( + variant_ids=[], variant_keys=['prefix_19107_DEL'], + num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS') with self.assertRaises(InvalidSearchException) as cm: get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) @@ -234,9 +190,9 @@ def test_get_single_variant(self): ) get_single_variant(self.families.filter(guid='F000003_3'), '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) - self._test_minimal_search_call({ - 'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [], - }, num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]}) + self._test_minimal_search_call( + variant_ids=[['2', 103343353, 'GAGA', 'G']], variant_keys=[], + num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]}) responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={'results': [], 'total': 0}) with self.assertRaises(InvalidSearchException) as cm: @@ -247,13 +203,13 @@ def test_get_single_variant(self): def test_get_variants_for_variant_ids(self): variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL'] get_variants_for_variant_ids(self.families, variant_ids, user=self.user) - self._test_minimal_search_call({ - 'variant_ids': [['2', 103343353, 'GAGA', 
'G'], ['1', 248367227, 'TC', 'T']], - 'variant_keys': ['prefix-938_DEL'], - }, num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA) + self._test_minimal_search_call( + variant_ids=[['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']], + variant_keys=['prefix-938_DEL'], + num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA) get_variants_for_variant_ids(self.families, variant_ids, user=self.user, dataset_type='VARIANTS') - self._test_minimal_search_call({ - 'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']], - 'variant_keys': [], - }, num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES') + self._test_minimal_search_call( + variant_ids=[['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']], + variant_keys=[], + num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')