Skip to content

Commit

Permalink
Merge pull request #3512 from broadinstitute/hail-search-shared-tests
Browse files (browse the repository at this point in the history)
shared utilities for hail backend tests
  • Loading branch information
hanars authored Jul 26, 2023
2 parents 092dbe0 + 959c722 commit 77ff557
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ jobs:
pip install -r hail_search/requirements-test.txt
- name: Run coverage tests
run: |
coverage run --source="./hail_search" --omit="./hail_search/__main__.py" -m pytest hail_search/
coverage run --source="./hail_search" --omit="./hail_search/__main__.py","./hail_search/test_utils.py" -m pytest hail_search/
coverage report --fail-under=99
nodejs:
Expand Down
54 changes: 54 additions & 0 deletions hail_search/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from copy import deepcopy


def _sample(sample_id, individual_guid, family_guid, affected, sex):
    """Return a new sample record dict for project R0001_1kg."""
    return {
        'sample_id': sample_id,
        'individual_guid': individual_guid,
        'family_guid': family_guid,
        'project_guid': 'R0001_1kg',
        'affected': affected,
        'sex': sex,
    }


def _family_2_samples():
    """Return a fresh list holding the three family F000002_2 sample records."""
    return [
        _sample('HG00731', 'I000004_hg00731', 'F000002_2', 'A', 'F'),
        _sample('HG00732', 'I000005_hg00732', 'F000002_2', 'N', 'M'),
        _sample('HG00733', 'I000006_hg00733', 'F000002_2', 'N', 'F'),
    ]


FAMILY_3_SAMPLE = _sample('NA20870', 'I000007_na20870', 'F000003_3', 'A', 'M')

# The VARIANTS list holds FAMILY_3_SAMPLE itself (not a copy); the family 2
# records under VARIANTS and SV_WES are equal but independent dicts.
EXPECTED_SAMPLE_DATA = {
    'VARIANTS': [*_family_2_samples(), FAMILY_3_SAMPLE],
    'SV_WES': _family_2_samples(),
}

# Deep copy of the VARIANTS samples with the 'affected' value of the first
# three records overridden to 'N', 'A' and 'U' respectively.
CUSTOM_AFFECTED_SAMPLE_DATA = {'VARIANTS': deepcopy(EXPECTED_SAMPLE_DATA['VARIANTS'])}
for _custom_sample, _custom_affected in zip(CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'], 'NAU'):
    _custom_sample['affected'] = _custom_affected

FAMILY_1_SAMPLE_DATA = {
    'VARIANTS': [
        _sample('NA19675', 'I000001_na19675', 'F000001_1', 'A', 'M'),
        _sample('NA19678', 'I000002_na19678', 'F000001_1', 'N', 'M'),
    ],
}

FAMILY_5_SAMPLE = _sample('NA20874', 'I000009_na20874', 'F000005_5', 'N', 'M')

# Deep copy of EXPECTED_SAMPLE_DATA extended with a MITO entry and with
# FAMILY_5_SAMPLE (the same object, not a copy) appended to VARIANTS.
ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA)
ALL_AFFECTED_SAMPLE_DATA['MITO'] = [
    _sample('HG00733', 'I000006_hg00733', 'F000002_2', 'N', 'F'),
]
ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE)


def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_sample_type=None, **search_body):
    """Build the search body dict used by the hail backend tests.

    Args:
        genome_version: Value stored under the 'genome_version' key.
        num_results: Value stored under the 'num_results' key.
        sample_data: Mapping of sample type to sample records. Any falsy value
            (None or an empty mapping) falls back to EXPECTED_SAMPLE_DATA.
        omit_sample_type: Optional sample type key to leave out of
            'sample_data'. When set, a filtered copy of the mapping is used so
            the mapping passed in (or the shared default) is not mutated.
        **search_body: Additional search fields copied into the result as-is.

    Returns:
        A new dict with 'sample_data', 'genome_version' and 'num_results'
        followed by every entry of ``search_body``. When ``omit_sample_type``
        is not set, 'sample_data' is the same object that was passed in (or
        EXPECTED_SAMPLE_DATA), not a copy.
    """
    sample_data = sample_data or EXPECTED_SAMPLE_DATA
    if omit_sample_type:
        sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type}

    # The keyword arguments are always a dict and cannot contain the named
    # parameters above, so spreading them once is enough; no follow-up
    # update() is needed.
    return {
        'sample_data': sample_data,
        'genome_version': genome_version,
        'num_results': num_results,
        **search_body,
    }
88 changes: 22 additions & 66 deletions seqr/utils/search/hail_search_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,46 +10,11 @@
get_variants_for_variant_ids, InvalidSearchException
from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS
from seqr.views.utils.test_utils import PARSED_VARIANTS
from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, FAMILY_3_SAMPLE, \
ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA

MOCK_HOST = 'http://test-hail-host'

FAMILY_3_SAMPLE = {
'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3',
'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M',
}
EXPECTED_SAMPLE_DATA = {
'VARIANTS': [
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
FAMILY_3_SAMPLE,
], 'SV_WES': [
{'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
{'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}
],
}
CUSTOM_AFFECTED_SAMPLE_DATA = {'VARIANTS': deepcopy(EXPECTED_SAMPLE_DATA['VARIANTS'])}
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][0]['affected'] = 'N'
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][1]['affected'] = 'A'
CUSTOM_AFFECTED_SAMPLE_DATA['VARIANTS'][2]['affected'] = 'U'

FAMILY_1_SAMPLE_DATA = {
'VARIANTS': [
{'sample_id': 'NA19675', 'individual_guid': 'I000001_na19675', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M'},
{'sample_id': 'NA19678', 'individual_guid': 'I000002_na19678', 'family_guid': 'F000001_1', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
],
}

ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA)
ALL_AFFECTED_SAMPLE_DATA['MITO'] = [
{'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
]
FAMILY_5_SAMPLE = {
'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M',
}
ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE)


@mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST)
class HailSearchUtilsTests(SearchTestHelper, TestCase):
Expand All @@ -62,17 +27,8 @@ def setUp(self):
'results': PARSED_VARIANTS, 'total': 5,
})

def _test_minimal_search_call(self, search_body, num_results=100, sample_data=None, omit_sample_type=None):
sample_data = sample_data or EXPECTED_SAMPLE_DATA
if omit_sample_type:
sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type}

expected_search = {
'sample_data': sample_data,
'genome_version': 'GRCh37',
'num_results': num_results,
}
expected_search.update(search_body)
def _test_minimal_search_call(self, **kwargs):
expected_search = get_hail_search_body(genome_version='GRCh37', **kwargs)

executed_request = responses.calls[-1].request
self.assertEqual(executed_request.headers.get('From'), '[email protected]')
Expand Down Expand Up @@ -101,7 +57,7 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval
}
expected_search.update({field: self.search_model.search[field] for field in search_fields or []})

self._test_minimal_search_call(expected_search, **kwargs)
self._test_minimal_search_call(**expected_search, **kwargs)

@responses.activate
def test_query_variants(self):
Expand Down Expand Up @@ -217,14 +173,14 @@ def test_get_variant_query_gene_counts(self):
def test_get_single_variant(self):
variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user)
self.assertDictEqual(variant, PARSED_VARIANTS[0])
self._test_minimal_search_call({
'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [],
}, num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')
self._test_minimal_search_call(
variant_ids=[['2', 103343353, 'GAGA', 'G']], variant_keys=[],
num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')

get_single_variant(self.families, 'prefix_19107_DEL', user=self.user)
self._test_minimal_search_call({
'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'],
}, num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS')
self._test_minimal_search_call(
variant_ids=[], variant_keys=['prefix_19107_DEL'],
num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS')

with self.assertRaises(InvalidSearchException) as cm:
get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True)
Expand All @@ -234,9 +190,9 @@ def test_get_single_variant(self):
)

get_single_variant(self.families.filter(guid='F000003_3'), '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True)
self._test_minimal_search_call({
'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [],
}, num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]})
self._test_minimal_search_call(
variant_ids=[['2', 103343353, 'GAGA', 'G']], variant_keys=[],
num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]})

responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={'results': [], 'total': 0})
with self.assertRaises(InvalidSearchException) as cm:
Expand All @@ -247,13 +203,13 @@ def test_get_single_variant(self):
def test_get_variants_for_variant_ids(self):
variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL']
get_variants_for_variant_ids(self.families, variant_ids, user=self.user)
self._test_minimal_search_call({
'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
'variant_keys': ['prefix-938_DEL'],
}, num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA)
self._test_minimal_search_call(
variant_ids=[['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
variant_keys=['prefix-938_DEL'],
num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA)

get_variants_for_variant_ids(self.families, variant_ids, user=self.user, dataset_type='VARIANTS')
self._test_minimal_search_call({
'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
'variant_keys': [],
}, num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')
self._test_minimal_search_call(
variant_ids=[['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
variant_keys=[],
num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')

0 comments on commit 77ff557

Please sign in to comment.