From 6f802d6e044ea1a85d336f0b44abab61d1e2867d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 May 2023 18:08:02 -0400 Subject: [PATCH 01/55] remove hail from file check --- seqr/management/commands/check_bam_cram_paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/management/commands/check_bam_cram_paths.py b/seqr/management/commands/check_bam_cram_paths.py index fb8699154a..512f50949e 100644 --- a/seqr/management/commands/check_bam_cram_paths.py +++ b/seqr/management/commands/check_bam_cram_paths.py @@ -1,12 +1,12 @@ from django.core.management.base import BaseCommand import collections -import hail as hl import logging import tqdm from seqr.models import IgvSample from seqr.utils import communication_utils +from seqr.utils.file_utils import does_file_exist from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL logger = logging.getLogger(__name__) @@ -37,7 +37,7 @@ def handle(self, *args, **options): guids_of_samples_with_missing_file = set() project_name_to_missing_paths = collections.defaultdict(list) for sample in tqdm.tqdm(samples, unit=" samples"): - if not hl.hadoop_is_file(sample.file_path): + if not does_file_exist(sample.file_path): individual_id = sample.individual.individual_id project_name = sample.individual.family.project.name missing_counter[project_name] += 1 From ae245bb6ad121ddade101ef36100e8d91dfade6e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 11:28:41 -0400 Subject: [PATCH 02/55] fix test --- .../commands/check_bam_cram_paths.py | 2 +- .../tests/check_bam_cram_paths_tests.py | 41 ++++++++----------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/seqr/management/commands/check_bam_cram_paths.py b/seqr/management/commands/check_bam_cram_paths.py index 512f50949e..06a936c309 100644 --- a/seqr/management/commands/check_bam_cram_paths.py +++ b/seqr/management/commands/check_bam_cram_paths.py @@ -47,7 +47,7 @@ def handle(self, *args, **options): 
guids_of_samples_with_missing_file.add(sample.guid) if len(guids_of_samples_with_missing_file) > 0: - IgvSample.bulk_update(user=None, update_json={'file_path': ''}, guid__in=guids_of_samples_with_missing_file) + IgvSample.bulk_update(user=None, update_json={'file_path': ''}, guid__in=guids_of_samples_with_missing_file) # TODO delete logger.info('---- DONE ----') logger.info('Checked {} samples'.format(len(samples))) diff --git a/seqr/management/tests/check_bam_cram_paths_tests.py b/seqr/management/tests/check_bam_cram_paths_tests.py index a128638ad1..3da5f79baf 100644 --- a/seqr/management/tests/check_bam_cram_paths_tests.py +++ b/seqr/management/tests/check_bam_cram_paths_tests.py @@ -5,6 +5,10 @@ from seqr.models import IgvSample from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL + +@mock.patch('seqr.utils.file_utils.subprocess.Popen') +@mock.patch('seqr.utils.communication_utils.safe_post_to_slack') +@mock.patch('seqr.management.commands.check_bam_cram_paths.logger') class CheckBamCramPathsTest(TestCase): fixtures = ['users', '1kg_project'] @@ -16,18 +20,13 @@ def setUp(self): file_path='gs://missing-bucket/missing_file', ) - @mock.patch('hail.hadoop_is_file') - @mock.patch('seqr.utils.communication_utils.safe_post_to_slack') - @mock.patch('seqr.management.commands.check_bam_cram_paths.logger') - def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file): - mock_hadoop_is_file.return_value = False + def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): + mock_subprocess.return_value.wait.return_value = -1 call_command('check_bam_cram_paths', '1kg project n\u00e5me with uni\u00e7\u00f8de') - self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file) + self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) - @mock.patch('hail.hadoop_is_file') - @mock.patch('seqr.management.commands.check_bam_cram_paths.logger') - def 
test_command_with_other_project(self, mock_logger, mock_hadoop_is_file): - mock_hadoop_is_file.return_value = False + def test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): + mock_subprocess.return_value.wait.return_value = -1 call_command('check_bam_cram_paths', '1kg project') self.assertEqual(IgvSample.objects.filter(file_path='').count(), 0) self.assertEqual(IgvSample.objects.count(), 2) @@ -38,26 +37,20 @@ def test_command_with_other_project(self, mock_logger, mock_hadoop_is_file): ] mock_logger.info.assert_has_calls(calls) - @mock.patch('hail.hadoop_is_file') - @mock.patch('seqr.utils.communication_utils.safe_post_to_slack') - @mock.patch('seqr.management.commands.check_bam_cram_paths.logger') - def test_command(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file): - mock_hadoop_is_file.return_value = False + def test_command(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): + mock_subprocess.return_value.wait.return_value = -1 call_command('check_bam_cram_paths') - self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file) + self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) - @mock.patch('hail.hadoop_is_file') - @mock.patch('seqr.utils.communication_utils.safe_post_to_slack') - @mock.patch('seqr.management.commands.check_bam_cram_paths.logger') - def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file): - mock_hadoop_is_file.return_value = False + def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): + mock_subprocess.return_value.wait.return_value = -1 call_command('check_bam_cram_paths', '--dry-run') - self._check_results(0, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file) + self._check_results(0, mock_logger, mock_safe_post_to_slack, mock_subprocess) - def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file): + def 
_check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack, mock_subprocess): self.assertEqual(IgvSample.objects.filter(file_path='').count(), num_paths_deleted) self.assertEqual(IgvSample.objects.count(), 2) - mock_hadoop_is_file.assert_called_with("gs://missing-bucket/missing_file") + mock_subprocess.assert_called_with('gsutil ls gs://missing-bucket/missing_file', stdout=-1, stderr=-2, shell=True) calls = [ mock.call('Individual: NA19675_1 file not found: gs://missing-bucket/missing_file'), From 529f2249df41a659a9b78d7d05c68ae5eaa4d608 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 13:01:41 -0400 Subject: [PATCH 03/55] better fixture data --- seqr/fixtures/1kg_project.json | 26 ++++++++++++++++ .../commands/check_bam_cram_paths.py | 2 +- .../tests/check_bam_cram_paths_tests.py | 31 ++++++++----------- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 30ff903eda..1fa8b648ab 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -1487,6 +1487,32 @@ "file_path": "/readviz/NA19675.cram" } }, +{ + "model": "seqr.igvsample", + "pk": 146, + "fields": { + "guid": "S000146_na20870", + "created_date": "2017-02-05T06:42:55.397Z", + "created_by": null, + "last_modified_date": "2017-03-13T09:07:49.744Z", + "individual": 7, + "sample_type": "alignment", + "file_path": "gs://readviz/NA20870.cram" + } +}, +{ + "model": "seqr.igvsample", + "pk": 147, + "fields": { + "guid": "S000147_na20870", + "created_date": "2017-02-05T06:42:55.397Z", + "created_by": null, + "last_modified_date": "2017-03-13T09:07:49.744Z", + "individual": 7, + "sample_type": "gcnv", + "file_path": "gs://datasets-gcnv/NA20870.bed.gz" + } +}, { "model": "seqr.varianttagtype", "pk": 1, diff --git a/seqr/management/commands/check_bam_cram_paths.py b/seqr/management/commands/check_bam_cram_paths.py index 06a936c309..564be0b330 100644 --- 
a/seqr/management/commands/check_bam_cram_paths.py +++ b/seqr/management/commands/check_bam_cram_paths.py @@ -31,7 +31,7 @@ def handle(self, *args, **options): individual__family__project__name__in=args ) if args else IgvSample.objects.all()).filter( file_path__startswith='gs://' - ).prefetch_related('individual', 'individual__family__project') + ).order_by('id').prefetch_related('individual', 'individual__family__project') missing_counter = collections.defaultdict(int) guids_of_samples_with_missing_file = set() diff --git a/seqr/management/tests/check_bam_cram_paths_tests.py b/seqr/management/tests/check_bam_cram_paths_tests.py index 3da5f79baf..73c206f200 100644 --- a/seqr/management/tests/check_bam_cram_paths_tests.py +++ b/seqr/management/tests/check_bam_cram_paths_tests.py @@ -12,24 +12,16 @@ class CheckBamCramPathsTest(TestCase): fixtures = ['users', '1kg_project'] - def setUp(self): - existing_sample = IgvSample.objects.first() - IgvSample.objects.create( - individual=existing_sample.individual, - sample_type=IgvSample.SAMPLE_TYPE_GCNV, - file_path='gs://missing-bucket/missing_file', - ) - def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): - mock_subprocess.return_value.wait.return_value = -1 + mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '1kg project n\u00e5me with uni\u00e7\u00f8de') self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) def test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): - mock_subprocess.return_value.wait.return_value = -1 + mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '1kg project') self.assertEqual(IgvSample.objects.filter(file_path='').count(), 0) - self.assertEqual(IgvSample.objects.count(), 2) + self.assertEqual(IgvSample.objects.count(), 3) calls = [ mock.call('---- DONE ----'), @@ -38,24 +30,27 @@ def 
test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, mock_logger.info.assert_has_calls(calls) def test_command(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): - mock_subprocess.return_value.wait.return_value = -1 + mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths') self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): - mock_subprocess.return_value.wait.return_value = -1 + mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '--dry-run') self._check_results(0, mock_logger, mock_safe_post_to_slack, mock_subprocess) def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack, mock_subprocess): self.assertEqual(IgvSample.objects.filter(file_path='').count(), num_paths_deleted) - self.assertEqual(IgvSample.objects.count(), 2) - mock_subprocess.assert_called_with('gsutil ls gs://missing-bucket/missing_file', stdout=-1, stderr=-2, shell=True) + self.assertEqual(IgvSample.objects.count(), 3) + mock_subprocess.assert_has_calls([ + mock.call('gsutil ls gs://readviz/NA20870.cram', stdout=-1, stderr=-2, shell=True), + mock.call('gsutil ls gs://datasets-gcnv/NA20870.bed.gz', stdout=-1, stderr=-2, shell=True), + ], any_order=True) calls = [ - mock.call('Individual: NA19675_1 file not found: gs://missing-bucket/missing_file'), + mock.call('Individual: NA20870 file not found: gs://readviz/NA20870.cram'), mock.call('---- DONE ----'), - mock.call('Checked 1 samples'), + mock.call('Checked 2 samples'), mock.call('1 files not found:'), mock.call(' 1 in 1kg project nåme with uniçøde'), ] @@ -67,4 +62,4 @@ def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack self.assertEqual(mock_safe_post_to_slack.call_count, 1) mock_safe_post_to_slack.assert_called_with( SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, - "Found 1 
broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA19675_1 gs://missing-bucket/missing_file") + "Found 1 broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA20870 gs://readviz/NA20870.cram") From 05201cddb705556bd33376f7e3bcf139849b5716 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 13:36:00 -0400 Subject: [PATCH 04/55] actually delete brken igv --- .../commands/check_bam_cram_paths.py | 4 ++-- .../tests/check_bam_cram_paths_tests.py | 21 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/seqr/management/commands/check_bam_cram_paths.py b/seqr/management/commands/check_bam_cram_paths.py index 564be0b330..b018c83f67 100644 --- a/seqr/management/commands/check_bam_cram_paths.py +++ b/seqr/management/commands/check_bam_cram_paths.py @@ -47,7 +47,7 @@ def handle(self, *args, **options): guids_of_samples_with_missing_file.add(sample.guid) if len(guids_of_samples_with_missing_file) > 0: - IgvSample.bulk_update(user=None, update_json={'file_path': ''}, guid__in=guids_of_samples_with_missing_file) # TODO delete + IgvSample.bulk_delete(user=None, guid__in=guids_of_samples_with_missing_file) logger.info('---- DONE ----') logger.info('Checked {} samples'.format(len(samples))) @@ -58,7 +58,7 @@ def handle(self, *args, **options): # post to slack if not options.get('dry_run'): - slack_message = 'Found {} broken bam/cram path(s)\n'.format(sum(missing_counter.values())) + slack_message = 'Found and removed {} broken bam/cram path(s)\n'.format(sum(missing_counter.values())) for project_name, missing_paths_list in project_name_to_missing_paths.items(): slack_message += "\nIn project {}:\n".format(project_name) slack_message += "\n".join([ diff --git a/seqr/management/tests/check_bam_cram_paths_tests.py b/seqr/management/tests/check_bam_cram_paths_tests.py index 73c206f200..d8105f1931 100644 --- a/seqr/management/tests/check_bam_cram_paths_tests.py +++ 
b/seqr/management/tests/check_bam_cram_paths_tests.py @@ -15,12 +15,11 @@ class CheckBamCramPathsTest(TestCase): def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '1kg project n\u00e5me with uni\u00e7\u00f8de') - self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) + self._check_results(True, mock_logger, mock_safe_post_to_slack, mock_subprocess) def test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '1kg project') - self.assertEqual(IgvSample.objects.filter(file_path='').count(), 0) self.assertEqual(IgvSample.objects.count(), 3) calls = [ @@ -32,16 +31,20 @@ def test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, def test_command(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths') - self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_subprocess) + self._check_results(True, mock_logger, mock_safe_post_to_slack, mock_subprocess) def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_subprocess): mock_subprocess.return_value.wait.side_effect = [-1, 0] call_command('check_bam_cram_paths', '--dry-run') - self._check_results(0, mock_logger, mock_safe_post_to_slack, mock_subprocess) + self._check_results(False, mock_logger, mock_safe_post_to_slack, mock_subprocess) + + def _check_results(self, did_delete, mock_logger, mock_safe_post_to_slack, mock_subprocess): + igv_file_paths = IgvSample.objects.values_list('file_path', flat=True) + expected_remaining_files = ['/readviz/NA19675.cram', 'gs://datasets-gcnv/NA20870.bed.gz'] + if not did_delete: + expected_remaining_files.append('gs://readviz/NA20870.cram') + 
self.assertListEqual(sorted(igv_file_paths), expected_remaining_files) - def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack, mock_subprocess): - self.assertEqual(IgvSample.objects.filter(file_path='').count(), num_paths_deleted) - self.assertEqual(IgvSample.objects.count(), 3) mock_subprocess.assert_has_calls([ mock.call('gsutil ls gs://readviz/NA20870.cram', stdout=-1, stderr=-2, shell=True), mock.call('gsutil ls gs://datasets-gcnv/NA20870.bed.gz', stdout=-1, stderr=-2, shell=True), @@ -56,10 +59,10 @@ def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack ] mock_logger.info.assert_has_calls(calls) - if num_paths_deleted == 0: + if not did_delete: mock_safe_post_to_slack.assert_not_called() else: self.assertEqual(mock_safe_post_to_slack.call_count, 1) mock_safe_post_to_slack.assert_called_with( SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, - "Found 1 broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA20870 gs://readviz/NA20870.cram") + "Found and removed 1 broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA20870 gs://readviz/NA20870.cram") From a3cff8bc472d5b3efa6861e51e227b28f92f1453 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 13:39:40 -0400 Subject: [PATCH 05/55] remove hail dependency --- requirements.in | 1 - requirements.txt | 197 ++++------------------------------------------- 2 files changed, 14 insertions(+), 184 deletions(-) diff --git a/requirements.in b/requirements.in index ec24dd607f..989f96912a 100644 --- a/requirements.in +++ b/requirements.in @@ -10,7 +10,6 @@ social-auth-core # the Python social authentication package. 
Re elasticsearch==7.9.1 # elasticsearch client elasticsearch-dsl==7.2.1 # elasticsearch query utilities gunicorn # web server -hail<0.3 # provides convenient apis for working with files in google cloud storage jmespath openpyxl # library for reading/writing Excel files pillow # required dependency of Djagno ImageField-type database records diff --git a/requirements.txt b/requirements.txt index fccb79474d..3624815c1c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,79 +1,29 @@ # -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: +# This file is autogenerated by pip-compile with python 3.9 +# To update, run: # # pip-compile requirements.in # -aiohttp==3.8.4 - # via - # aiohttp-session - # hail -aiohttp-session==2.12.0 - # via hail -aiosignal==1.3.1 - # via aiohttp asgiref==3.6.0 # via django async-timeout==4.0.2 - # via - # aiohttp - # redis -asyncinit==0.2.4 - # via hail -attrs==22.2.0 - # via aiohttp -avro==1.11.1 - # via hail -azure-core==1.26.3 - # via - # azure-identity - # azure-storage-blob - # msrest -azure-identity==1.12.0 - # via hail -azure-storage-blob==12.14.1 - # via hail -bokeh==1.4.0 - # via hail -boto3==1.26.76 - # via hail -botocore==1.29.76 - # via - # boto3 - # hail - # s3transfer + # via redis cachetools==5.3.0 # via google-auth certifi==2022.12.7 # via # elasticsearch - # msrest # requests cffi==1.15.1 # via cryptography charset-normalizer==3.0.1 - # via - # aiohttp - # requests -commonmark==0.9.1 - # via rich + # via requests cryptography==39.0.1 - # via - # azure-identity - # azure-storage-blob - # msal - # pyjwt - # social-auth-core -decorator==4.4.2 - # via hail + # via social-auth-core defusedxml==0.7.1 # via # python3-openid # social-auth-core -deprecated==1.2.13 - # via hail -dill==0.3.6 - # via hail django==3.2.19 # via # -r requirements.in @@ -103,11 +53,6 @@ elasticsearch-dsl==7.2.1 # via -r requirements.in et-xmlfile==1.1.0 # via openpyxl -frozenlist==1.3.3 - # via - # aiohttp - # 
aiosignal - # hail google-api-core==2.11.0 # via # google-cloud-core @@ -117,13 +62,10 @@ google-auth==2.14.1 # google-api-core # google-cloud-core # google-cloud-storage - # hail google-cloud-core==2.3.2 # via google-cloud-storage google-cloud-storage==2.7.0 - # via - # django-storages - # hail + # via django-storages google-crc32c==1.5.0 # via google-resumable-media google-resumable-media==2.4.1 @@ -132,82 +74,24 @@ googleapis-common-protos==1.58.0 # via google-api-core gunicorn==20.1.0 # via -r requirements.in -hail==0.2.109 - # via -r requirements.in -humanize==1.1.0 - # via hail -hurry-filesize==0.9 - # via hail idna==3.4 - # via - # requests - # yarl -isodate==0.6.1 - # via msrest -janus==1.0.0 - # via hail -jinja2==3.0.3 - # via - # bokeh - # hail + # via requests jmespath==1.0.1 - # via - # -r requirements.in - # boto3 - # botocore -markupsafe==2.1.2 - # via jinja2 -msal==1.21.0 - # via - # azure-identity - # msal-extensions -msal-extensions==1.0.0 - # via azure-identity -msrest==0.7.1 - # via azure-storage-blob -multidict==6.0.4 - # via - # aiohttp - # yarl -nest-asyncio==1.5.6 - # via hail -numpy==1.24.2 - # via - # bokeh - # hail - # pandas - # scipy + # via -r requirements.in oauthlib==3.2.2 # via # requests-oauthlib # social-auth-core openpyxl==3.1.1 # via -r requirements.in -orjson==3.8.6 - # via hail -packaging==23.0 - # via bokeh -pandas==1.5.3 - # via hail -parsimonious==0.8.1 - # via hail pillow==9.4.0 - # via - # -r requirements.in - # bokeh -plotly==5.10.0 - # via hail -portalocker==2.7.0 - # via msal-extensions + # via -r requirements.in protobuf==3.20.2 # via # google-api-core # googleapis-common-protos - # hail psycopg2==2.9.5 # via -r requirements.in -py4j==0.10.9 - # via pyspark pyasn1==0.4.8 # via # pyasn1-modules @@ -216,72 +100,38 @@ pyasn1-modules==0.2.8 # via google-auth pycparser==2.21 # via cffi -pygments==2.14.0 - # via rich -pyjwt[crypto]==2.6.0 - # via - # hail - # msal - # social-auth-core +pyjwt==2.6.0 + # via social-auth-core 
pyliftover==0.4 # via -r requirements.in -pyspark==3.1.3 - # via hail python-dateutil==2.8.2 - # via - # bokeh - # botocore - # elasticsearch-dsl - # pandas -python-json-logger==2.0.7 - # via hail + # via elasticsearch-dsl python3-openid==3.2.0 # via social-auth-core pytz==2022.7.1 - # via - # django - # pandas -pyyaml==6.0 - # via bokeh + # via django redis==4.5.4 # via -r requirements.in requests==2.31.0 # via # -r requirements.in - # azure-core # django-anymail # google-api-core # google-cloud-storage - # hail - # msal - # msrest # requests-oauthlib # requests-toolbelt # slacker # social-auth-core requests-oauthlib==1.3.1 - # via - # msrest - # social-auth-core + # via social-auth-core requests-toolbelt==0.10.1 # via -r requirements.in -rich==12.6.0 - # via hail rsa==4.9 # via google-auth -s3transfer==0.6.0 - # via boto3 -scipy==1.9.3 - # via hail six==1.16.0 # via - # azure-core - # azure-identity - # bokeh # elasticsearch-dsl # google-auth - # isodate - # parsimonious # python-dateutil slacker==0.14.0 # via -r requirements.in @@ -293,35 +143,16 @@ social-auth-core==4.3.0 # via # -r requirements.in # social-auth-app-django -sortedcontainers==2.4.0 - # via hail sqlparse==0.4.4 # via django -tabulate==0.9.0 - # via hail -tenacity==8.2.1 - # via plotly -tornado==6.3.2 - # via bokeh tqdm==4.64.1 # via -r requirements.in -typing-extensions==4.5.0 - # via - # azure-core - # janus urllib3==1.26.14 # via - # botocore # elasticsearch # requests -uvloop==0.17.0 - # via hail whitenoise==6.3.0 # via -r requirements.in -wrapt==1.14.1 - # via deprecated -yarl==1.8.2 - # via aiohttp # The following packages are considered to be unsafe in a requirements file: # setuptools From c5cfcae95caacb900734bae6ff5e952991533537 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 15:55:20 -0400 Subject: [PATCH 06/55] add backend ping --- seqr/utils/search/hail_search_utils.py | 12 +++++-- seqr/utils/search/utils.py | 4 +-- seqr/views/status_tests.py | 44 
++++++++++++++++++++------ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index dfb92538d6..5eab257b80 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -9,6 +9,14 @@ from settings import HAIL_BACKEND_SERVICE_HOSTNAME, HAIL_BACKEND_SERVICE_PORT +def _hail_backend_url(path): + return f'{HAIL_BACKEND_SERVICE_HOSTNAME}:{HAIL_BACKEND_SERVICE_PORT}/{path}' + + +def ping_hail_backend(): + requests.get(_hail_backend_url('status')).raise_for_status() + + def get_hail_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=1, num_results=100, gene_agg=False, **kwargs): @@ -33,9 +41,7 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver _parse_location_search(search_body) path = 'gene_counts' if gene_agg else 'search' - response = requests.post( - f'{HAIL_BACKEND_SERVICE_HOSTNAME}:{HAIL_BACKEND_SERVICE_PORT}/{path}', json=search_body, timeout=300, - ) + response = requests.post(_hail_backend_url(path), json=search_body, timeout=300) response.raise_for_status() response_json = response.json() diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 1c8d41e109..8347e1cd79 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -9,7 +9,7 @@ from seqr.utils.search.elasticsearch.es_utils import ping_elasticsearch, delete_es_index, get_elasticsearch_status, \ get_es_variants, get_es_variants_for_variant_ids, process_es_previously_loaded_results, process_es_previously_loaded_gene_aggs, \ es_backend_enabled, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS -from seqr.utils.search.hail_search_utils import get_hail_variants +from seqr.utils.search.hail_search_utils import get_hail_variants, ping_hail_backend from seqr.utils.gene_utils import parse_locus_list_items from seqr.utils.xpos_utils import get_xpos @@ -50,7 
+50,7 @@ def backend_specific_call(es_func, other_func=_no_backend_error): def ping_search_backend(): - backend_specific_call(ping_elasticsearch)() + backend_specific_call(ping_elasticsearch, ping_hail_backend)() def get_search_backend_status(): diff --git a/seqr/views/status_tests.py b/seqr/views/status_tests.py index eb01b07be1..4754453ab7 100644 --- a/seqr/views/status_tests.py +++ b/seqr/views/status_tests.py @@ -2,14 +2,15 @@ from django.urls.base import reverse import mock from requests import HTTPError +import responses from seqr.views.status import status_view from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses -class StatusTest(TestCase): +class StatusTest(object): - def _test_status_error(self, url, mock_logger, es_error): + def _test_status_error(self, url, mock_logger, search_backend_error): response = self.client.get(url) self.assertEqual(response.status_code, 400) self.assertDictEqual( @@ -18,30 +19,28 @@ def _test_status_error(self, url, mock_logger, es_error): mock.call('Database "default" connection error: No connection'), mock.call('Database "reference_data" connection error: No connection'), mock.call('Redis connection error: Bad connection'), - mock.call(f'Search backend connection error: {es_error}'), + mock.call(f'Search backend connection error: {search_backend_error}'), mock.call('Kibana connection error: Connection refused: HEAD /status'), ]) mock_logger.reset_mock() - @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.views.status.redis.StrictRedis') @mock.patch('seqr.views.status.connections') @mock.patch('seqr.views.status.logger') @urllib3_responses.activate + @responses.activate def test_status(self, mock_logger, mock_db_connections, mock_redis): url = reverse(status_view) mock_db_connections.__getitem__.return_value.cursor.side_effect = Exception('No connection') mock_redis.return_value.ping.side_effect = HTTPError('Bad connection') + 
responses.add(responses.GET, 'http://test-hail:5000/status', status=400) - self._test_status_error(url, mock_logger, es_error='No response from elasticsearch ping') - - with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''): - self._test_status_error(url, mock_logger, es_error='Elasticsearch backend is disabled') + self._test_status_error(url, mock_logger, search_backend_error=self.SEARCH_BACKEND_ERROR) mock_db_connections.__getitem__.return_value.cursor.side_effect = None mock_redis.return_value.ping.side_effect = None - urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) + self.add_search_backend_success() urllib3_responses.add(urllib3_responses.HEAD, '/status', status=500) response = self.client.get(url) @@ -60,3 +59,30 @@ def test_status(self, mock_logger, mock_db_connections, mock_redis): self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': True}) mock_logger.error.assert_not_called() + + +class ElasticsearchStatusTest(TestCase, StatusTest): + + SEARCH_BACKEND_ERROR = 'No response from elasticsearch ping' + + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') + def test_status(self, *args): + super(ElasticsearchStatusTest, self).test_status(*args) + + @staticmethod + def add_search_backend_success(): + urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) + + +class HailSearchStatusTest(TestCase, StatusTest): + + SEARCH_BACKEND_ERROR = '400 Client Error: Bad Request for url: http://test-hail:5000/status' + + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', 'http://test-hail') + def test_status(self, *args): + super(HailSearchStatusTest, self).test_status(*args) + + @staticmethod + def add_search_backend_success(): + responses.add(responses.GET, 
'http://test-hail:5000/status', status=200) From 4b98a3944d9a8db87ba9c20944b878b2765b16c0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 16:28:43 -0400 Subject: [PATCH 07/55] add conditional status check for kibana --- seqr/utils/search/elasticsearch/es_utils.py | 9 ++++- seqr/utils/search/utils.py | 6 ++- seqr/views/status.py | 13 +++--- seqr/views/status_tests.py | 45 ++++++++++----------- 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/seqr/utils/search/elasticsearch/es_utils.py b/seqr/utils/search/elasticsearch/es_utils.py index 17274cd039..26966dba89 100644 --- a/seqr/utils/search/elasticsearch/es_utils.py +++ b/seqr/utils/search/elasticsearch/es_utils.py @@ -2,6 +2,7 @@ from elasticsearch import Elasticsearch from elasticsearch.exceptions import ConnectionError as EsConnectionError, TransportError import elasticsearch_dsl +from urllib3.connectionpool import connection_from_url from seqr.models import Sample from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json @@ -10,7 +11,7 @@ from seqr.utils.search.elasticsearch.es_search import EsSearch, get_compound_het_page from seqr.views.utils.json_utils import _to_camel_case from settings import ELASTICSEARCH_SERVICE_HOSTNAME, ELASTICSEARCH_SERVICE_PORT, ELASTICSEARCH_CREDENTIALS, \ - ELASTICSEARCH_PROTOCOL, ES_SSL_CONTEXT + ELASTICSEARCH_PROTOCOL, ES_SSL_CONTEXT, KIBANA_SERVER class InvalidIndexException(Exception): @@ -67,6 +68,12 @@ def ping_elasticsearch(): raise ValueError('No response from elasticsearch ping') +def ping_kibana(): + resp = connection_from_url('http://{}'.format(KIBANA_SERVER)).urlopen('HEAD', '/status', timeout=3, retries=3) + if resp.status >= 400: + raise ValueError('Kibana Error {}: {}'.format(resp.status, resp.reason)) + + SAMPLE_FIELDS_LIST = ['samples', 'samples_num_alt_1'] # support .bgz instead of requiring .vcf.bgz due to issues with DSP delivery of large callsets DATASET_FILE_EXTENSIONS = VCF_FILE_EXTENSIONS[:-1] + ('.bgz', '.bed', 
'.mt') diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 8347e1cd79..880c8d25d8 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -8,7 +8,7 @@ from seqr.utils.search.elasticsearch.constants import MAX_VARIANTS from seqr.utils.search.elasticsearch.es_utils import ping_elasticsearch, delete_es_index, get_elasticsearch_status, \ get_es_variants, get_es_variants_for_variant_ids, process_es_previously_loaded_results, process_es_previously_loaded_gene_aggs, \ - es_backend_enabled, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS + es_backend_enabled, ping_kibana, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS from seqr.utils.search.hail_search_utils import get_hail_variants, ping_hail_backend from seqr.utils.gene_utils import parse_locus_list_items from seqr.utils.xpos_utils import get_xpos @@ -53,6 +53,10 @@ def ping_search_backend(): backend_specific_call(ping_elasticsearch, ping_hail_backend)() +def ping_search_backend_admin(): + backend_specific_call(ping_kibana, lambda: True)() + + def get_search_backend_status(): return backend_specific_call(get_elasticsearch_status)() diff --git a/seqr/views/status.py b/seqr/views/status.py index a1a72e7de1..4775d94444 100644 --- a/seqr/views/status.py +++ b/seqr/views/status.py @@ -1,10 +1,9 @@ from django.db import connections import logging import redis -from urllib3.connectionpool import connection_from_url -from settings import SEQR_VERSION, KIBANA_SERVER, REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT, DATABASES -from seqr.utils.search.utils import ping_search_backend +from settings import SEQR_VERSION, REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT, DATABASES +from seqr.utils.search.utils import ping_search_backend, ping_search_backend_admin from seqr.views.utils.json_utils import create_json_response logger = logging.getLogger(__name__) @@ -37,14 +36,12 @@ def status_view(request): dependent_services_ok = False 
logger.error('Search backend connection error: {}'.format(str(e))) - # Test kibana connection + # Test search admin view connection try: - resp = connection_from_url('http://{}'.format(KIBANA_SERVER)).urlopen('HEAD', '/status', timeout=3, retries=3) - if resp.status >= 400: - raise ValueError('Error {}: {}'.format(resp.status, resp.reason)) + ping_search_backend_admin() except Exception as e: secondary_services_ok = False - logger.error('Kibana connection error: {}'.format(str(e))) + logger.error('Search Admin connection error: {}'.format(str(e))) return create_json_response( diff --git a/seqr/views/status_tests.py b/seqr/views/status_tests.py index 4754453ab7..154a680b99 100644 --- a/seqr/views/status_tests.py +++ b/seqr/views/status_tests.py @@ -10,18 +10,20 @@ class StatusTest(object): - def _test_status_error(self, url, mock_logger, search_backend_error): + def _test_status_error(self, url, mock_logger): response = self.client.get(url) self.assertEqual(response.status_code, 400) self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': False, 'secondary_services_ok': False}) - mock_logger.error.assert_has_calls([ + calls = [ mock.call('Database "default" connection error: No connection'), mock.call('Database "reference_data" connection error: No connection'), mock.call('Redis connection error: Bad connection'), - mock.call(f'Search backend connection error: {search_backend_error}'), - mock.call('Kibana connection error: Connection refused: HEAD /status'), - ]) + mock.call(f'Search backend connection error: {self.SEARCH_BACKEND_ERROR}'), + ] + if self.HAS_KIBANA: + calls.append(mock.call('Search Admin connection error: Connection refused: HEAD /status')) + mock_logger.error.assert_has_calls(calls) mock_logger.reset_mock() @mock.patch('seqr.views.status.redis.StrictRedis') @@ -36,25 +38,28 @@ def test_status(self, mock_logger, mock_db_connections, mock_redis): mock_redis.return_value.ping.side_effect = HTTPError('Bad connection') 
responses.add(responses.GET, 'http://test-hail:5000/status', status=400) - self._test_status_error(url, mock_logger, search_backend_error=self.SEARCH_BACKEND_ERROR) + self._test_status_error(url, mock_logger) mock_db_connections.__getitem__.return_value.cursor.side_effect = None mock_redis.return_value.ping.side_effect = None - self.add_search_backend_success() + responses.add(responses.GET, 'http://test-hail:5000/status', status=200) + urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) urllib3_responses.add(urllib3_responses.HEAD, '/status', status=500) response = self.client.get(url) self.assertEqual(response.status_code, 200) - self.assertDictEqual( - response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': False}) - mock_logger.error.assert_has_calls([ - mock.call('Kibana connection error: Error 500: Internal Server Error'), - ]) + if self.HAS_KIBANA: + self.assertDictEqual( + response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': False}) + mock_logger.error.assert_has_calls([ + mock.call('Search Admin connection error: Kibana Error 500: Internal Server Error'), + ]) - mock_logger.reset_mock() - urllib3_responses.replace_json('/status', {'success': True}, method=urllib3_responses.HEAD, status=200) + mock_logger.reset_mock() + urllib3_responses.replace_json('/status', {'success': True}, method=urllib3_responses.HEAD, status=200) + + response = self.client.get(url) - response = self.client.get(url) self.assertEqual(response.status_code, 200) self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': True}) @@ -64,25 +69,19 @@ def test_status(self, mock_logger, mock_db_connections, mock_redis): class ElasticsearchStatusTest(TestCase, StatusTest): SEARCH_BACKEND_ERROR = 'No response from elasticsearch ping' + HAS_KIBANA = True @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') 
def test_status(self, *args): super(ElasticsearchStatusTest, self).test_status(*args) - @staticmethod - def add_search_backend_success(): - urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) - class HailSearchStatusTest(TestCase, StatusTest): SEARCH_BACKEND_ERROR = '400 Client Error: Bad Request for url: http://test-hail:5000/status' + HAS_KIBANA = False @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', 'http://test-hail') def test_status(self, *args): super(HailSearchStatusTest, self).test_status(*args) - - @staticmethod - def add_search_backend_success(): - responses.add(responses.GET, 'http://test-hail:5000/status', status=200) From 7dc95bc1ca24c824acf75b3225972ba9dc8c3473 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Jun 2023 16:44:27 -0400 Subject: [PATCH 08/55] explicitly set hail backend fallback --- seqr/management/tests/lift_project_to_hg38_tests.py | 2 +- seqr/utils/search/add_data_utils.py | 9 ++++++++- seqr/utils/search/utils.py | 12 ++++++------ seqr/views/apis/dataset_api_tests.py | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/seqr/management/tests/lift_project_to_hg38_tests.py b/seqr/management/tests/lift_project_to_hg38_tests.py index 99e74cc180..0342bd0fac 100644 --- a/seqr/management/tests/lift_project_to_hg38_tests.py +++ b/seqr/management/tests/lift_project_to_hg38_tests.py @@ -167,7 +167,7 @@ def test_command_other_exceptions(self, mock_liftover, mock_single_es_variants, with self.assertRaises(Exception) as ce: call_command('lift_project_to_hg38', '--project={}'.format(PROJECT_NAME), '--es-index={}'.format(ELASTICSEARCH_INDEX)) - self.assertEqual(str(ce.exception), 'Elasticsearch backend is disabled') + self.assertEqual(str(ce.exception), 'Adding samples is disabled for the hail backend') # Test discontinue on a failed lift mock_liftover_to_38 = mock_liftover.return_value diff --git 
a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py index 33092cb1fd..4dabd97e23 100644 --- a/seqr/utils/search/add_data_utils.py +++ b/seqr/utils/search/add_data_utils.py @@ -4,12 +4,19 @@ from seqr.views.utils.dataset_utils import match_and_update_search_samples, load_mapping_file +def _hail_backend_error(*args, **kwargs): + raise ValueError('Adding samples is disabled for the hail backend') + + def add_new_search_samples(request_json, project, user, summary_template=None, expected_families=None): dataset_type = request_json.get('datasetType') if dataset_type not in Sample.DATASET_TYPE_LOOKUP: raise ValueError(f'Invalid dataset type "{dataset_type}"') - sample_ids, sample_type, sample_data = backend_specific_call(validate_es_index_metadata_and_get_samples)(request_json, project) + sample_ids, sample_type, sample_data = backend_specific_call( + validate_es_index_metadata_and_get_samples, + _hail_backend_error, + )(request_json, project) if not sample_ids: raise ValueError('No samples found. 
Make sure the specified caller type is correct') diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 880c8d25d8..923c53a5e1 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -38,11 +38,11 @@ class InvalidSearchException(Exception): DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] -def _no_backend_error(*args, **kwargs): +def _no_es_error(*args, **kwargs): raise InvalidSearchException('Elasticsearch backend is disabled') -def backend_specific_call(es_func, other_func=_no_backend_error): +def backend_specific_call(es_func, other_func): if es_backend_enabled(): return es_func else: @@ -58,7 +58,7 @@ def ping_search_backend_admin(): def get_search_backend_status(): - return backend_specific_call(get_elasticsearch_status)() + return backend_specific_call(get_elasticsearch_status, _no_es_error)() def _get_filtered_search_samples(search_filter, active_only=True): @@ -103,11 +103,11 @@ def delete_search_backend_data(data_id): projects = set(active_samples.values_list('individual__family__project__name', flat=True)) raise InvalidSearchException(f'"{data_id}" is still used by: {", ".join(projects)}') - return backend_specific_call(delete_es_index)(data_id) + return backend_specific_call(delete_es_index, _no_es_error)(data_id) def get_single_variant(families, variant_id, return_all_queried_families=False, user=None): - variants = backend_specific_call(get_es_variants_for_variant_ids)( + variants = backend_specific_call(get_es_variants_for_variant_ids, _no_es_error)( # TODO *_get_families_search_data(families), [variant_id], user, return_all_queried_families=return_all_queried_families, ) if not variants: @@ -116,7 +116,7 @@ def get_single_variant(families, variant_id, return_all_queried_families=False, def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, user=None): - return backend_specific_call(get_es_variants_for_variant_ids)( + return 
backend_specific_call(get_es_variants_for_variant_ids, _no_es_error)( # TODO *_get_families_search_data(families), variant_ids, user, dataset_type=dataset_type, ) diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py index a63e24593a..df71ece234 100644 --- a/seqr/views/apis/dataset_api_tests.py +++ b/seqr/views/apis/dataset_api_tests.py @@ -311,7 +311,7 @@ def test_add_variants_dataset_errors(self): with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''): response = self.client.post(url, content_type='application/json', data=ADD_DATASET_PAYLOAD) self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled') + self.assertEqual(response.json()['errors'][0], 'Adding samples is disabled for the hail backend') response = self.client.post(url, content_type='application/json', data=ADD_DATASET_PAYLOAD) self.assertEqual(response.status_code, 400) From ddcd3ade48ff2a2921f59d918f1b9534e8c54aef Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 12:01:40 -0400 Subject: [PATCH 09/55] add explicit errors for no hail backend cases --- seqr/utils/search/utils.py | 16 ++++++++++------ seqr/views/apis/data_manager_api_tests.py | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index bb83c128eb..b3e6c227f4 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -38,8 +38,10 @@ class InvalidSearchException(Exception): DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] -def _no_es_error(*args, **kwargs): - raise InvalidSearchException('Elasticsearch backend is disabled') +def _raise_search_error(error): + def _wrapped(*args, **kwargs): + raise InvalidSearchException(error) + return _wrapped def backend_specific_call(es_func, other_func): @@ -58,7 +60,7 @@ def ping_search_backend_admin(): def 
get_search_backend_status(): - return backend_specific_call(get_elasticsearch_status, _no_es_error)() + return backend_specific_call(get_elasticsearch_status, _raise_search_error('Elasticsearch is disabled'))() def _get_filtered_search_samples(search_filter, active_only=True): @@ -103,11 +105,13 @@ def delete_search_backend_data(data_id): projects = set(active_samples.values_list('individual__family__project__name', flat=True)) raise InvalidSearchException(f'"{data_id}" is still used by: {", ".join(projects)}') - return backend_specific_call(delete_es_index, _no_es_error)(data_id) + return backend_specific_call( + delete_es_index, _raise_search_error('Deleting indices is disabled for the hail backend'), + )(data_id) def get_single_variant(families, variant_id, return_all_queried_families=False, user=None): - variants = backend_specific_call(get_es_variants_for_variant_ids, _no_es_error)( # TODO + variants = backend_specific_call(get_es_variants_for_variant_ids, _raise_search_error('Elasticsearch backend is disabled'))( # TODO *_get_families_search_data(families), [variant_id], user, return_all_queried_families=return_all_queried_families, ) if not variants: @@ -116,7 +120,7 @@ def get_single_variant(families, variant_id, return_all_queried_families=False, def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, user=None): - return backend_specific_call(get_es_variants_for_variant_ids, _no_es_error)( # TODO + return backend_specific_call(get_es_variants_for_variant_ids, _raise_search_error('Elasticsearch backend is disabled'))( # TODO *_get_families_search_data(families), variant_ids, user, dataset_type=dataset_type, ) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 8d77c0ea60..c92d3c3189 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -412,7 +412,7 @@ def test_elasticsearch_status(self): with 
mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''): response = self.client.get(url) self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled') + self.assertEqual(response.json()['error'], 'Elasticsearch is disabled') @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @urllib3_responses.activate @@ -446,7 +446,7 @@ def test_delete_index(self): with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''): response = self.client.post(url, content_type='application/json', data=json.dumps({'index': 'unused_index'})) self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled') + self.assertEqual(response.json()['error'], 'Deleting indices is disabled for the hail backend') @mock.patch('seqr.utils.file_utils.subprocess.Popen') def test_upload_qc_pipeline_output(self, mock_subprocess): From 38f4eaed127874cee337711cce65ea24aa317206 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 12:06:08 -0400 Subject: [PATCH 10/55] codacy fix --- seqr/utils/search/hail_search_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 5eab257b80..33528179db 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -14,7 +14,7 @@ def _hail_backend_url(path): def ping_hail_backend(): - requests.get(_hail_backend_url('status')).raise_for_status() + requests.get(_hail_backend_url('status'), timeout=5).raise_for_status() def get_hail_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=1, num_results=100, From 3c68b626c62debba2006724c22db7283e6071a29 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 13:04:18 -0400 Subject: [PATCH 11/55] add 
handler for single variant --- seqr/utils/search/constants.py | 11 ++++ seqr/utils/search/hail_search_utils.py | 81 ++++++++++++++++++++----- seqr/utils/search/search_utils_tests.py | 7 +-- seqr/utils/search/utils.py | 26 ++++---- 4 files changed, 91 insertions(+), 34 deletions(-) diff --git a/seqr/utils/search/constants.py b/seqr/utils/search/constants.py index 47f5a2ba92..f75b8065a0 100644 --- a/seqr/utils/search/constants.py +++ b/seqr/utils/search/constants.py @@ -1,3 +1,5 @@ +from seqr.models import Sample + SEQR_DATSETS_GS_PATH = 'gs://seqr-datasets/v02' VCF_FILE_EXTENSIONS = ('.vcf', '.vcf.gz', '.vcf.bgz') @@ -16,3 +18,12 @@ NEW_SV_FIELD = 'new_structural_variants' SV_ANNOTATION_TYPES = {'structural_consequence', 'structural', NEW_SV_FIELD} ALL_DATA_TYPES = 'ALL' + +DATASET_TYPES_LOOKUP = { + data_types[0]: data_types for data_types in [ + [Sample.DATASET_TYPE_VARIANT_CALLS, Sample.DATASET_TYPE_MITO_CALLS], + [Sample.DATASET_TYPE_SV_CALLS], + ] +} +DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] + diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 33528179db..f1d32a8f4f 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -4,7 +4,7 @@ import requests from reference_data.models import Omim, GeneConstraint, GENOME_VERSION_LOOKUP from seqr.models import Sample, PhenotypePrioritization -from seqr.utils.search.constants import PRIORITIZED_GENE_SORT +from seqr.utils.search.constants import PRIORITIZED_GENE_SORT, DATASET_TYPES_LOOKUP from seqr.utils.xpos_utils import MIN_POS, MAX_POS from settings import HAIL_BACKEND_SERVICE_HOSTNAME, HAIL_BACKEND_SERVICE_PORT @@ -13,37 +13,34 @@ def _hail_backend_url(path): return f'{HAIL_BACKEND_SERVICE_HOSTNAME}:{HAIL_BACKEND_SERVICE_PORT}/{path}' +def _execute_search(search_body, path='search'): + response = requests.post(_hail_backend_url(path), json=search_body, timeout=300) + 
response.raise_for_status() + return response.json() + + def ping_hail_backend(): requests.get(_hail_backend_url('status'), timeout=5).raise_for_status() def get_hail_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=1, num_results=100, gene_agg=False, **kwargs): - end_offset = num_results * page - search_body = { - 'requester_email': user.email, - 'genome_version': GENOME_VERSION_LOOKUP[genome_version], + search_body = _format_search_body(samples, genome_version, user, end_offset, search) + + search_body.update({ 'sort': sort, 'sort_metadata': _get_sort_metadata(sort, samples), - 'num_results': end_offset, - } - search_body.update(search) - search_body.update({ 'frequencies': search_body.pop('freqs', None), 'quality_filter': search_body.pop('qualityFilter', None), 'custom_query': search_body.pop('customQuery', None), }) search_body.pop('skipped_samples', None) - search_body['sample_data'] = _get_sample_data(samples, search_body.get('inheritance_filter')) - _parse_location_search(search_body) path = 'gene_counts' if gene_agg else 'search' - response = requests.post(_hail_backend_url(path), json=search_body, timeout=300) - response.raise_for_status() - response_json = response.json() + response_json = _execute_search(search_body, path) if gene_agg: previous_search_results['gene_aggs'] = response_json @@ -54,6 +51,45 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver return response_json['results'][end_offset - num_results:end_offset] +def get_hail_variants_for_variant_ids(samples, genome_version, raw_variant_ids, user, return_all_queried_families=False): + variant_ids = [] + variant_keys = [] + from seqr.utils.search.utils import parse_variant_id + for variant_id in raw_variant_ids: + try: + variant_ids.append(parse_variant_id(variant_id)) + except (KeyError, ValueError): + variant_keys.append(variant_id) + + dataset_types = set() + if variant_keys: + 
dataset_types.update(DATASET_TYPES_LOOKUP[Sample.DATASET_TYPE_SV_CALLS]) + if variant_ids: + dataset_types.update(DATASET_TYPES_LOOKUP[Sample.DATASET_TYPE_VARIANT_CALLS]) + + search_body = _format_search_body( + samples.filter(dataset_type__in=dataset_types), genome_version, user, len(raw_variant_ids), { + 'variant_ids': variant_ids, 'variant_keys': variant_keys, + }) + response_json = _execute_search(search_body) + + if return_all_queried_families: + _validate_expected_families(response_json['results'], {s['family_guid'] for s in search_body['sample_data']}) + + return response_json['results'] + + +def _format_search_body(samples, genome_version, user, num_results, search): + search_body = { + 'requester_email': user.email, + 'genome_version': GENOME_VERSION_LOOKUP[genome_version], + 'num_results': num_results, + } + search_body.update(search) + search_body['sample_data'] = _get_sample_data(samples, search_body.get('inheritance_filter')) + return search_body + + def _get_sample_data(samples, inheritance_filter): sample_data = samples.order_by('id').values( 'sample_id', 'dataset_type', 'sample_type', @@ -129,3 +165,20 @@ def _format_interval(chrom=None, start=None, end=None, offset=None, **kwargs): start = max(start - offset_pos, MIN_POS) end = min(end + offset_pos, MAX_POS) return f'{chrom}:{start}-{end}' + + +def _validate_expected_families(results, expected_families): + # In the ES backend we could force return variants even if all families are hom ref + # This is not possible in the hail backend as those rows are removed at loading, so fail if missing + invalid_family_variants = [] + for result in results: + missing_families = expected_families - set(result['familyGuids']) + if missing_families: + invalid_family_variants.append((result['variantId'], missing_families)) + + if invalid_family_variants: + from seqr.utils.search.utils import InvalidSearchException + missing = ', '.join([ + f'{variant_id} ({"; ".join(sorted(families))})' for variant_id, families in 
invalid_family_variants + ]) + raise InvalidSearchException(f'Unable to return all families for the following variants: {missing}') diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 34213fb4a0..30a66f4177 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -435,12 +435,9 @@ class HailSearchUtilsTests(TestCase, SearchUtilsTests): def setUp(self): self.set_up() - @mock.patch('seqr.utils.search.utils.ping_elasticsearch') + @mock.patch('seqr.utils.search.utils.get_hail_variants_for_variant_ids') def test_get_single_variant(self, mock_call): - with self.assertRaises(InvalidSearchException) as cm: - super(HailSearchUtilsTests, self).test_get_single_variant(mock_call) - self.assertEqual(str(cm.exception), 'Elasticsearch backend is disabled') - mock_call.assert_not_called() + super(HailSearchUtilsTests, self).test_get_single_variant(mock_call) @mock.patch('seqr.utils.search.utils.ping_elasticsearch') def test_get_variants_for_variant_ids(self, mock_call): diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index b3e6c227f4..1a619cbb70 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -4,12 +4,12 @@ from seqr.models import Sample, Individual, Project from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json from seqr.utils.search.constants import XPOS_SORT_KEY, PRIORITIZED_GENE_SORT, RECESSIVE, COMPOUND_HET, \ - MAX_NO_LOCATION_COMP_HET_FAMILIES, SV_ANNOTATION_TYPES, ALL_DATA_TYPES, MAX_EXPORT_VARIANTS + MAX_NO_LOCATION_COMP_HET_FAMILIES, SV_ANNOTATION_TYPES, ALL_DATA_TYPES, MAX_EXPORT_VARIANTS, DATASET_TYPES_LOOKUP from seqr.utils.search.elasticsearch.constants import MAX_VARIANTS from seqr.utils.search.elasticsearch.es_utils import ping_elasticsearch, delete_es_index, get_elasticsearch_status, \ get_es_variants, get_es_variants_for_variant_ids, process_es_previously_loaded_results, 
process_es_previously_loaded_gene_aggs, \ es_backend_enabled, ping_kibana, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS -from seqr.utils.search.hail_search_utils import get_hail_variants, ping_hail_backend +from seqr.utils.search.hail_search_utils import get_hail_variants, get_hail_variants_for_variant_ids, ping_hail_backend from seqr.utils.gene_utils import parse_locus_list_items from seqr.utils.xpos_utils import get_xpos @@ -29,14 +29,6 @@ class InvalidSearchException(Exception): ERROR_LOG_EXCEPTIONS = set() ERROR_LOG_EXCEPTIONS.update(ES_ERROR_LOG_EXCEPTIONS) -DATASET_TYPES_LOOKUP = { - data_types[0]: data_types for data_types in [ - [Sample.DATASET_TYPE_VARIANT_CALLS, Sample.DATASET_TYPE_MITO_CALLS], - [Sample.DATASET_TYPE_SV_CALLS], - ] -} -DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] - def _raise_search_error(error): def _wrapped(*args, **kwargs): @@ -111,7 +103,7 @@ def delete_search_backend_data(data_id): def get_single_variant(families, variant_id, return_all_queried_families=False, user=None): - variants = backend_specific_call(get_es_variants_for_variant_ids, _raise_search_error('Elasticsearch backend is disabled'))( # TODO + variants = backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)( *_get_families_search_data(families), [variant_id], user, return_all_queried_families=return_all_queried_families, ) if not variants: @@ -273,17 +265,21 @@ def _parse_variant_items(search_json): else: try: variant_id = item.lstrip('chr') - chrom, pos, ref, alt = variant_id.split('-') - pos = int(pos) - get_xpos(chrom, pos) + parsed_variant_ids.append(parse_variant_id(variant_id)) variant_ids.append(variant_id) - parsed_variant_ids.append((chrom, pos, ref, alt)) except (KeyError, ValueError): invalid_items.append(item) return rs_ids, variant_ids, parsed_variant_ids, invalid_items +def parse_variant_id(variant_id): + chrom, pos, ref, alt = 
variant_id.split('-') + pos = int(pos) + get_xpos(chrom, pos) + return chrom, pos, ref, alt + + def _validate_sort(sort, families): if sort == PRIORITIZED_GENE_SORT and len(families) > 1: raise InvalidSearchException('Phenotype sort is only supported for single-family search.') From 5dab8c436e017f48543e9f81e0d920429ad60251 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 16:05:44 -0400 Subject: [PATCH 12/55] shared functionality for variant filtering --- seqr/utils/search/constants.py | 9 --- seqr/utils/search/elasticsearch/es_utils.py | 8 +-- .../search/elasticsearch/es_utils_tests.py | 12 +++- seqr/utils/search/hail_search_utils.py | 28 +++------ seqr/utils/search/search_utils_tests.py | 44 +++++++++----- seqr/utils/search/utils.py | 58 +++++++++++++------ 6 files changed, 91 insertions(+), 68 deletions(-) diff --git a/seqr/utils/search/constants.py b/seqr/utils/search/constants.py index f75b8065a0..5537d304e4 100644 --- a/seqr/utils/search/constants.py +++ b/seqr/utils/search/constants.py @@ -18,12 +18,3 @@ NEW_SV_FIELD = 'new_structural_variants' SV_ANNOTATION_TYPES = {'structural_consequence', 'structural', NEW_SV_FIELD} ALL_DATA_TYPES = 'ALL' - -DATASET_TYPES_LOOKUP = { - data_types[0]: data_types for data_types in [ - [Sample.DATASET_TYPE_VARIANT_CALLS, Sample.DATASET_TYPE_MITO_CALLS], - [Sample.DATASET_TYPE_SV_CALLS], - ] -} -DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] - diff --git a/seqr/utils/search/elasticsearch/es_utils.py b/seqr/utils/search/elasticsearch/es_utils.py index 26966dba89..2b48a016cd 100644 --- a/seqr/utils/search/elasticsearch/es_utils.py +++ b/seqr/utils/search/elasticsearch/es_utils.py @@ -262,13 +262,11 @@ def _get_es_indices(client): return indices, seqr_index_projects -def get_es_variants_for_variant_ids(samples, genome_version, variant_ids, user, dataset_type=None, return_all_queried_families=False): +def get_es_variants_for_variant_ids(samples, genome_version, 
variants_by_id, user=None, return_all_queried_families=False): variants = EsSearch( samples, genome_version, user=user, return_all_queried_families=return_all_queried_families, - ).filter_by_variant_ids(variant_ids) - if dataset_type: - variants = variants.update_dataset_type(dataset_type) - return variants.search(num_results=len(variant_ids)) + ).filter_by_variant_ids(list(variants_by_id.keys())) + return variants.search(num_results=len(variants_by_id)) def get_es_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=None, num_results=None, diff --git a/seqr/utils/search/elasticsearch/es_utils_tests.py b/seqr/utils/search/elasticsearch/es_utils_tests.py index c40c1c8d3e..c207a9cfec 100644 --- a/seqr/utils/search/elasticsearch/es_utils_tests.py +++ b/seqr/utils/search/elasticsearch/es_utils_tests.py @@ -732,7 +732,7 @@ def call_request_json(self, index=-1): PARSED_NO_CONSEQUENCE_FILTER_VARIANTS = deepcopy(PARSED_VARIANTS) PARSED_NO_CONSEQUENCE_FILTER_VARIANTS[1]['selectedMainTranscriptId'] = None -PARSED_NO_SORT_VARIANTS = deepcopy(PARSED_NO_CONSEQUENCE_FILTER_VARIANTS) +PARSED_NO_SORT_VARIANTS = deepcopy(PARSED_NO_CONSEQUENCE_FILTER_VARIANTS + [PARSED_SV_VARIANT]) for var in PARSED_NO_SORT_VARIANTS: del var['_sort'] @@ -1397,7 +1397,13 @@ def test_get_single_es_variant(self): self.assertDictEqual(variant, PARSED_NO_SORT_VARIANTS[1]) self.assertExecutedSearch( filters=[{'terms': {'variantId': ['2-103343353-GAGA-G']}}], - size=3, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME, SV_INDEX_NAME]), unsorted=True, + size=2, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME]), unsorted=True, + ) + + variant = get_single_variant(self.families, 'prefix_19107_DEL') + self.assertDictEqual(variant, PARSED_NO_SORT_VARIANTS[2]) + self.assertExecutedSearch( + filters=[{'terms': {'variantId': ['prefix_19107_DEL']}}], size=1, index=SV_INDEX_NAME, unsorted=True, ) variant = get_single_variant(self.families, '1-248367227-TC-T', 
return_all_queried_families=True) @@ -1409,7 +1415,7 @@ def test_get_single_es_variant(self): self.assertDictEqual(variant, all_family_variant) self.assertExecutedSearch( filters=[{'terms': {'variantId': ['1-248367227-TC-T']}}], - size=3, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME, SV_INDEX_NAME]), unsorted=True, + size=2, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME]), unsorted=True, ) with self.assertRaises(InvalidSearchException) as cm: diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index f1d32a8f4f..9dc2e67d08 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -4,7 +4,7 @@ import requests from reference_data.models import Omim, GeneConstraint, GENOME_VERSION_LOOKUP from seqr.models import Sample, PhenotypePrioritization -from seqr.utils.search.constants import PRIORITIZED_GENE_SORT, DATASET_TYPES_LOOKUP +from seqr.utils.search.constants import PRIORITIZED_GENE_SORT from seqr.utils.xpos_utils import MIN_POS, MAX_POS from settings import HAIL_BACKEND_SERVICE_HOSTNAME, HAIL_BACKEND_SERVICE_PORT @@ -51,26 +51,12 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver return response_json['results'][end_offset - num_results:end_offset] -def get_hail_variants_for_variant_ids(samples, genome_version, raw_variant_ids, user, return_all_queried_families=False): - variant_ids = [] - variant_keys = [] - from seqr.utils.search.utils import parse_variant_id - for variant_id in raw_variant_ids: - try: - variant_ids.append(parse_variant_id(variant_id)) - except (KeyError, ValueError): - variant_keys.append(variant_id) - - dataset_types = set() - if variant_keys: - dataset_types.update(DATASET_TYPES_LOOKUP[Sample.DATASET_TYPE_SV_CALLS]) - if variant_ids: - dataset_types.update(DATASET_TYPES_LOOKUP[Sample.DATASET_TYPE_VARIANT_CALLS]) - - search_body = _format_search_body( - samples.filter(dataset_type__in=dataset_types), genome_version, user, 
len(raw_variant_ids), { - 'variant_ids': variant_ids, 'variant_keys': variant_keys, - }) +def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_ids, user=None, return_all_queried_families=False): + search = { + 'variant_ids': [parsed_id for parsed_id in parsed_variant_ids.values() if parsed_id], + 'variant_keys': [variant_id for variant_id, parsed_id in parsed_variant_ids.items() if not parsed_id], + } + search_body = _format_search_body(samples, genome_version, user, len(parsed_variant_ids), search) response_json = _execute_search(search_body) if return_all_queried_families: diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 30a66f4177..00097b6cdf 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -59,15 +59,27 @@ def test_get_single_variant(self, mock_get_variants_for_ids): variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) self.assertDictEqual(variant, PARSED_VARIANTS[0]) mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', ['2-103343353-GAGA-G'], self.user, return_all_queried_families=False, + mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, ) - self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) + expected_samples = { + s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + } + self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', ['2-103343353-GAGA-G'], self.user, return_all_queried_families=True, + mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, return_all_queried_families=True, ) - 
self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) + self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) + + get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) + mock_get_variants_for_ids.assert_called_with( + mock.ANY, '37', {'prefix_19107_DEL': None}, user=self.user, + ) + expected_samples = { + s for s in self.search_samples if s.guid in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + } + self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) mock_get_variants_for_ids.return_value = [] with self.assertRaises(InvalidSearchException) as cm: @@ -77,14 +89,23 @@ def test_get_single_variant(self, mock_get_variants_for_ids): def test_get_variants_for_variant_ids(self, mock_get_variants_for_ids): variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL'] get_variants_for_variant_ids(self.families, variant_ids, user=self.user) - mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', variant_ids, self.user, dataset_type=None) + mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { + '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), + '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), + 'prefix-938_DEL': None, + }, user=self.user) self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) get_variants_for_variant_ids( self.families, variant_ids, user=self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) - mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', variant_ids, self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) - self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) + mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { + '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), + '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), + }, user=self.user) + expected_samples = { + s for s 
in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + } + self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) @mock.patch('seqr.utils.search.utils.MAX_NO_LOCATION_COMP_HET_FAMILIES', 1) def _test_invalid_search_params(self, search_func): @@ -439,12 +460,9 @@ def setUp(self): def test_get_single_variant(self, mock_call): super(HailSearchUtilsTests, self).test_get_single_variant(mock_call) - @mock.patch('seqr.utils.search.utils.ping_elasticsearch') + @mock.patch('seqr.utils.search.utils.get_hail_variants_for_variant_ids') def test_get_variants_for_variant_ids(self, mock_call): - with self.assertRaises(InvalidSearchException) as cm: - super(HailSearchUtilsTests, self).test_get_variants_for_variant_ids(mock_call) - self.assertEqual(str(cm.exception), 'Elasticsearch backend is disabled') - mock_call.assert_not_called() + super(HailSearchUtilsTests, self).test_get_variants_for_variant_ids(mock_call) @mock.patch('seqr.utils.search.utils.get_hail_variants') def test_query_variants(self, mock_call): diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index 1a619cbb70..d5b3bc857c 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -4,7 +4,7 @@ from seqr.models import Sample, Individual, Project from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json from seqr.utils.search.constants import XPOS_SORT_KEY, PRIORITIZED_GENE_SORT, RECESSIVE, COMPOUND_HET, \ - MAX_NO_LOCATION_COMP_HET_FAMILIES, SV_ANNOTATION_TYPES, ALL_DATA_TYPES, MAX_EXPORT_VARIANTS, DATASET_TYPES_LOOKUP + MAX_NO_LOCATION_COMP_HET_FAMILIES, SV_ANNOTATION_TYPES, ALL_DATA_TYPES, MAX_EXPORT_VARIANTS from seqr.utils.search.elasticsearch.constants import MAX_VARIANTS from seqr.utils.search.elasticsearch.es_utils import ping_elasticsearch, delete_es_index, get_elasticsearch_status, \ get_es_variants, get_es_variants_for_variant_ids, process_es_previously_loaded_results, 
process_es_previously_loaded_gene_aggs, \ @@ -29,6 +29,14 @@ class InvalidSearchException(Exception): ERROR_LOG_EXCEPTIONS = set() ERROR_LOG_EXCEPTIONS.update(ES_ERROR_LOG_EXCEPTIONS) +DATASET_TYPES_LOOKUP = { + data_types[0]: data_types for data_types in [ + [Sample.DATASET_TYPE_VARIANT_CALLS, Sample.DATASET_TYPE_MITO_CALLS], + [Sample.DATASET_TYPE_SV_CALLS], + ] +} +DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts] + def _raise_search_error(error): def _wrapped(*args, **kwargs): @@ -66,16 +74,16 @@ def get_search_samples(projects, active_only=True): return _get_filtered_search_samples({'individual__family__project__in': projects}, active_only=active_only) -def _get_families_search_data(families, dataset_types=None): +def _get_families_search_data(families, dataset_type=None): samples = _get_filtered_search_samples({'individual__family__in': families}) if len(samples) < 1: raise InvalidSearchException('No search data found for families {}'.format( ', '.join([f.family_id for f in families]))) - if dataset_types: - samples = samples.filter(dataset_type__in=dataset_types) + if dataset_type: + samples = samples.filter(dataset_type__in=DATASET_TYPES_LOOKUP[dataset_type]) if not samples: - raise InvalidSearchException(f'Unable to search against dataset type "{dataset_types[0]}"') + raise InvalidSearchException(f'Unable to search against dataset type "{dataset_type}"') projects = Project.objects.filter(family__individual__sample__in=samples).values_list('genome_version', 'name') project_versions = defaultdict(set) @@ -102,18 +110,34 @@ def delete_search_backend_data(data_id): )(data_id) -def get_single_variant(families, variant_id, return_all_queried_families=False, user=None): - variants = backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)( - *_get_families_search_data(families), [variant_id], user, return_all_queried_families=return_all_queried_families, - ) +def 
get_single_variant(families, variant_id, **kwargs): + variants = get_variants_for_variant_ids(families, [variant_id], **kwargs) if not variants: raise InvalidSearchException('Variant {} not found'.format(variant_id)) return variants[0] -def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, user=None): - return backend_specific_call(get_es_variants_for_variant_ids, _raise_search_error('Elasticsearch backend is disabled'))( # TODO - *_get_families_search_data(families), variant_ids, user, dataset_type=dataset_type, +def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, **kwargs): + parsed_variant_ids = {} + for variant_id in variant_ids: + try: + parsed_variant_ids[variant_id] = _parse_variant_id(variant_id) + except (KeyError, ValueError): + parsed_variant_ids[variant_id] = None + + if dataset_type: + def is_valid(v_id): + if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS: + return bool(v_id) + return not v_id + parsed_variant_ids = {k: v for k, v in parsed_variant_ids.items() if is_valid(v)} + elif all(v for v in parsed_variant_ids.values()): + dataset_type = Sample.DATASET_TYPE_VARIANT_CALLS + elif all(v is None for v in parsed_variant_ids.values()): + dataset_type = Sample.DATASET_TYPE_SV_CALLS + + return backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)( + *_get_families_search_data(families, dataset_type=dataset_type), parsed_variant_ids, **kwargs ) @@ -198,11 +222,11 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_ dataset_type, secondary_dataset_type = _search_dataset_type(parsed_search) parsed_search.update({'dataset_type': dataset_type, 'secondary_dataset_type': secondary_dataset_type}) - dataset_types = None + search_dataset_type = None if dataset_type and dataset_type != ALL_DATA_TYPES and (secondary_dataset_type is None or secondary_dataset_type == dataset_type): - dataset_types = DATASET_TYPES_LOOKUP[dataset_type] + search_dataset_type 
= dataset_type - samples, genome_version = _get_families_search_data(families, dataset_types=dataset_types) + samples, genome_version = _get_families_search_data(families, dataset_type=search_dataset_type) if parsed_search.get('inheritance'): samples = _parse_inheritance(parsed_search, samples, previous_search_results) @@ -265,7 +289,7 @@ def _parse_variant_items(search_json): else: try: variant_id = item.lstrip('chr') - parsed_variant_ids.append(parse_variant_id(variant_id)) + parsed_variant_ids.append(_parse_variant_id(variant_id)) variant_ids.append(variant_id) except (KeyError, ValueError): invalid_items.append(item) @@ -273,7 +297,7 @@ return rs_ids, variant_ids, parsed_variant_ids, invalid_items -def parse_variant_id(variant_id): +def _parse_variant_id(variant_id): chrom, pos, ref, alt = variant_id.split('-') pos = int(pos) get_xpos(chrom, pos) From 77752ebe4ffa5f377bacccf5af792e87c0ec9ced Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 16:16:00 -0400 Subject: [PATCH 13/55] test placeholder --- seqr/utils/search/hail_search_utils_tests.py | 57 ++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index be1508459f..f5d6f1ddef 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -190,3 +190,60 @@ def test_get_variant_query_gene_counts(self): self.assertDictEqual(gene_counts, MOCK_COUNTS) self.assert_cached_results({'gene_aggs': gene_counts}) self._test_expected_search_call(sort=None) + + # TODO + # @responses.activate + # def test_get_single_variant(self, mock_get_variants_for_ids): + # mock_get_variants_for_ids.return_value = [PARSED_VARIANTS[0]] + # variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) + # self.assertDictEqual(variant, PARSED_VARIANTS[0]) + # mock_get_variants_for_ids.assert_called_with( + # mock.ANY, 
'37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, + # ) + # expected_samples = { + # s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + # } + # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) + # + # get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) + # mock_get_variants_for_ids.assert_called_with( + # mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, return_all_queried_families=True, + # ) + # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) + # + # get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) + # mock_get_variants_for_ids.assert_called_with( + # mock.ANY, '37', {'prefix_19107_DEL': None}, user=self.user, + # ) + # expected_samples = { + # s for s in self.search_samples if s.guid in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + # } + # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) + # + # mock_get_variants_for_ids.return_value = [] + # with self.assertRaises(InvalidSearchException) as cm: + # get_single_variant(self.families, '10-10334333-A-G') + # self.assertEqual(str(cm.exception), 'Variant 10-10334333-A-G not found') + + # TODO + # @responses.activate + # def test_get_variants_for_variant_ids(self): + # variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL'] + # get_variants_for_variant_ids(self.families, variant_ids, user=self.user) + # mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { + # '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), + # '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), + # 'prefix-938_DEL': None, + # }, user=self.user) + # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) + # + # get_variants_for_variant_ids( + # self.families, 
variant_ids, user=self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) + # mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { + # '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), + # '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), + # }, user=self.user) + # expected_samples = { + # s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] + # } + # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) From 776a88c5819056661c0ad4ccbf2a4eae723c9ce1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 16:22:40 -0400 Subject: [PATCH 14/55] do not allow generic kwargs --- seqr/utils/search/elasticsearch/es_utils.py | 2 +- seqr/utils/search/hail_search_utils.py | 2 +- seqr/utils/search/search_utils_tests.py | 10 +++++----- seqr/utils/search/utils.py | 14 ++++++++++---- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/seqr/utils/search/elasticsearch/es_utils.py b/seqr/utils/search/elasticsearch/es_utils.py index 2b48a016cd..5f4c895e8f 100644 --- a/seqr/utils/search/elasticsearch/es_utils.py +++ b/seqr/utils/search/elasticsearch/es_utils.py @@ -262,7 +262,7 @@ def _get_es_indices(client): return indices, seqr_index_projects -def get_es_variants_for_variant_ids(samples, genome_version, variants_by_id, user=None, return_all_queried_families=False): +def get_es_variants_for_variant_ids(samples, genome_version, variants_by_id, user, return_all_queried_families=False): variants = EsSearch( samples, genome_version, user=user, return_all_queried_families=return_all_queried_families, ).filter_by_variant_ids(list(variants_by_id.keys())) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 9dc2e67d08..ac5d4d17d8 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -51,7 +51,7 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver 
return response_json['results'][end_offset - num_results:end_offset] -def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_ids, user=None, return_all_queried_families=False): +def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_ids, user, return_all_queried_families=False): search = { 'variant_ids': [parsed_id for parsed_id in parsed_variant_ids.values() if parsed_id], 'variant_keys': [variant_id for variant_id, parsed_id in parsed_variant_ids.itmes() if not parsed_id], diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index 00097b6cdf..afb0816f98 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -59,7 +59,7 @@ def test_get_single_variant(self, mock_get_variants_for_ids): variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) self.assertDictEqual(variant, PARSED_VARIANTS[0]) mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, + mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, self.user, return_all_queried_families=False, ) expected_samples = { s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] @@ -68,13 +68,13 @@ def test_get_single_variant(self, mock_get_variants_for_ids): get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, return_all_queried_families=True, + mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, self.user, return_all_queried_families=True, ) self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) 
mock_get_variants_for_ids.assert_called_with( - mock.ANY, '37', {'prefix_19107_DEL': None}, user=self.user, + mock.ANY, '37', {'prefix_19107_DEL': None}, self.user, return_all_queried_families=False, ) expected_samples = { s for s in self.search_samples if s.guid in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] @@ -93,7 +93,7 @@ def test_get_variants_for_variant_ids(self, mock_get_variants_for_ids): '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), 'prefix-938_DEL': None, - }, user=self.user) + }, self.user) self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) get_variants_for_variant_ids( @@ -101,7 +101,7 @@ def test_get_variants_for_variant_ids(self, mock_get_variants_for_ids): mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), - }, user=self.user) + }, self.user) expected_samples = { s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] } diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index d5b3bc857c..ddb5386efc 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -110,14 +110,20 @@ def delete_search_backend_data(data_id): )(data_id) -def get_single_variant(families, variant_id, **kwargs): - variants = get_variants_for_variant_ids(families, [variant_id], **kwargs) +def get_single_variant(families, variant_id, return_all_queried_families=False, user=None): + variants = _get_variants_for_variant_ids( + families, [variant_id], user, return_all_queried_families=return_all_queried_families, + ) if not variants: raise InvalidSearchException('Variant {} not found'.format(variant_id)) return variants[0] -def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, **kwargs): +def get_variants_for_variant_ids(families, variant_ids, 
dataset_type=None, user=None): + return _get_variants_for_variant_ids(families, variant_ids, user, dataset_type=dataset_type) + + +def _get_variants_for_variant_ids(families, variant_ids, user, dataset_type=None, **kwargs): parsed_variant_ids = {} for variant_id in variant_ids: try: @@ -137,7 +143,7 @@ def is_valid(v_id): dataset_type = Sample.DATASET_TYPE_SV_CALLS return backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)( - *_get_families_search_data(families, dataset_type=dataset_type), parsed_variant_ids, **kwargs + *_get_families_search_data(families, dataset_type=dataset_type), parsed_variant_ids, user, **kwargs ) From 2bf6a5081adc693031c4d1975bc06a0d3d7362c2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Jun 2023 16:55:41 -0400 Subject: [PATCH 15/55] add variant id test --- seqr/utils/search/hail_search_utils.py | 2 +- seqr/utils/search/hail_search_utils_tests.py | 93 +++++++++++--------- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index ac5d4d17d8..2cf46a6776 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -54,7 +54,7 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_ids, user, return_all_queried_families=False): search = { 'variant_ids': [parsed_id for parsed_id in parsed_variant_ids.values() if parsed_id], - 'variant_keys': [variant_id for variant_id, parsed_id in parsed_variant_ids.itmes() if not parsed_id], + 'variant_keys': [variant_id for variant_id, parsed_id in parsed_variant_ids.items() if not parsed_id], } search_body = _format_search_body(samples, genome_version, user, len(parsed_variant_ids), search) response_json = _execute_search(search_body) diff --git a/seqr/utils/search/hail_search_utils_tests.py 
b/seqr/utils/search/hail_search_utils_tests.py index f5d6f1ddef..2aa89bb6f7 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -6,7 +6,8 @@ import responses from seqr.models import Family -from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants +from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants, get_single_variant, \ + get_variants_for_variant_ids from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS from seqr.views.utils.test_utils import PARSED_VARIANTS @@ -35,6 +36,14 @@ ], } +ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA) +ALL_AFFECTED_SAMPLE_DATA['MITO'] = [ + {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, +] +ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append({ + 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M', +}) + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST) class HailSearchUtilsTests(SearchTestHelper, TestCase): @@ -43,12 +52,11 @@ class HailSearchUtilsTests(SearchTestHelper, TestCase): def setUp(self): super(HailSearchUtilsTests, self).set_up() + responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={ + 'results': PARSED_VARIANTS, 'total': 5, + }) - def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None, - rs_ids=None, variant_ids=None, dataset_type=None, secondary_dataset_type=None, - frequencies=None, custom_query=None, inheritance_mode='de_novo', inheritance_filter=None, - quality_filter=None, sort='xpos', sort_metadata=None, num_results=100, - sample_data=None, omit_sample_type=None): + def _test_minimal_search_call(self, search_body, num_results=100, sample_data=None, 
omit_sample_type=None): sample_data = sample_data or EXPECTED_SAMPLE_DATA if omit_sample_type: sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type} @@ -57,9 +65,24 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval 'requester_email': 'test_user@broadinstitute.org', 'sample_data': sample_data, 'genome_version': 'GRCh37', + 'num_results': num_results, + } + expected_search.update(search_body) + + request_body = json.loads(responses.calls[-1].request.body) + if request_body != expected_search: + diff_k = {k for k, v in request_body.items() if v != expected_search.get(k)} + import pdb; pdb.set_trace() + self.assertDictEqual(request_body, expected_search) + + def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None, + rs_ids=None, variant_ids=None, dataset_type=None, secondary_dataset_type=None, + frequencies=None, custom_query=None, inheritance_mode='de_novo', inheritance_filter=None, + quality_filter=None, sort='xpos', sort_metadata=None, **kwargs): + + expected_search = { 'sort': sort, 'sort_metadata': sort_metadata, - 'num_results': num_results, 'inheritance_mode': inheritance_mode, 'inheritance_filter': inheritance_filter or {}, 'dataset_type': dataset_type, @@ -75,21 +98,10 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval } expected_search.update({field: self.search_model.search[field] for field in search_fields or []}) - request_body = json.loads(responses.calls[-1].request.body) - self.assertDictEqual(request_body, expected_search) + self._test_minimal_search_call(expected_search, **kwargs) @responses.activate def test_query_variants(self): - responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error') - with self.assertRaises(HTTPError) as cm: - query_variants(self.results_model, user=self.user) - self.assertEqual(cm.exception.response.status_code, 400) - 
self.assertEqual(cm.exception.response.text, 'Bad Search Error') - - responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={ - 'results': PARSED_VARIANTS, 'total': 5, - }) - variants, total = query_variants(self.results_model, user=self.user) self.assertListEqual(variants, PARSED_VARIANTS) self.assertEqual(total, 5) @@ -182,6 +194,12 @@ def test_query_variants(self): sort='prioritized_gene', sort_metadata={'ENSG00000268903': 1, 'ENSG00000268904': 11}, ) + responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error') + with self.assertRaises(HTTPError) as cm: + query_variants(self.results_model, user=self.user) + self.assertEqual(cm.exception.response.status_code, 400) + self.assertEqual(cm.exception.response.text, 'Bad Search Error') + @responses.activate def test_get_variant_query_gene_counts(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=MOCK_COUNTS, status=200) @@ -224,26 +242,19 @@ def test_get_variant_query_gene_counts(self): # with self.assertRaises(InvalidSearchException) as cm: # get_single_variant(self.families, '10-10334333-A-G') # self.assertEqual(str(cm.exception), 'Variant 10-10334333-A-G not found') + # TODO test return_all_queried_families _validate_expected_families failure - # TODO - # @responses.activate - # def test_get_variants_for_variant_ids(self): - # variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL'] - # get_variants_for_variant_ids(self.families, variant_ids, user=self.user) - # mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { - # '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), - # '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), - # 'prefix-938_DEL': None, - # }, user=self.user) - # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples)) - # - # get_variants_for_variant_ids( - # self.families, variant_ids, user=self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) - # 
mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', { - # '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'), - # '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'), - # }, user=self.user) - # expected_samples = { - # s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] - # } - # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) + @responses.activate + def test_get_variants_for_variant_ids(self): + variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL'] + get_variants_for_variant_ids(self.families, variant_ids, user=self.user) + self._test_minimal_search_call({ + 'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']], + 'variant_keys': ['prefix-938_DEL'], + }, num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA) + + get_variants_for_variant_ids(self.families, variant_ids, user=self.user, dataset_type='VARIANTS') + self._test_minimal_search_call({ + 'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']], + 'variant_keys': [], + }, num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES') From b50eb5e3bf8fc59b3ee24d5d8274b173bd166fad Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 5 Jun 2023 10:49:33 -0400 Subject: [PATCH 16/55] add tests that only correct responses are used --- seqr/views/status_tests.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/seqr/views/status_tests.py b/seqr/views/status_tests.py index 154a680b99..bb08dc2576 100644 --- a/seqr/views/status_tests.py +++ b/seqr/views/status_tests.py @@ -42,6 +42,8 @@ def test_status(self, mock_logger, mock_db_connections, mock_redis): mock_db_connections.__getitem__.return_value.cursor.side_effect = None mock_redis.return_value.ping.side_effect = None + responses.reset() + urllib3_responses.reset() responses.add(responses.GET, 'http://test-hail:5000/status', status=200) 
urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) urllib3_responses.add(urllib3_responses.HEAD, '/status', status=500) @@ -64,6 +66,7 @@ def test_status(self, mock_logger, mock_db_connections, mock_redis): self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': True}) mock_logger.error.assert_not_called() + self._assert_expected_requests() class ElasticsearchStatusTest(TestCase, StatusTest): @@ -75,6 +78,10 @@ class ElasticsearchStatusTest(TestCase, StatusTest): def test_status(self, *args): super(ElasticsearchStatusTest, self).test_status(*args) + def _assert_expected_requests(self): + self.assertEqual(len(responses.calls), 0) + self.assertListEqual([call.request.url for call in urllib3_responses.calls], ['/', '/status', '/', '/status']) + class HailSearchStatusTest(TestCase, StatusTest): @@ -85,3 +92,8 @@ class HailSearchStatusTest(TestCase, StatusTest): @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', 'http://test-hail') def test_status(self, *args): super(HailSearchStatusTest, self).test_status(*args) + + def _assert_expected_requests(self): + self.assertEqual(len(urllib3_responses.calls), 0) + self.assertEqual(len(responses.calls), 1) + self.assertEqual(responses.calls[0].request.url, 'http://test-hail:5000/status') From 0686a38709fd9c9cb0aa4171f92cbbe7f08fdb75 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 5 Jun 2023 11:09:29 -0400 Subject: [PATCH 17/55] add single variant tests --- seqr/utils/search/hail_search_utils.py | 3 +- seqr/utils/search/hail_search_utils_tests.py | 79 ++++++++++---------- 2 files changed, 40 insertions(+), 42 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 2cf46a6776..2cd648d2b3 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -60,7 +60,8 @@ def get_hail_variants_for_variant_ids(samples, genome_version, 
parsed_variant_id response_json = _execute_search(search_body) if return_all_queried_families: - _validate_expected_families(response_json['results'], {s['family_guid'] for s in search_body['sample_data']}) + expected_family_guids = set(samples.values_list('individual__family__guid', flat=True)) + _validate_expected_families(response_json['results'], expected_family_guids) return response_json['results'] diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 2aa89bb6f7..7a963f7bb9 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -7,18 +7,22 @@ from seqr.models import Family from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants, get_single_variant, \ - get_variants_for_variant_ids + get_variants_for_variant_ids, InvalidSearchException from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS from seqr.views.utils.test_utils import PARSED_VARIANTS MOCK_HOST = 'http://test-hail-host' +FAMILY_3_SAMPLE = { + 'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3', + 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M', +} EXPECTED_SAMPLE_DATA = { 'VARIANTS': [ {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, - {'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M'}, + FAMILY_3_SAMPLE, ], 'SV_WES': [ {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 
'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'}, {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'}, @@ -40,9 +44,10 @@ ALL_AFFECTED_SAMPLE_DATA['MITO'] = [ {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'}, ] -ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append({ +FAMILY_5_SAMPLE = { 'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M', -}) +} +ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE) @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST) @@ -70,9 +75,6 @@ def _test_minimal_search_call(self, search_body, num_results=100, sample_data=No expected_search.update(search_body) request_body = json.loads(responses.calls[-1].request.body) - if request_body != expected_search: - diff_k = {k for k, v in request_body.items() if v != expected_search.get(k)} - import pdb; pdb.set_trace() self.assertDictEqual(request_body, expected_search) def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None, @@ -209,40 +211,35 @@ def test_get_variant_query_gene_counts(self): self.assert_cached_results({'gene_aggs': gene_counts}) self._test_expected_search_call(sort=None) - # TODO - # @responses.activate - # def test_get_single_variant(self, mock_get_variants_for_ids): - # mock_get_variants_for_ids.return_value = [PARSED_VARIANTS[0]] - # variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) - # self.assertDictEqual(variant, PARSED_VARIANTS[0]) - # mock_get_variants_for_ids.assert_called_with( - # mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, - # ) - # expected_samples = { - # s for s in self.search_samples if 
s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] - # } - # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) - # - # get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) - # mock_get_variants_for_ids.assert_called_with( - # mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, user=self.user, return_all_queried_families=True, - # ) - # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) - # - # get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) - # mock_get_variants_for_ids.assert_called_with( - # mock.ANY, '37', {'prefix_19107_DEL': None}, user=self.user, - # ) - # expected_samples = { - # s for s in self.search_samples if s.guid in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733'] - # } - # self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples) - # - # mock_get_variants_for_ids.return_value = [] - # with self.assertRaises(InvalidSearchException) as cm: - # get_single_variant(self.families, '10-10334333-A-G') - # self.assertEqual(str(cm.exception), 'Variant 10-10334333-A-G not found') - # TODO test return_all_queried_families _validate_expected_families failure + @responses.activate + def test_get_single_variant(self): + variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user) + self.assertDictEqual(variant, PARSED_VARIANTS[0]) + self._test_minimal_search_call({ + 'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [], + }, num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES') + + get_single_variant(self.families, 'prefix_19107_DEL', user=self.user) + self._test_minimal_search_call({ + 'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'], + }, num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS') + + with self.assertRaises(InvalidSearchException) as cm: 
+ get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) + self.assertEqual( + str(cm.exception), + 'Unable to return all families for the following variants: 1-248367227-TC-T (F000002_2; F000005_5), 2-103343353-GAGA-G (F000005_5)', + ) + + get_single_variant(self.families.filter(guid='F000003_3'), '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True) + self._test_minimal_search_call({ + 'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [], + }, num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]}) + + responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={'results': [], 'total': 0}) + with self.assertRaises(InvalidSearchException) as cm: + get_single_variant(self.families, '10-10334333-A-G', user=self.user) + self.assertEqual(str(cm.exception), 'Variant 10-10334333-A-G not found') @responses.activate def test_get_variants_for_variant_ids(self): From a9cfc277a722348085224672eb3ee200e0498e54 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 5 Jun 2023 11:53:17 -0400 Subject: [PATCH 18/55] remove unused import --- seqr/utils/search/constants.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/seqr/utils/search/constants.py b/seqr/utils/search/constants.py index 5537d304e4..47f5a2ba92 100644 --- a/seqr/utils/search/constants.py +++ b/seqr/utils/search/constants.py @@ -1,5 +1,3 @@ -from seqr.models import Sample - SEQR_DATSETS_GS_PATH = 'gs://seqr-datasets/v02' VCF_FILE_EXTENSIONS = ('.vcf', '.vcf.gz', '.vcf.bgz') From 32a12a0827f53bf4e48cf180284c32a08df0f1a5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 5 Jun 2023 13:15:13 -0400 Subject: [PATCH 19/55] add es global in ui --- seqr/views/react_app.py | 2 ++ seqr/views/react_app_tests.py | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/seqr/views/react_app.py b/seqr/views/react_app.py index a8dca095c6..2490bf7c6d 100644 --- a/seqr/views/react_app.py +++ 
b/seqr/views/react_app.py @@ -8,6 +8,7 @@ from django.http import HttpResponse from settings import SEQR_VERSION, CSRF_COOKIE_NAME, DEBUG, LOGIN_URL, GA_TOKEN_ID, ANVIL_LOADING_DELAY_EMAIL_START_DATE from seqr.models import WarningMessage +from seqr.utils.search.utils import backend_specific_call from seqr.views.utils.orm_to_json_utils import get_json_for_user, get_json_for_current_user from seqr.views.utils.permissions_utils import login_active_required from seqr.views.utils.terra_api_utils import google_auth_enabled @@ -51,6 +52,7 @@ def render_app_html(request, additional_json=None, include_user=True, status=200 'version': '{}-{}'.format(SEQR_VERSION, ui_version), 'hijakEnabled': DEBUG or False, 'googleLoginEnabled': google_auth_enabled(), + 'elasticsearchEnabled': backend_specific_call(True, False), 'warningMessages': [message.json() for message in WarningMessage.objects.all()], 'anvilLoadingDelayDate': ANVIL_LOADING_DELAY_EMAIL_START_DATE if should_show_loading_delay else None, }} diff --git a/seqr/views/react_app_tests.py b/seqr/views/react_app_tests.py index 9429903ba8..0e7afb6701 100644 --- a/seqr/views/react_app_tests.py +++ b/seqr/views/react_app_tests.py @@ -13,7 +13,7 @@ class AppPageTest(object): databases = '__all__' fixtures = ['users'] - def _check_page_html(self, response, user, user_key='user', user_fields=None, ga_token_id=None, anvil_loading_date=None): + def _check_page_html(self, response, user, user_key='user', user_fields=None, ga_token_id=None, anvil_loading_date=None, elasticsearch_enabled=False): user_fields = user_fields or USER_FIELDS self.assertEqual(response.status_code, 200) initial_json = self.get_initial_page_json(response) @@ -24,6 +24,7 @@ def _check_page_html(self, response, user, user_key='user', user_fields=None, g 'version': mock.ANY, 'hijakEnabled': False, 'googleLoginEnabled': self.GOOGLE_ENABLED, + 'elasticsearchEnabled': elasticsearch_enabled, 'warningMessages': [{'id': 1, 'header': 'Warning!', 'message': 'A sample 
warning'}], 'anvilLoadingDelayDate': anvil_loading_date, }) @@ -79,9 +80,10 @@ def test_no_login_react_page(self): response = self.client.get(url) self._check_page_html(response, 'test_user') + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.views.react_app.ANVIL_LOADING_DELAY_EMAIL_START_DATE', '2022-12-01') @mock.patch('seqr.views.react_app.datetime') - def test_react_page_anvil_loading_delay(self, mock_datetime): + def test_react_page_additional_configs(self, mock_datetime): mock_datetime.strptime.side_effect = datetime.strptime mock_datetime.now.return_value = datetime(2022, 11, 1, 0, 0, 0) @@ -89,11 +91,11 @@ def test_react_page_anvil_loading_delay(self, mock_datetime): self.check_require_login_no_policies(url, login_redirect_url='/login') response = self.client.get(url) - self._check_page_html(response, 'test_user_no_policies') + self._check_page_html(response, 'test_user_no_policies', elasticsearch_enabled=True) mock_datetime.now.return_value = datetime(2022, 12, 30, 0, 0, 0) response = self.client.get(url) - self._check_page_html(response, 'test_user_no_policies', anvil_loading_date='2022-12-01') + self._check_page_html(response, 'test_user_no_policies', anvil_loading_date='2022-12-01', elasticsearch_enabled=True) class LocalAppPageTest(AuthenticationTestCase, AppPageTest): From 97e0c6081ce91273c47032bdad23dd55461309be Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 5 Jun 2023 17:19:31 -0400 Subject: [PATCH 20/55] conditionally show elasticsaearch dta management pages --- ui/pages/DataManagement/DataManagement.jsx | 38 ++++++++++++++++------ ui/redux/selectors.js | 1 + ui/shared/components/page/PageHeader.jsx | 10 +++--- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx index 1ee3378f32..041d2c60ee 100644 --- a/ui/pages/DataManagement/DataManagement.jsx +++ 
b/ui/pages/DataManagement/DataManagement.jsx @@ -3,8 +3,9 @@ import PropTypes from 'prop-types' import { connect } from 'react-redux' import { Route, Switch } from 'react-router-dom' -import { getUser } from 'redux/selectors' +import { getUser, getElasticsearchEnabled } from 'redux/selectors' import { Error404, Error401 } from 'shared/components/page/Errors' +import { SimplePageHeader } from 'shared/components/page/PageHeaderLayout' import ElasticsearchStatus from './components/ElasticsearchStatus' import RnaSeq from './components/RnaSeq' @@ -15,12 +16,7 @@ import WritePedigree from './components/WritePedigree' const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' } -export const DATA_MANAGEMENT_PAGES = [ - { path: 'elasticsearch_status', component: ElasticsearchStatus }, - { - path: 'kibana', - component: () =>