diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5660ee3778..821a61faf7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## dev
 
+## 6/23/23
+* Add a 'rank' field to the RnaSeqSpliceOutlier model (REQUIRES DB MIGRATION)
+* Remove hail python dependency
+
 ## 6/2/23
 * Update Clinvar filtering and display
 
diff --git a/requirements.in b/requirements.in
index ec24dd607f..989f96912a 100644
--- a/requirements.in
+++ b/requirements.in
@@ -10,7 +10,6 @@ social-auth-core            # the Python social authentication package. Re
 elasticsearch==7.9.1        # elasticsearch client
 elasticsearch-dsl==7.2.1    # elasticsearch query utilities
 gunicorn                    # web server
-hail<0.3                    # provides convenient apis for working with files in google cloud storage
 jmespath
 openpyxl                    # library for reading/writing Excel files
 pillow                      # required dependency of Djagno ImageField-type database records
diff --git a/requirements.txt b/requirements.txt
index fccb79474d..3624815c1c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,79 +1,29 @@
 #
-# This file is autogenerated by pip-compile with Python 3.9
-# by the following command:
+# This file is autogenerated by pip-compile with python 3.9
+# To update, run:
 #
 #    pip-compile requirements.in
 #
-aiohttp==3.8.4
-    # via
-    #   aiohttp-session
-    #   hail
-aiohttp-session==2.12.0
-    # via hail
-aiosignal==1.3.1
-    # via aiohttp
 asgiref==3.6.0
     # via django
 async-timeout==4.0.2
-    # via
-    #   aiohttp
-    #   redis
-asyncinit==0.2.4
-    # via hail
-attrs==22.2.0
-    # via aiohttp
-avro==1.11.1
-    # via hail
-azure-core==1.26.3
-    # via
-    #   azure-identity
-    #   azure-storage-blob
-    #   msrest
-azure-identity==1.12.0
-    # via hail
-azure-storage-blob==12.14.1
-    # via hail
-bokeh==1.4.0
-    # via hail
-boto3==1.26.76
-    # via hail
-botocore==1.29.76
-    # via
-    #   boto3
-    #   hail
-    #   s3transfer
+    # via redis
 cachetools==5.3.0
     # via google-auth
 certifi==2022.12.7
     # via
     #   elasticsearch
-    #   msrest
     #   requests
 cffi==1.15.1
     # via cryptography
 charset-normalizer==3.0.1
-    # via
-    #   aiohttp
-    #   requests
-commonmark==0.9.1
-    # via rich
+    # via requests
 cryptography==39.0.1
-    # via
-    #   azure-identity
-    #   azure-storage-blob
-    #   msal
-    #   pyjwt
-    #   social-auth-core
-decorator==4.4.2
-    # via hail
+    # via social-auth-core
 defusedxml==0.7.1
     # via
     #   python3-openid
     #   social-auth-core
-deprecated==1.2.13
-    # via hail
-dill==0.3.6
-    # via hail
 django==3.2.19
     # via
     #   -r requirements.in
@@ -103,11 +53,6 @@ elasticsearch-dsl==7.2.1
     # via -r requirements.in
 et-xmlfile==1.1.0
     # via openpyxl
-frozenlist==1.3.3
-    # via
-    #   aiohttp
-    #   aiosignal
-    #   hail
 google-api-core==2.11.0
     # via
     #   google-cloud-core
@@ -117,13 +62,10 @@ google-auth==2.14.1
     #   google-api-core
     #   google-cloud-core
     #   google-cloud-storage
-    #   hail
 google-cloud-core==2.3.2
     # via google-cloud-storage
 google-cloud-storage==2.7.0
-    # via
-    #   django-storages
-    #   hail
+    # via django-storages
 google-crc32c==1.5.0
     # via google-resumable-media
 google-resumable-media==2.4.1
@@ -132,82 +74,24 @@ googleapis-common-protos==1.58.0
     # via google-api-core
 gunicorn==20.1.0
     # via -r requirements.in
-hail==0.2.109
-    # via -r requirements.in
-humanize==1.1.0
-    # via hail
-hurry-filesize==0.9
-    # via hail
 idna==3.4
-    # via
-    #   requests
-    #   yarl
-isodate==0.6.1
-    # via msrest
-janus==1.0.0
-    # via hail
-jinja2==3.0.3
-    # via
-    #   bokeh
-    #   hail
+    # via requests
 jmespath==1.0.1
-    # via
-    #   -r requirements.in
-    #   boto3
-    #   botocore
-markupsafe==2.1.2
-    # via jinja2
-msal==1.21.0
-    # via
-    #   azure-identity
-    #   msal-extensions
-msal-extensions==1.0.0
-    # via azure-identity
-msrest==0.7.1
-    # via azure-storage-blob
-multidict==6.0.4
-    # via
-    #   aiohttp
-    #   yarl
-nest-asyncio==1.5.6
-    # via hail
-numpy==1.24.2
-    # via
-    #   bokeh
-    #   hail
-    #   pandas
-    #   scipy
+    # via -r requirements.in
 oauthlib==3.2.2
     # via
     #   requests-oauthlib
     #   social-auth-core
 openpyxl==3.1.1
     # via -r requirements.in
-orjson==3.8.6
-    # via hail
-packaging==23.0
-    # via bokeh
-pandas==1.5.3
-    # via hail
-parsimonious==0.8.1
-    # via hail
 pillow==9.4.0
-    # via
-    #   -r requirements.in
-    #   bokeh
-plotly==5.10.0
-    # via hail
-portalocker==2.7.0
-    # via msal-extensions
+    # via -r requirements.in
 protobuf==3.20.2
     # via
     #   google-api-core
     #   googleapis-common-protos
-    #   hail
 psycopg2==2.9.5
     # via -r requirements.in
-py4j==0.10.9
-    # via pyspark
 pyasn1==0.4.8
     # via
     #   pyasn1-modules
@@ -216,72 +100,38 @@ pyasn1-modules==0.2.8
     # via google-auth
 pycparser==2.21
     # via cffi
-pygments==2.14.0
-    # via rich
-pyjwt[crypto]==2.6.0
-    # via
-    #   hail
-    #   msal
-    #   social-auth-core
+pyjwt==2.6.0
+    # via social-auth-core
 pyliftover==0.4
     # via -r requirements.in
-pyspark==3.1.3
-    # via hail
 python-dateutil==2.8.2
-    # via
-    #   bokeh
-    #   botocore
-    #   elasticsearch-dsl
-    #   pandas
-python-json-logger==2.0.7
-    # via hail
+    # via elasticsearch-dsl
 python3-openid==3.2.0
     # via social-auth-core
 pytz==2022.7.1
-    # via
-    #   django
-    #   pandas
-pyyaml==6.0
-    # via bokeh
+    # via django
 redis==4.5.4
     # via -r requirements.in
 requests==2.31.0
     # via
     #   -r requirements.in
-    #   azure-core
     #   django-anymail
     #   google-api-core
     #   google-cloud-storage
-    #   hail
-    #   msal
-    #   msrest
     #   requests-oauthlib
     #   requests-toolbelt
     #   slacker
     #   social-auth-core
 requests-oauthlib==1.3.1
-    # via
-    #   msrest
-    #   social-auth-core
+    # via social-auth-core
 requests-toolbelt==0.10.1
     # via -r requirements.in
-rich==12.6.0
-    # via hail
 rsa==4.9
     # via google-auth
-s3transfer==0.6.0
-    # via boto3
-scipy==1.9.3
-    # via hail
 six==1.16.0
     # via
-    #   azure-core
-    #   azure-identity
-    #   bokeh
     #   elasticsearch-dsl
     #   google-auth
-    #   isodate
-    #   parsimonious
     #   python-dateutil
 slacker==0.14.0
     # via -r requirements.in
@@ -293,35 +143,16 @@ social-auth-core==4.3.0
     # via
     #   -r requirements.in
     #   social-auth-app-django
-sortedcontainers==2.4.0
-    # via hail
 sqlparse==0.4.4
     # via django
-tabulate==0.9.0
-    # via hail
-tenacity==8.2.1
-    # via plotly
-tornado==6.3.2
-    # via bokeh
 tqdm==4.64.1
     # via -r requirements.in
-typing-extensions==4.5.0
-    # via
-    #   azure-core
-    #   janus
 urllib3==1.26.14
     # via
-    #   botocore
     #   elasticsearch
     #   requests
-uvloop==0.17.0
-    # via hail
 whitenoise==6.3.0
     # via -r requirements.in
-wrapt==1.14.1
-    # via deprecated
-yarl==1.8.2
-    # via aiohttp
 
 # The following packages are considered to be unsafe in a requirements file:
 # setuptools
diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json
index 30ff903eda..20aef5e495 100644
--- a/seqr/fixtures/1kg_project.json
+++ b/seqr/fixtures/1kg_project.json
@@ -1375,6 +1375,7 @@
     "pk": 1,
     "fields": {
         "sample": 151,
+        "rank": 0,
         "gene_id": "ENSG00000106554",
         "p_value": 1.08E-56,
         "z_score": 12.34,
@@ -1394,6 +1395,7 @@
     "pk": 2,
     "fields": {
         "sample": 153,
+        "rank": 0,
         "gene_id": "ENSG00000268903",
         "p_value": 1.08E-56,
         "z_score": 12.34,
@@ -1487,6 +1489,32 @@
         "file_path": "/readviz/NA19675.cram"
     }
 },
+{
+    "model": "seqr.igvsample",
+    "pk": 146,
+    "fields": {
+        "guid": "S000146_na20870",
+        "created_date": "2017-02-05T06:42:55.397Z",
+        "created_by": null,
+        "last_modified_date": "2017-03-13T09:07:49.744Z",
+        "individual": 7,
+        "sample_type": "alignment",
+        "file_path": "gs://readviz/NA20870.cram"
+    }
+},
+{
+    "model": "seqr.igvsample",
+    "pk": 147,
+    "fields": {
+        "guid": "S000147_na20870",
+        "created_date": "2017-02-05T06:42:55.397Z",
+        "created_by": null,
+        "last_modified_date": "2017-03-13T09:07:49.744Z",
+        "individual": 7,
+        "sample_type": "gcnv",
+        "file_path": "gs://datasets-gcnv/NA20870.bed.gz"
+    }
+},
 {
     "model": "seqr.varianttagtype",
     "pk": 1,
diff --git a/seqr/management/commands/check_bam_cram_paths.py b/seqr/management/commands/check_bam_cram_paths.py
index fb8699154a..b018c83f67 100644
--- a/seqr/management/commands/check_bam_cram_paths.py
+++ b/seqr/management/commands/check_bam_cram_paths.py
@@ -1,12 +1,12 @@
 from django.core.management.base import BaseCommand
 import collections
-import hail as hl
 import logging
 import tqdm
 
 from seqr.models import IgvSample
 from seqr.utils import communication_utils
+from seqr.utils.file_utils import does_file_exist
 from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL
 
 logger = logging.getLogger(__name__)
@@ -31,13 +31,13 @@ def handle(self, *args, **options):
             individual__family__project__name__in=args
         ) if args else IgvSample.objects.all()).filter(
             file_path__startswith='gs://'
-        ).prefetch_related('individual', 'individual__family__project')
+        ).order_by('id').prefetch_related('individual', 'individual__family__project')
 
         missing_counter = collections.defaultdict(int)
         guids_of_samples_with_missing_file = set()
         project_name_to_missing_paths = collections.defaultdict(list)
         for sample in tqdm.tqdm(samples, unit=" samples"):
-            if not hl.hadoop_is_file(sample.file_path):
+            if not does_file_exist(sample.file_path):
                 individual_id = sample.individual.individual_id
                 project_name = sample.individual.family.project.name
                 missing_counter[project_name] += 1
@@ -47,7 +47,7 @@ def handle(self, *args, **options):
                 guids_of_samples_with_missing_file.add(sample.guid)
 
         if len(guids_of_samples_with_missing_file) > 0:
-            IgvSample.bulk_update(user=None, update_json={'file_path': ''}, guid__in=guids_of_samples_with_missing_file)
+            IgvSample.bulk_delete(user=None, guid__in=guids_of_samples_with_missing_file)
 
         logger.info('---- DONE ----')
         logger.info('Checked {} samples'.format(len(samples)))
@@ -58,7 +58,7 @@ def handle(self, *args, **options):
 
         # post to slack
         if not options.get('dry_run'):
-            slack_message = 'Found {} broken bam/cram path(s)\n'.format(sum(missing_counter.values()))
+            slack_message = 'Found and removed {} broken bam/cram path(s)\n'.format(sum(missing_counter.values()))
             for project_name, missing_paths_list in project_name_to_missing_paths.items():
                 slack_message += "\nIn project {}:\n".format(project_name)
                 slack_message += "\n".join([
diff --git a/seqr/management/tests/check_bam_cram_paths_tests.py b/seqr/management/tests/check_bam_cram_paths_tests.py
index a128638ad1..d8105f1931 100644
--- a/seqr/management/tests/check_bam_cram_paths_tests.py
+++ b/seqr/management/tests/check_bam_cram_paths_tests.py
@@ -5,32 +5,22 @@
 from seqr.models import IgvSample
 from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL
 
+
+@mock.patch('seqr.utils.file_utils.subprocess.Popen')
+@mock.patch('seqr.utils.communication_utils.safe_post_to_slack')
+@mock.patch('seqr.management.commands.check_bam_cram_paths.logger')
 class CheckBamCramPathsTest(TestCase):
     fixtures = ['users', '1kg_project']
 
-    def setUp(self):
-        existing_sample = IgvSample.objects.first()
-        IgvSample.objects.create(
-            individual=existing_sample.individual,
-            sample_type=IgvSample.SAMPLE_TYPE_GCNV,
-            file_path='gs://missing-bucket/missing_file',
-        )
-
-    @mock.patch('hail.hadoop_is_file')
-    @mock.patch('seqr.utils.communication_utils.safe_post_to_slack')
-    @mock.patch('seqr.management.commands.check_bam_cram_paths.logger')
-    def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file):
-        mock_hadoop_is_file.return_value = False
+    def test_command_with_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess):
+        mock_subprocess.return_value.wait.side_effect = [-1, 0]
         call_command('check_bam_cram_paths', '1kg project n\u00e5me with uni\u00e7\u00f8de')
-        self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file)
+        self._check_results(True, mock_logger, mock_safe_post_to_slack, mock_subprocess)
 
-    @mock.patch('hail.hadoop_is_file')
-    @mock.patch('seqr.management.commands.check_bam_cram_paths.logger')
-    def test_command_with_other_project(self, mock_logger, mock_hadoop_is_file):
-        mock_hadoop_is_file.return_value = False
+    def test_command_with_other_project(self, mock_logger, mock_safe_post_to_slack, mock_subprocess):
+        mock_subprocess.return_value.wait.side_effect = [-1, 0]
         call_command('check_bam_cram_paths', '1kg project')
-        self.assertEqual(IgvSample.objects.filter(file_path='').count(), 0)
-        self.assertEqual(IgvSample.objects.count(), 2)
+        self.assertEqual(IgvSample.objects.count(), 3)
 
         calls = [
             mock.call('---- DONE ----'),
@@ -38,40 +28,41 @@ def test_command_with_other_project(self, mock_logger, mock_hadoop_is_file):
         ]
         mock_logger.info.assert_has_calls(calls)
 
-    @mock.patch('hail.hadoop_is_file')
-    @mock.patch('seqr.utils.communication_utils.safe_post_to_slack')
-    @mock.patch('seqr.management.commands.check_bam_cram_paths.logger')
-    def test_command(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file):
-        mock_hadoop_is_file.return_value = False
+    def test_command(self, mock_logger, mock_safe_post_to_slack, mock_subprocess):
+        mock_subprocess.return_value.wait.side_effect = [-1, 0]
         call_command('check_bam_cram_paths')
-        self._check_results(1, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file)
+        self._check_results(True, mock_logger, mock_safe_post_to_slack, mock_subprocess)
 
-    @mock.patch('hail.hadoop_is_file')
-    @mock.patch('seqr.utils.communication_utils.safe_post_to_slack')
-    @mock.patch('seqr.management.commands.check_bam_cram_paths.logger')
-    def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file):
-        mock_hadoop_is_file.return_value = False
+    def test_dry_run_arg(self, mock_logger, mock_safe_post_to_slack, mock_subprocess):
+        mock_subprocess.return_value.wait.side_effect = [-1, 0]
         call_command('check_bam_cram_paths', '--dry-run')
-        self._check_results(0, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file)
+        self._check_results(False, mock_logger, mock_safe_post_to_slack, mock_subprocess)
+
+    def _check_results(self, did_delete, mock_logger, mock_safe_post_to_slack, mock_subprocess):
+        igv_file_paths = IgvSample.objects.values_list('file_path', flat=True)
+        expected_remaining_files = ['/readviz/NA19675.cram', 'gs://datasets-gcnv/NA20870.bed.gz']
+        if not did_delete:
+            expected_remaining_files.append('gs://readviz/NA20870.cram')
+        self.assertListEqual(sorted(igv_file_paths), expected_remaining_files)
 
-    def _check_results(self, num_paths_deleted, mock_logger, mock_safe_post_to_slack, mock_hadoop_is_file):
-        self.assertEqual(IgvSample.objects.filter(file_path='').count(), num_paths_deleted)
-        self.assertEqual(IgvSample.objects.count(), 2)
-        mock_hadoop_is_file.assert_called_with("gs://missing-bucket/missing_file")
+        mock_subprocess.assert_has_calls([
+            mock.call('gsutil ls gs://readviz/NA20870.cram', stdout=-1, stderr=-2, shell=True),
+            mock.call('gsutil ls gs://datasets-gcnv/NA20870.bed.gz', stdout=-1, stderr=-2, shell=True),
+        ], any_order=True)
 
         calls = [
-            mock.call('Individual: NA19675_1 file not found: gs://missing-bucket/missing_file'),
+            mock.call('Individual: NA20870 file not found: gs://readviz/NA20870.cram'),
             mock.call('---- DONE ----'),
-            mock.call('Checked 1 samples'),
+            mock.call('Checked 2 samples'),
             mock.call('1 files not found:'),
             mock.call(' 1 in 1kg project nåme with uniçøde'),
         ]
        mock_logger.info.assert_has_calls(calls)
 
-        if num_paths_deleted == 0:
+        if not did_delete:
             mock_safe_post_to_slack.assert_not_called()
         else:
             self.assertEqual(mock_safe_post_to_slack.call_count, 1)
             mock_safe_post_to_slack.assert_called_with(
                 SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL,
-                "Found 1 broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA19675_1 gs://missing-bucket/missing_file")
+                "Found and removed 1 broken bam/cram path(s)\n\nIn project 1kg project nåme with uniçøde:\n NA20870 gs://readviz/NA20870.cram")
diff --git a/seqr/management/tests/lift_project_to_hg38_tests.py b/seqr/management/tests/lift_project_to_hg38_tests.py
index 99e74cc180..0342bd0fac 100644
--- a/seqr/management/tests/lift_project_to_hg38_tests.py
+++ b/seqr/management/tests/lift_project_to_hg38_tests.py
@@ -167,7 +167,7 @@ def test_command_other_exceptions(self, mock_liftover, mock_single_es_variants,
         with self.assertRaises(Exception) as ce:
             call_command('lift_project_to_hg38', '--project={}'.format(PROJECT_NAME),
                          '--es-index={}'.format(ELASTICSEARCH_INDEX))
-        self.assertEqual(str(ce.exception), 'Elasticsearch backend is disabled')
+        self.assertEqual(str(ce.exception), 'Adding samples is disabled for the hail backend')
 
         # Test discontinue on a failed lift
         mock_liftover_to_38 = mock_liftover.return_value
diff --git a/seqr/migrations/0052_rnaseqspliceoutlier_rank.py b/seqr/migrations/0052_rnaseqspliceoutlier_rank.py
new file mode 100644
index 0000000000..0958346d3c
--- /dev/null
+++ b/seqr/migrations/0052_rnaseqspliceoutlier_rank.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.19 on 2023-06-12 19:19
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('seqr', '0051_auto_20230426_1615'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='rnaseqspliceoutlier',
+            name='rank',
+            field=models.IntegerField(),
+        ),
+    ]
diff --git a/seqr/models.py b/seqr/models.py
index 81f7ddb9fc..df421a6ec2 100644
--- a/seqr/models.py
+++ b/seqr/models.py
@@ -1111,6 +1111,7 @@ class RnaSeqSpliceOutlier(DeletableSampleMetadataModel):
         ('*', 'Any direction'),
     )
 
+    rank = models.IntegerField()
     p_value = models.FloatField()
     z_score = models.FloatField()
     chrom = models.CharField(max_length=2)
diff --git a/seqr/urls.py b/seqr/urls.py
index a0d3ff683e..ab4751dfa1 100644
--- a/seqr/urls.py
+++ b/seqr/urls.py
@@ -167,6 +167,7 @@
     'matchmaker/disclaimer',
     'privacy_policy',
     'terms_of_service',
+    'faq',
 ]
 
diff --git a/seqr/utils/search/add_data_utils.py b/seqr/utils/search/add_data_utils.py
index 33092cb1fd..4dabd97e23 100644
--- a/seqr/utils/search/add_data_utils.py
+++ b/seqr/utils/search/add_data_utils.py
@@ -4,12 +4,19 @@
 from seqr.views.utils.dataset_utils import match_and_update_search_samples, load_mapping_file
 
 
+def _hail_backend_error(*args, **kwargs):
+    raise ValueError('Adding samples is disabled for the hail backend')
+
+
 def add_new_search_samples(request_json, project, user, summary_template=None, expected_families=None):
     dataset_type = request_json.get('datasetType')
     if dataset_type not in Sample.DATASET_TYPE_LOOKUP:
         raise ValueError(f'Invalid dataset type "{dataset_type}"')
 
-    sample_ids, sample_type, sample_data = backend_specific_call(validate_es_index_metadata_and_get_samples)(request_json, project)
+    sample_ids, sample_type, sample_data = backend_specific_call(
+        validate_es_index_metadata_and_get_samples,
+        _hail_backend_error,
+    )(request_json, project)
     if not sample_ids:
         raise ValueError('No samples found. Make sure the specified caller type is correct')
 
diff --git a/seqr/utils/search/elasticsearch/es_utils.py b/seqr/utils/search/elasticsearch/es_utils.py
index 17274cd039..5f4c895e8f 100644
--- a/seqr/utils/search/elasticsearch/es_utils.py
+++ b/seqr/utils/search/elasticsearch/es_utils.py
@@ -2,6 +2,7 @@
 from elasticsearch import Elasticsearch
 from elasticsearch.exceptions import ConnectionError as EsConnectionError, TransportError
 import elasticsearch_dsl
+from urllib3.connectionpool import connection_from_url
 
 from seqr.models import Sample
 from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json
@@ -10,7 +11,7 @@
 from seqr.utils.search.elasticsearch.es_search import EsSearch, get_compound_het_page
 from seqr.views.utils.json_utils import _to_camel_case
 from settings import ELASTICSEARCH_SERVICE_HOSTNAME, ELASTICSEARCH_SERVICE_PORT, ELASTICSEARCH_CREDENTIALS, \
-    ELASTICSEARCH_PROTOCOL, ES_SSL_CONTEXT
+    ELASTICSEARCH_PROTOCOL, ES_SSL_CONTEXT, KIBANA_SERVER
 
 
 class InvalidIndexException(Exception):
@@ -67,6 +68,12 @@ def ping_elasticsearch():
         raise ValueError('No response from elasticsearch ping')
 
 
+def ping_kibana():
+    resp = connection_from_url('http://{}'.format(KIBANA_SERVER)).urlopen('HEAD', '/status', timeout=3, retries=3)
+    if resp.status >= 400:
+        raise ValueError('Kibana Error {}: {}'.format(resp.status, resp.reason))
+
+
 SAMPLE_FIELDS_LIST = ['samples', 'samples_num_alt_1']
 # support .bgz instead of requiring .vcf.bgz due to issues with DSP delivery of large callsets
 DATASET_FILE_EXTENSIONS = VCF_FILE_EXTENSIONS[:-1] + ('.bgz', '.bed', '.mt')
@@ -255,13 +262,11 @@ def _get_es_indices(client):
     return indices, seqr_index_projects
 
 
-def get_es_variants_for_variant_ids(samples, genome_version, variant_ids, user, dataset_type=None, return_all_queried_families=False):
+def get_es_variants_for_variant_ids(samples, genome_version, variants_by_id, user, return_all_queried_families=False):
     variants = EsSearch(
         samples, genome_version, user=user, return_all_queried_families=return_all_queried_families,
-    ).filter_by_variant_ids(variant_ids)
-    if dataset_type:
-        variants = variants.update_dataset_type(dataset_type)
-    return variants.search(num_results=len(variant_ids))
+    ).filter_by_variant_ids(list(variants_by_id.keys()))
+    return variants.search(num_results=len(variants_by_id))
 
 
 def get_es_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=None, num_results=None,
diff --git a/seqr/utils/search/elasticsearch/es_utils_tests.py b/seqr/utils/search/elasticsearch/es_utils_tests.py
index c40c1c8d3e..c207a9cfec 100644
--- a/seqr/utils/search/elasticsearch/es_utils_tests.py
+++ b/seqr/utils/search/elasticsearch/es_utils_tests.py
@@ -732,7 +732,7 @@ def call_request_json(self, index=-1):
 PARSED_NO_CONSEQUENCE_FILTER_VARIANTS = deepcopy(PARSED_VARIANTS)
 PARSED_NO_CONSEQUENCE_FILTER_VARIANTS[1]['selectedMainTranscriptId'] = None
 
-PARSED_NO_SORT_VARIANTS = deepcopy(PARSED_NO_CONSEQUENCE_FILTER_VARIANTS)
+PARSED_NO_SORT_VARIANTS = deepcopy(PARSED_NO_CONSEQUENCE_FILTER_VARIANTS + [PARSED_SV_VARIANT])
 for var in PARSED_NO_SORT_VARIANTS:
     del var['_sort']
 
@@ -1397,7 +1397,13 @@ def test_get_single_es_variant(self):
         self.assertDictEqual(variant, PARSED_NO_SORT_VARIANTS[1])
         self.assertExecutedSearch(
             filters=[{'terms': {'variantId': ['2-103343353-GAGA-G']}}],
-            size=3, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME, SV_INDEX_NAME]), unsorted=True,
+            size=2, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME]), unsorted=True,
+        )
+
+        variant = get_single_variant(self.families, 'prefix_19107_DEL')
+        self.assertDictEqual(variant, PARSED_NO_SORT_VARIANTS[2])
+        self.assertExecutedSearch(
+            filters=[{'terms': {'variantId': ['prefix_19107_DEL']}}], size=1, index=SV_INDEX_NAME, unsorted=True,
         )
 
         variant = get_single_variant(self.families, '1-248367227-TC-T', return_all_queried_families=True)
@@ -1409,7 +1415,7 @@ def test_get_single_es_variant(self):
         self.assertDictEqual(variant, all_family_variant)
         self.assertExecutedSearch(
             filters=[{'terms': {'variantId': ['1-248367227-TC-T']}}],
-            size=3, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME, SV_INDEX_NAME]), unsorted=True,
+            size=2, index=','.join([INDEX_NAME, MITO_WGS_INDEX_NAME]), unsorted=True,
         )
 
         with self.assertRaises(InvalidSearchException) as cm:
diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py
index dfb92538d6..c80c9f9d0e 100644
--- a/seqr/utils/search/hail_search_utils.py
+++ b/seqr/utils/search/hail_search_utils.py
@@ -9,35 +9,42 @@
 from settings import HAIL_BACKEND_SERVICE_HOSTNAME, HAIL_BACKEND_SERVICE_PORT
 
 
+def _hail_backend_url(path):
+    return f'{HAIL_BACKEND_SERVICE_HOSTNAME}:{HAIL_BACKEND_SERVICE_PORT}/{path}'
+
+
+def _execute_search(search_body, user, path='search'):
+    response = requests.post(_hail_backend_url(path), json=search_body, headers={'From': user.email}, timeout=300)
+    response.raise_for_status()
+    return response.json()
+
+
+def ping_hail_backend():
+    requests.get(_hail_backend_url('status'), timeout=5).raise_for_status()
+
+
 def get_hail_variants(samples, search, user, previous_search_results, genome_version, sort=None, page=1, num_results=100,
                       gene_agg=False, **kwargs):
     end_offset = num_results * page
-    search_body = {
-        'requester_email': user.email,
-        'genome_version': GENOME_VERSION_LOOKUP[genome_version],
+    search_body = _format_search_body(samples, genome_version, end_offset, search)
+
+    frequencies = search_body.pop('freqs', None)
+    if frequencies and frequencies.get('callset'):
+        frequencies['seqr'] = frequencies.pop('callset')
+
+    search_body.update({
         'sort': sort,
         'sort_metadata': _get_sort_metadata(sort, samples),
-        'num_results': end_offset,
-    }
-    search_body.update(search)
-    search_body.update({
-        'frequencies': search_body.pop('freqs', None),
+        'frequencies': frequencies,
         'quality_filter': search_body.pop('qualityFilter', None),
         'custom_query': search_body.pop('customQuery', None),
     })
     search_body.pop('skipped_samples', None)
 
-    search_body['sample_data'] = _get_sample_data(samples, search_body.get('inheritance_filter'))
-
     _parse_location_search(search_body)
 
     path = 'gene_counts' if gene_agg else 'search'
-    response = requests.post(
-        f'{HAIL_BACKEND_SERVICE_HOSTNAME}:{HAIL_BACKEND_SERVICE_PORT}/{path}', json=search_body, timeout=300,
-    )
-    response.raise_for_status()
-    response_json = response.json()
+    response_json = _execute_search(search_body, user, path)
 
     if gene_agg:
         previous_search_results['gene_aggs'] = response_json
@@ -48,6 +55,31 @@ def get_hail_variants(samples, search, user, previous_search_results, genome_ver
     return response_json['results'][end_offset - num_results:end_offset]
 
 
+def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_ids, user, return_all_queried_families=False):
+    search = {
+        'variant_ids': [parsed_id for parsed_id in parsed_variant_ids.values() if parsed_id],
+        'variant_keys': [variant_id for variant_id, parsed_id in parsed_variant_ids.items() if not parsed_id],
+    }
+    search_body = _format_search_body(samples, genome_version, len(parsed_variant_ids), search)
+    response_json = _execute_search(search_body, user)
+
+    if return_all_queried_families:
+        expected_family_guids = set(samples.values_list('individual__family__guid', flat=True))
+        _validate_expected_families(response_json['results'], expected_family_guids)
+
+    return response_json['results']
+
+
+def _format_search_body(samples, genome_version, num_results, search):
+    search_body = {
+        'genome_version': GENOME_VERSION_LOOKUP[genome_version],
+        'num_results': num_results,
+    }
+    search_body.update(search)
+    search_body['sample_data'] = _get_sample_data(samples, search_body.get('inheritance_filter'))
+    return search_body
+
+
 def _get_sample_data(samples, inheritance_filter):
     sample_data = samples.order_by('id').values(
         'sample_id', 'dataset_type', 'sample_type',
@@ -123,3 +155,20 @@ def _format_interval(chrom=None, start=None, end=None, offset=None, **kwargs):
         start = max(start - offset_pos, MIN_POS)
         end = min(end + offset_pos, MAX_POS)
     return f'{chrom}:{start}-{end}'
+
+
+def _validate_expected_families(results, expected_families):
+    # In the ES backend we could force return variants even if all families are hom ref
+    # This is not possible in the hail backend as those rows are removed at loading, so fail if missing
+    invalid_family_variants = []
+    for result in results:
+        missing_families = expected_families - set(result['familyGuids'])
+        if missing_families:
+            invalid_family_variants.append((result['variantId'], missing_families))
+
+    if invalid_family_variants:
+        from seqr.utils.search.utils import InvalidSearchException
+        missing = ', '.join([
+            f'{variant_id} ({"; ".join(sorted(families))})' for variant_id, families in invalid_family_variants
+        ])
+        raise InvalidSearchException(f'Unable to return all families for the following variants: {missing}')
diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py
index be1508459f..51e4bbee94 100644
--- a/seqr/utils/search/hail_search_utils_tests.py
+++ b/seqr/utils/search/hail_search_utils_tests.py
@@ -6,18 +6,23 @@
 import responses
 
 from seqr.models import Family
-from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants
+from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants, get_single_variant, \
+    get_variants_for_variant_ids, InvalidSearchException
 from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS
 from seqr.views.utils.test_utils import PARSED_VARIANTS
 
 MOCK_HOST = 'http://test-hail-host'
 
+FAMILY_3_SAMPLE = {
+    'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3',
+    'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M',
+}
 EXPECTED_SAMPLE_DATA = {
     'VARIANTS': [
         {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
         {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
         {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
-        {'sample_id': 'NA20870', 'individual_guid': 'I000007_na20870', 'family_guid': 'F000003_3', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'M'},
+        FAMILY_3_SAMPLE,
     ], 'SV_WES': [
         {'sample_id': 'HG00731', 'individual_guid': 'I000004_hg00731', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'A', 'sex': 'F'},
         {'sample_id': 'HG00732', 'individual_guid': 'I000005_hg00732', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M'},
@@ -35,6 +40,15 @@
     ],
 }
 
+ALL_AFFECTED_SAMPLE_DATA = deepcopy(EXPECTED_SAMPLE_DATA)
+ALL_AFFECTED_SAMPLE_DATA['MITO'] = [
+    {'sample_id': 'HG00733', 'individual_guid': 'I000006_hg00733', 'family_guid': 'F000002_2', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'F'},
+]
+FAMILY_5_SAMPLE = {
+    'sample_id': 'NA20874', 'individual_guid': 'I000009_na20874', 'family_guid': 'F000005_5', 'project_guid': 'R0001_1kg', 'affected': 'N', 'sex': 'M',
+}
+ALL_AFFECTED_SAMPLE_DATA['VARIANTS'].append(FAMILY_5_SAMPLE)
+
 
 @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST)
 class HailSearchUtilsTests(SearchTestHelper, TestCase):
@@ -43,23 +57,34 @@ class HailSearchUtilsTests(SearchTestHelper, TestCase):
 
     def setUp(self):
         super(HailSearchUtilsTests, self).set_up()
+        responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={
+            'results': PARSED_VARIANTS, 'total': 5,
+        })
 
-    def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None,
-                                   rs_ids=None, variant_ids=None, dataset_type=None, secondary_dataset_type=None,
-                                   frequencies=None, custom_query=None, inheritance_mode='de_novo', inheritance_filter=None,
-                                   quality_filter=None, sort='xpos', sort_metadata=None, num_results=100,
-                                   sample_data=None, omit_sample_type=None):
+    def _test_minimal_search_call(self, search_body, num_results=100, sample_data=None, omit_sample_type=None):
         sample_data = sample_data or EXPECTED_SAMPLE_DATA
         if omit_sample_type:
             sample_data = {k: v for k, v in sample_data.items() if k != omit_sample_type}
 
         expected_search = {
-            'requester_email': 'test_user@broadinstitute.org',
             'sample_data': sample_data,
             'genome_version': 'GRCh37',
+            'num_results': num_results,
+        }
+        expected_search.update(search_body)
+
+        executed_request = responses.calls[-1].request
+        self.assertEqual(executed_request.headers.get('From'), 'test_user@broadinstitute.org')
+        self.assertDictEqual(json.loads(executed_request.body), expected_search)
+
+    def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None,
+                                   rs_ids=None, variant_ids=None, dataset_type=None, secondary_dataset_type=None,
+                                   frequencies=None, custom_query=None, inheritance_mode='de_novo', inheritance_filter=None,
+                                   quality_filter=None, sort='xpos', sort_metadata=None, **kwargs):
+
+        expected_search = {
             'sort': sort,
             'sort_metadata': sort_metadata,
-            'num_results': num_results,
             'inheritance_mode': inheritance_mode,
             'inheritance_filter': inheritance_filter or {},
             'dataset_type': dataset_type,
@@ -75,21 +100,10 @@ def _test_expected_search_call(self, search_fields=None, gene_ids=None, interval
         }
         expected_search.update({field: self.search_model.search[field] for field in search_fields or []})
 
-        request_body = json.loads(responses.calls[-1].request.body)
-        self.assertDictEqual(request_body, expected_search)
+        self._test_minimal_search_call(expected_search, **kwargs)
 
     @responses.activate
     def test_query_variants(self):
-        responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error')
-        with self.assertRaises(HTTPError) as cm:
-            query_variants(self.results_model, user=self.user)
-        self.assertEqual(cm.exception.response.status_code, 400)
-        self.assertEqual(cm.exception.response.text, 'Bad Search Error')
-
-        responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={
-            'results': PARSED_VARIANTS, 'total': 5,
-        })
-
         variants, total = query_variants(self.results_model, user=self.user)
         self.assertListEqual(variants, PARSED_VARIANTS)
         self.assertEqual(total, 5)
@@ -167,21 +181,28 @@ def test_query_variants(self):
         freq_filter = {'callset': {'af': 0.1}, 'gnomad_genomes': {'af': 0.01, 'ac': 3, 'hh': 3}}
         custom_query = {'term': {'customFlag': 'flagVal'}}
         genotype_filter = {'genotype': {'I000001_na19675': 'ref_alt'}}
-        self.search_model.search = {
+        self.search_model.search = deepcopy({
             'inheritance': {'mode': 'any_affected', 'filter': genotype_filter}, 'freqs': freq_filter,
             'qualityFilter': quality_filter, 'in_silico': {'cadd': '11.5', 'sift': 'D'}, 'customQuery': custom_query,
-        }
+        })
         self.results_model.families.set(Family.objects.filter(guid='F000001_1'))
         query_variants(self.results_model, user=self.user, sort='prioritized_gene')
+        expected_freq_filter = {'seqr': freq_filter['callset'], 'gnomad_genomes': freq_filter['gnomad_genomes']}
         self._test_expected_search_call(
             inheritance_mode=None, inheritance_filter=genotype_filter, sample_data=FAMILY_1_SAMPLE_DATA,
-            search_fields=['in_silico'], frequencies=freq_filter, quality_filter=quality_filter, custom_query=custom_query,
+            search_fields=['in_silico'], frequencies=expected_freq_filter, quality_filter=quality_filter, custom_query=custom_query,
             sort='prioritized_gene', sort_metadata={'ENSG00000268903': 1, 'ENSG00000268904': 11},
         )
 
+        responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=400, body='Bad Search Error')
+        with self.assertRaises(HTTPError) as cm:
+            query_variants(self.results_model, user=self.user)
+        self.assertEqual(cm.exception.response.status_code, 400)
+        self.assertEqual(cm.exception.response.text, 'Bad Search Error')
+
     @responses.activate
     def test_get_variant_query_gene_counts(self):
         responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=MOCK_COUNTS, status=200)
@@ -190,3 +211,48 @@ def test_get_variant_query_gene_counts(self):
         self.assertDictEqual(gene_counts, MOCK_COUNTS)
         self.assert_cached_results({'gene_aggs': gene_counts})
         self._test_expected_search_call(sort=None)
+
+    @responses.activate
+    def test_get_single_variant(self):
+        variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user)
+        self.assertDictEqual(variant, PARSED_VARIANTS[0])
+        self._test_minimal_search_call({
+            'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [],
+        }, num_results=1, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')
+
+        get_single_variant(self.families, 'prefix_19107_DEL', user=self.user)
+        self._test_minimal_search_call({
+            'variant_ids': [], 'variant_keys': ['prefix_19107_DEL'],
+        }, num_results=1, sample_data=EXPECTED_SAMPLE_DATA, omit_sample_type='VARIANTS')
+
+        with self.assertRaises(InvalidSearchException) as cm:
+            get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True)
+        self.assertEqual(
+            str(cm.exception),
+            'Unable to return all families for the following variants: 1-248367227-TC-T (F000002_2; F000005_5), 2-103343353-GAGA-G (F000005_5)',
+        )
+
+        get_single_variant(self.families.filter(guid='F000003_3'), '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True)
+        self._test_minimal_search_call({
+            'variant_ids': [['2', 103343353, 'GAGA', 'G']], 'variant_keys': [],
+        }, num_results=1, sample_data={'VARIANTS': [FAMILY_3_SAMPLE]})
+
+        responses.add(responses.POST, f'{MOCK_HOST}:5000/search', status=200, json={'results': [], 'total': 0})
+        with self.assertRaises(InvalidSearchException) as cm:
+            get_single_variant(self.families, '10-10334333-A-G', user=self.user)
+        self.assertEqual(str(cm.exception), 'Variant 10-10334333-A-G not found')
+
+    @responses.activate
+    def test_get_variants_for_variant_ids(self):
+        variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL']
+        get_variants_for_variant_ids(self.families, variant_ids, user=self.user)
+        self._test_minimal_search_call({
+            'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
+            'variant_keys': ['prefix-938_DEL'],
+        }, num_results=3, sample_data=ALL_AFFECTED_SAMPLE_DATA)
+
+        get_variants_for_variant_ids(self.families, variant_ids, user=self.user, dataset_type='VARIANTS')
+        self._test_minimal_search_call({
+            'variant_ids': [['2', 103343353, 'GAGA', 'G'], ['1', 248367227, 'TC', 'T']],
+            'variant_keys': [],
+        }, num_results=2, sample_data=ALL_AFFECTED_SAMPLE_DATA, omit_sample_type='SV_WES')
diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py
index 34213fb4a0..afb0816f98 100644
--- a/seqr/utils/search/search_utils_tests.py
+++ b/seqr/utils/search/search_utils_tests.py
@@ -59,15 +59,27 @@ def test_get_single_variant(self, mock_get_variants_for_ids):
         variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user)
         self.assertDictEqual(variant, PARSED_VARIANTS[0])
         mock_get_variants_for_ids.assert_called_with(
-            mock.ANY, '37', ['2-103343353-GAGA-G'], self.user, return_all_queried_families=False,
+            mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, self.user, return_all_queried_families=False,
         )
-        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples))
+        expected_samples = {
+            s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733']
+        }
+        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples)
 
         get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user, return_all_queried_families=True)
         mock_get_variants_for_ids.assert_called_with(
-            mock.ANY, '37', ['2-103343353-GAGA-G'], self.user, return_all_queried_families=True,
+            mock.ANY, '37', {'2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G')}, self.user, return_all_queried_families=True,
         )
-        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples))
+        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples)
+
+        get_single_variant(self.families, 'prefix_19107_DEL', user=self.user)
+        mock_get_variants_for_ids.assert_called_with(
+            mock.ANY, '37', {'prefix_19107_DEL': None}, self.user, return_all_queried_families=False,
+        )
+        expected_samples = {
+            s for s in self.search_samples if s.guid in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733']
+        }
+        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples)
 
         mock_get_variants_for_ids.return_value = []
         with self.assertRaises(InvalidSearchException) as cm:
@@ -77,14 +89,23 @@ def test_get_single_variant(self, mock_get_variants_for_ids):
     def test_get_variants_for_variant_ids(self, mock_get_variants_for_ids):
         variant_ids = ['2-103343353-GAGA-G', '1-248367227-TC-T', 'prefix-938_DEL']
         get_variants_for_variant_ids(self.families, variant_ids, user=self.user)
-        mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', variant_ids, self.user, dataset_type=None)
+        mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', {
+            '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'),
+            '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'),
+            'prefix-938_DEL': None,
+        }, self.user)
         self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples))
 
         get_variants_for_variant_ids(
             self.families, variant_ids, user=self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS)
-        mock_get_variants_for_ids.assert_called_with(
-            mock.ANY, '37', variant_ids, self.user, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS)
-        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), set(self.search_samples))
+        mock_get_variants_for_ids.assert_called_with(mock.ANY, '37', {
+            '2-103343353-GAGA-G': ('2', 103343353, 'GAGA', 'G'),
+            '1-248367227-TC-T': ('1', 248367227, 'TC', 'T'),
+        }, self.user)
+        expected_samples = {
+            s for s in self.search_samples if s.guid not in ['S000145_hg00731', 'S000146_hg00732', 'S000148_hg00733']
+        }
+        self.assertSetEqual(set(mock_get_variants_for_ids.call_args.args[0]), expected_samples)
 
     @mock.patch('seqr.utils.search.utils.MAX_NO_LOCATION_COMP_HET_FAMILIES', 1)
     def _test_invalid_search_params(self, search_func):
@@ -435,19 +456,13 @@ class HailSearchUtilsTests(TestCase, SearchUtilsTests):
     def setUp(self):
         self.set_up()
 
-    @mock.patch('seqr.utils.search.utils.ping_elasticsearch')
+    @mock.patch('seqr.utils.search.utils.get_hail_variants_for_variant_ids')
     def test_get_single_variant(self, mock_call):
-        with self.assertRaises(InvalidSearchException) as cm:
-            super(HailSearchUtilsTests, self).test_get_single_variant(mock_call)
-        self.assertEqual(str(cm.exception), 'Elasticsearch backend is disabled')
-        mock_call.assert_not_called()
+        super(HailSearchUtilsTests, self).test_get_single_variant(mock_call)
 
-    @mock.patch('seqr.utils.search.utils.ping_elasticsearch')
+    @mock.patch('seqr.utils.search.utils.get_hail_variants_for_variant_ids')
    def test_get_variants_for_variant_ids(self, mock_call):
-        with self.assertRaises(InvalidSearchException) as cm:
-            super(HailSearchUtilsTests, self).test_get_variants_for_variant_ids(mock_call)
-        self.assertEqual(str(cm.exception), 'Elasticsearch backend is disabled')
-        mock_call.assert_not_called()
+        super(HailSearchUtilsTests, self).test_get_variants_for_variant_ids(mock_call)
 
     @mock.patch('seqr.utils.search.utils.get_hail_variants')
     def test_query_variants(self, mock_call):
diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py
index e17e9bb2fa..c7d6019db0 100644
--- a/seqr/utils/search/utils.py
+++ b/seqr/utils/search/utils.py
@@ -8,8 +8,8 @@
 from seqr.utils.search.elasticsearch.constants import MAX_VARIANTS
 from seqr.utils.search.elasticsearch.es_utils import ping_elasticsearch, delete_es_index, get_elasticsearch_status, \
     get_es_variants, get_es_variants_for_variant_ids, process_es_previously_loaded_results, process_es_previously_loaded_gene_aggs, \
-    es_backend_enabled, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS
-from seqr.utils.search.hail_search_utils import get_hail_variants
+    es_backend_enabled, ping_kibana, ES_EXCEPTION_ERROR_MAP, ES_EXCEPTION_MESSAGE_MAP, ES_ERROR_LOG_EXCEPTIONS
+from seqr.utils.search.hail_search_utils import get_hail_variants, get_hail_variants_for_variant_ids, ping_hail_backend
 from seqr.utils.gene_utils import parse_locus_list_items
 from seqr.utils.xpos_utils import get_xpos
 
@@ -38,11 +38,13 @@ class InvalidSearchException(Exception):
 DATASET_TYPES_LOOKUP[ALL_DATA_TYPES] = [dt for dts in DATASET_TYPES_LOOKUP.values() for dt in dts]
 
 
-def _no_backend_error(*args, **kwargs):
-    raise InvalidSearchException('Elasticsearch backend is disabled')
+def _raise_search_error(error):
+    def _wrapped(*args, **kwargs):
+        raise InvalidSearchException(error)
+    return _wrapped
 
 
-def backend_specific_call(es_func, other_func=_no_backend_error):
+def backend_specific_call(es_func, other_func):
     if es_backend_enabled():
         return es_func
     else:
@@ -50,11 +52,15 @@ def backend_specific_call(es_func, other_func=_no_backend_error):
 
 
 def ping_search_backend():
-    backend_specific_call(ping_elasticsearch)()
+    backend_specific_call(ping_elasticsearch, ping_hail_backend)()
+
+
+def ping_search_backend_admin():
+    backend_specific_call(ping_kibana, lambda: True)()
 
 
 def get_search_backend_status():
-    return backend_specific_call(get_elasticsearch_status)()
+    return backend_specific_call(get_elasticsearch_status, _raise_search_error('Elasticsearch is disabled'))()
 
 
 def _get_filtered_search_samples(search_filter, active_only=True):
@@ -68,16 +74,16 @@ def get_search_samples(projects, active_only=True):
     return _get_filtered_search_samples({'individual__family__project__in': projects}, active_only=active_only)
 
 
-def _get_families_search_data(families, dataset_types=None):
+def _get_families_search_data(families, dataset_type=None):
     samples = _get_filtered_search_samples({'individual__family__in': families})
     if len(samples) < 1:
         raise InvalidSearchException('No search data found for families {}'.format(
             ', '.join([f.family_id for f in families])))
 
-    if dataset_types:
-        samples = samples.filter(dataset_type__in=dataset_types)
+    if dataset_type:
+        samples = samples.filter(dataset_type__in=DATASET_TYPES_LOOKUP[dataset_type])
         if not samples:
-            raise InvalidSearchException(f'Unable to search against dataset type "{dataset_types[0]}"')
+            raise InvalidSearchException(f'Unable to search against dataset type "{dataset_type}"')
 
     projects = Project.objects.filter(family__individual__sample__in=samples).values_list('genome_version', 'name')
     project_versions = defaultdict(set)
@@ -99,12 +105,14 @@ def delete_search_backend_data(data_id):
         projects = set(active_samples.values_list('individual__family__project__name', flat=True))
         raise InvalidSearchException(f'"{data_id}" is still used by: {", ".join(projects)}')
 
-    return backend_specific_call(delete_es_index)(data_id)
+    return backend_specific_call(
+        delete_es_index, _raise_search_error('Deleting indices is disabled for the hail backend'),
+    )(data_id)
 
 
 def get_single_variant(families, variant_id, return_all_queried_families=False, user=None):
-    variants = backend_specific_call(get_es_variants_for_variant_ids)(
-        *_get_families_search_data(families), [variant_id], user, return_all_queried_families=return_all_queried_families,
+    variants = _get_variants_for_variant_ids(
+        families, [variant_id], user, return_all_queried_families=return_all_queried_families,
     )
     if not variants:
         raise InvalidSearchException('Variant {} not found'.format(variant_id))
@@ -112,8 +120,30 @@ def get_single_variant(families, variant_id, return_all_queried_families=False,
 
 
 def get_variants_for_variant_ids(families, variant_ids, dataset_type=None, user=None):
-    return backend_specific_call(get_es_variants_for_variant_ids)(
-        *_get_families_search_data(families), variant_ids, user, dataset_type=dataset_type,
+    return _get_variants_for_variant_ids(families, variant_ids, user, dataset_type=dataset_type)
+
+
+def _get_variants_for_variant_ids(families, variant_ids, user, dataset_type=None, **kwargs):
+    parsed_variant_ids = {}
+    for variant_id in variant_ids:
+        try:
+            parsed_variant_ids[variant_id] = _parse_variant_id(variant_id)
+        except (KeyError, ValueError):
+            parsed_variant_ids[variant_id] = None
+
+    if dataset_type:
+        parsed_variant_ids = {
+            k: v for k, v in parsed_variant_ids.items()
+            if (dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS and v) or
+               (dataset_type != Sample.DATASET_TYPE_VARIANT_CALLS and not v)
+        }
+    elif all(v for v in parsed_variant_ids.values()):
+        dataset_type = Sample.DATASET_TYPE_VARIANT_CALLS
+    elif all(v is None for v in parsed_variant_ids.values()):
+        dataset_type = Sample.DATASET_TYPE_SV_CALLS
+
+    return backend_specific_call(get_es_variants_for_variant_ids, get_hail_variants_for_variant_ids)(
+        *_get_families_search_data(families, dataset_type=dataset_type), parsed_variant_ids, user, **kwargs
     )
 
 
@@ -198,11 +228,11 @@ def _query_variants(search_model, user, previous_search_results, sort=None, num_
     dataset_type, secondary_dataset_type = _search_dataset_type(parsed_search)
     parsed_search.update({'dataset_type': dataset_type, 'secondary_dataset_type': secondary_dataset_type})
 
-    dataset_types = None
+    search_dataset_type = None
     if dataset_type and dataset_type != ALL_DATA_TYPES and (secondary_dataset_type is None or secondary_dataset_type == dataset_type):
-        dataset_types = DATASET_TYPES_LOOKUP[dataset_type]
+        search_dataset_type = dataset_type
 
-    samples, genome_version = _get_families_search_data(families, dataset_types=dataset_types)
+    samples, genome_version = _get_families_search_data(families, dataset_type=search_dataset_type)
     if parsed_search.get('inheritance'):
         samples = _parse_inheritance(parsed_search, samples, previous_search_results)
 
@@ -265,17 +295,21 @@ def _parse_variant_items(search_json):
         else:
             try:
                 variant_id = item.lstrip('chr')
-                chrom, pos, ref, alt = variant_id.split('-')
-                pos = int(pos)
-                get_xpos(chrom, pos)
+                parsed_variant_ids.append(_parse_variant_id(variant_id))
                 variant_ids.append(variant_id)
-                parsed_variant_ids.append((chrom, pos, ref, alt))
             except (KeyError, ValueError):
                 invalid_items.append(item)
 
     return rs_ids, variant_ids, parsed_variant_ids, invalid_items
 
 
+def _parse_variant_id(variant_id):
+    chrom, pos, ref, alt = variant_id.split('-')
+    pos = int(pos)
+    get_xpos(chrom, pos)
+    return chrom, pos, ref, alt
+
+
 def _validate_sort(sort, families):
     if sort == PRIORITIZED_GENE_SORT and len(families) > 1:
         raise InvalidSearchException('Phenotype sort is only supported for single-family search.')
diff --git a/seqr/utils/vcf_utils.py b/seqr/utils/vcf_utils.py
index fd5779375d..5e1adaa4fc 100644
--- a/seqr/utils/vcf_utils.py
+++ b/seqr/utils/vcf_utils.py
@@ -8,14 +8,8 @@
 BLOCK_SIZE = 65536
 
 EXPECTED_META_FIELDS ={
-    'INFO': {
-        'AC': 'Integer',
-        'AN': 'Integer',
-        'AF': 'Float'
-    },
     'FORMAT': {
         'AD': 'Integer',
-        'DP': 'Integer',
         'GQ': 'Integer',
         'GT': 'String'
     }
diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
index 14264d5875..43d9905501 100644
--- a/seqr/views/apis/anvil_workspace_api.py
+++ b/seqr/views/apis/anvil_workspace_api.py
@@ -28,7 +28,7 @@
 from seqr.views.utils.file_utils import load_uploaded_file
 from seqr.views.utils.terra_api_utils import add_service_account, has_service_account_access, TerraAPIException, \
     TerraRefreshTokenFailedException
-from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, JsonConstants
+from seqr.views.utils.pedigree_info_utils import parse_basic_pedigree_table, JsonConstants
 from seqr.views.utils.individual_utils import add_or_update_individuals_and_families
 from seqr.utils.communication_utils import safe_post_to_slack, send_html_email
 from seqr.utils.file_utils import does_file_exist, mv_file_to_gs, get_gs_file_list
@@ -192,7 +192,7 @@ def create_project_from_workspace(request, namespace, name):
         error = 'Field(s) "{}" are required'.format(', '.join(missing_fields))
         return create_json_response({'error': error}, status=400, reason=error)
 
-    pedigree_records = _parse_uploaded_pedigree(request_json, request.user)
+    pedigree_records = _parse_uploaded_pedigree(request_json)
 
     # Create a new Project in seqr
     project_args = {
@@ -232,7 +232,7 @@ def add_workspace_data(request, project_guid):
         error = 'Field(s) "{}" are required'.format(', '.join(missing_fields))
         return create_json_response({'error': error}, status=400, reason=error)
 
-    pedigree_records = _parse_uploaded_pedigree(request_json, request.user)
+    pedigree_records = _parse_uploaded_pedigree(request_json)
 
     previous_samples = get_search_samples([project]).filter(
         dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS).prefetch_related('individual')
@@ -257,11 +257,11 @@ def add_workspace_data(request, project_guid):
     return create_json_response(pedigree_json)
 
 
-def _parse_uploaded_pedigree(request_json, user):
+def _parse_uploaded_pedigree(request_json):
     # Parse families/individuals in the uploaded pedigree file
     json_records = load_uploaded_file(request_json['uploadedFileId'])
-    pedigree_records, _ = parse_pedigree_table(
-        json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True, required_columns=[
+    pedigree_records, _ = parse_basic_pedigree_table(
+        json_records, 'uploaded pedigree file', required_columns=[
             JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN,
         ])
 
diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
index 3335bcae32..069da1053f 100644
--- a/seqr/views/apis/anvil_workspace_api_tests.py
+++ b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -214,7 +214,7 @@
 
 BAD_FORMAT_META = [
     b'##FORMAT=\n',
-    b'##FORMAT=\n',
+    b'##FORMAT=\n',
 ]
 
 FORMAT_META = [
@@ -455,10 +455,8 @@ def test_validate_anvil_vcf(self, mock_subprocess, mock_file_logger, mock_utils_
         response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY_GZ_DATA_PATH))
         self.assertEqual(response.status_code, 400)
         self.assertListEqual(response.json()['errors'], [
-            'Missing required INFO field(s) AN',
-            'Incorrect meta Type for INFO.AF - expected "Float", got "Integer"',
-            'Missing required FORMAT field(s) GQ, GT',
-            'Incorrect meta Type for FORMAT.DP - expected "Integer", got "String"'
+            'Missing required FORMAT field(s) GT',
+            'Incorrect meta Type for FORMAT.GQ - expected "Integer", got "String"'
         ])
 
         # Test valid operations
diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
index 8d77c0ea60..1e31c7bb8d 100644
--- a/seqr/views/apis/data_manager_api_tests.py
+++ b/seqr/views/apis/data_manager_api_tests.py
@@ -285,18 +285,19 @@
     'ENSG00000163092-2-167254166-167258349-*-psi3': {
         'chrom': '2', 'start': 167254166, 'end': 167258349, 'strand': '*', 'type': 'psi3',
         'p_value': 1.56e-25, 'z_score': -4.9, 'delta_psi': -0.46, 'read_count': 166, 'gene_id': 'ENSG00000163092',
-        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20
+        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 1,
     },
     'ENSG00000106554-7-132885746-132975168-*-psi5': {
         'chrom': '7', 'start': 132885746, 'end': 132975168, 'strand': '*', 'type': 'psi5',
         'p_value': 1.08e-56, 'z_score': -6.53, 'delta_psi': -0.85, 'read_count': 231, 'gene_id': 'ENSG00000106554',
-        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20},
+        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
+    },
 }
 
 SAMPLE_GENE_SPLICE_DATA2 = {
     'ENSG00000163092-2-167258096-167258349-*-psi3': {
         'chrom': '2', 'start': 167258096, 'end': 167258349, 'strand': '*', 'type': 'psi3',
         'p_value': 1.56e-25, 'z_score': 6.33, 'delta_psi': 0.45, 'read_count': 143, 'gene_id': 'ENSG00000163092',
-        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20
+        'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
     }
 }
 RNA_OUTLIER_SAMPLE_DATA = [
@@ -412,7 +413,7 @@ def test_elasticsearch_status(self):
         with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''):
             response = self.client.get(url)
             self.assertEqual(response.status_code, 400)
-            self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled')
+            self.assertEqual(response.json()['error'], 'Elasticsearch is disabled')
 
     @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost')
     @urllib3_responses.activate
@@ -446,7 +447,7 @@ def test_delete_index(self):
         with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''):
             response = self.client.post(url, content_type='application/json', data=json.dumps({'index': 'unused_index'}))
             self.assertEqual(response.status_code, 400)
-            self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled')
+            self.assertEqual(response.json()['error'], 'Deleting indices is disabled for the hail backend')
 
     @mock.patch('seqr.utils.file_utils.subprocess.Popen')
     def test_upload_qc_pipeline_output(self, mock_subprocess):
@@ -743,11 +744,11 @@ def test_kibana_proxy(self):
         'write_data': {'NA20870\t\t{"ENSG00000163092-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
                        ' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
                        ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163092",'
-                       ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20}}\n',
+                       ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
                        'NA20870\t\t{"ENSG00000163093-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
                        ' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
                        ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163093",'
-                       ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20}}\n',
+                       ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
         },
         'new_data': [
             # existing sample NA19675_1
diff --git a/seqr/views/apis/dataset_api_tests.py b/seqr/views/apis/dataset_api_tests.py
index a63e24593a..e11f3dab41 100644
--- a/seqr/views/apis/dataset_api_tests.py
+++ b/seqr/views/apis/dataset_api_tests.py
@@ -103,13 +103,17 @@ def test_add_variants_dataset(self, mock_send_email, mock_send_slack, mock_rando
         )
         self.assertDictEqual(response_json['individualsByGuid'], {
             'I000002_na19678': {'sampleGuids': mock.ANY},
-            'I000003_na19679': {'sampleGuids': ['S000153_na19679', existing_sample_guid]},
+            'I000003_na19679': {'sampleGuids': mock.ANY},
             'I000013_na20878': {'sampleGuids': [new_sample_guid]},
         })
         self.assertSetEqual(
             set(response_json['individualsByGuid']['I000002_na19678']['sampleGuids']),
             {replaced_sample_guid, existing_old_index_sample_guid}
         )
+        self.assertSetEqual(
+            set(response_json['individualsByGuid']['I000003_na19679']['sampleGuids']),
+            {'S000153_na19679', existing_sample_guid}
+        )
 
         self.assertDictEqual(response_json['familiesByGuid'], {
             'F000001_1': {'analysisStatus': 'I'},
@@ -311,7 +315,7 @@ def test_add_variants_dataset_errors(self):
         with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''):
             response = self.client.post(url, content_type='application/json', data=ADD_DATASET_PAYLOAD)
         self.assertEqual(response.status_code, 400)
-        self.assertEqual(response.json()['error'], 'Elasticsearch backend is disabled')
+        self.assertEqual(response.json()['errors'][0], 'Adding samples is disabled for the hail backend')
 
         response = self.client.post(url, content_type='application/json', data=ADD_DATASET_PAYLOAD)
         self.assertEqual(response.status_code, 400)
diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py
index 0307379631..cede8d7757 100644
--- a/seqr/views/apis/individual_api_tests.py
+++ b/seqr/views/apis/individual_api_tests.py
@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
+import datetime
 import json
 import mock
 from copy import deepcopy
 
 from django.core.files.uploadedfile import SimpleUploadedFile
 from django.urls.base import reverse
+from io import BytesIO
+from openpyxl import load_workbook
 
 from seqr.models import Individual
 from seqr.views.apis.individual_api import edit_individuals_handler, update_individual_handler, \
@@ -354,12 +357,10 @@ def test_delete_individuals(self, mock_pm_group):
         }))
         self.assertEqual(response.status_code, 200 if self.HAS_EXTERNAL_PROJECT_ACCESS else 403)
 
-    @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
-    def test_individuals_table_handler(self):
+    def test_individuals_table_handler_errors(self):
         individuals_url = reverse(receive_individuals_table_handler, args=[PROJECT_GUID])
         self.check_manager_login(individuals_url)
 
-        # send invalid requests
         response = self.client.get(individuals_url)
         self.assertEqual(response.status_code, 400)
         self.assertDictEqual(response.json(), {'errors': ['Received 0 files instead of 1'], 'warnings': []})
@@ -371,6 +372,27 @@ def test_individuals_table_handler(self):
         self.assertEqual(len(errors), 1)
         self.assertEqual(errors[0], 'Missing required columns: Individual Id')
 
+        response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
+            'test.tsv', 'Family ID	Individual ID\n""	""""'.encode('utf-8'))})
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'warnings': None, 'errors': [
+            'Missing Family Id in row #1', 'Missing Individual Id in row #1',
+        ]})
+
+        response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
+            'test.tsv', '#Some comments\n#Family ID	#Individual ID	Previous Individual ID\n"1"	"NA19675_1"""'.encode('utf-8'))})
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'warnings': [], 'errors': [
+            'Error while parsing file: test.tsv. Row 1 contains 2 columns: 1, NA19675_1, while header contains 3: Family ID, Individual ID, Previous Individual ID',
+        ]})
+
+        response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
+            'test.tsv', 'Family ID	Individual ID	Previous Individual ID\n"1"	"NA19675_1"	"NA19675"\n"2"	"NA19675_1"	""'.encode('utf-8'))})
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {
+            'errors': ['NA19675_1 is included as 2 separate records, but must be unique within the project'], 'warnings': []
+        })
+
         response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
             'test.tsv', 'Family ID	Individual ID	Previous Individual ID\n"1"	"NA19675_1"	"NA19675"'.encode('utf-8'))})
         self.assertEqual(response.status_code, 400)
@@ -379,22 +401,55 @@ def test_individuals_table_handler(self):
         })
 
         response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
-            'test.tsv', 'Family ID	Individual ID	Paternal ID\n"1"	"NA19675_1"	"NA19678_dad"'.encode('utf-8'))})
+            'test.tsv', 'Family ID	Individual ID	affected	sex	proband_relation\n"1"	"NA19675_1"	"no"	"boy"	"mom"'.encode('utf-8'))})
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'warnings': None, 'errors': [
+            'Invalid value "no" for Affected in row #1',
+            'Invalid value "boy" for Sex in row #1',
+            'Invalid value "mom" for Proband Relationship in row #1',
+        ]})
+
+        rows = [
+            'Family ID	Individual ID	Paternal ID	sex	proband_relation',
+            '"1"	"NA19675_1"	"NA19678_dad"	""	""',
+        ]
+        response = self.client.post(individuals_url, {
+            'f': SimpleUploadedFile('test.tsv', '\n'.join(rows).encode('utf-8'))})
+        self.assertEqual(response.status_code, 400)
+        missing_entry_warning = "NA19678_dad is the father of NA19675_1 but is not included. Make sure to create an additional record with NA19678_dad as the Individual ID"
+        self.assertDictEqual(response.json(), {
+            'errors': [missing_entry_warning],
+            'warnings': [],
+        })
+
+        rows += [
+            '"1"	"NA19675_1"	"NA19675_1"	"F"	"Father"',
+            '"2"	"NA19675_2"	"NA19675_1"	"M"	""',
+        ]
+        response = self.client.post(individuals_url, {
+            'f': SimpleUploadedFile('test.tsv', '\n'.join(rows).encode('utf-8'))})
         self.assertEqual(response.status_code, 400)
         self.assertDictEqual(response.json(), {
             'errors': [
-                "NA19678_dad is the father of NA19675_1 but is not included. Make sure to create an additional record with NA19678_dad as the Individual ID",
+                'Invalid proband relationship "Father" for NA19675_1 with given gender Female',
+                'NA19675_1 is recorded as their own father',
+                'NA19675_1 is recorded as Female and also as the father of NA19675_1',
+                'NA19675_1 is recorded as Female and also as the father of NA19675_2',
+                'NA19675_1 is recorded as the father of NA19675_2 but they have different family ids: 1 and 2',
+                'NA19675_1 is included as 2 separate records, but must be unique within the project',
             ],
-            'warnings': [],
+            'warnings': [missing_entry_warning],
         })
 
-        # send valid requests
+    @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
+    def test_individuals_table_handler(self):
+        individuals_url = reverse(receive_individuals_table_handler, args=[PROJECT_GUID])
+        self.check_manager_login(individuals_url)
+
         data = 'Family ID	Individual ID	Previous Individual ID	Paternal ID	Maternal ID	Sex	Affected Status	Notes	familyNotes\n\
"1"	"NA19675"	"NA19675_1"	"NA19678"	"NA19679"	"Female"	"Affected"	"A affected individual, test1-zsf"	""\n\
"1"	"NA19678"	""	""	""	"Male"	"Unaffected"	"a individual note"	""\n\
-"1"	"NA19678"	""	""	""	"Male"	"Unaffected"	"a individual note"	""\n\
-"21"	"HG00735"	""	""	""	"Female"	"Unaffected"	""	"a new family""\n\
-"21"	"HG00735"	""	""	""	"Female"	"Unaffected"	""	""'
+"21"	"HG00735"	""	""	""	"Female"	"Unaffected"	""	"a new family""'
 
         f = SimpleUploadedFile("1000_genomes demo_individuals.tsv", data.encode('utf-8'))
 
@@ -458,6 +513,309 @@ def test_individuals_table_handler(self):
         response = self.client.post(save_url)
         self.assertEqual(response.status_code, 200)
 
+    @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
+    @mock.patch('seqr.views.utils.pedigree_info_utils.NO_VALIDATE_MANIFEST_PROJECT_CATEGORIES')
+    @mock.patch('seqr.utils.communication_utils.EmailMultiAlternatives')
+    def test_individuals_sample_manifest_table_handler(self, mock_email, mock_no_validate_categories):
+        receive_url = reverse(receive_individuals_table_handler, args=[PROJECT_GUID])
+        self.check_manager_login(receive_url)
+
+        def _send_request_data(data):
+            return self.client.post(receive_url, {'f': SimpleUploadedFile(
+                'sample_manifest.tsv', '\n'.join(['\t'.join(row) for row in data]).encode('utf-8')),
+            })
+
+        header_2 = [
+            'Kit ID', 'Well', 'Sample ID', 'Family ID', 'Alias', 'Alias', 'Paternal Sample ID', 'Maternal Sample ID',
+            'Gender', 'Affected Status', 'Primary Biosample', 'Analyte Type', 'Tissue Affected Status', 'Recontactable',
+            'Volume', 'Concentration', 'Notes', 'MONDO Label', 'MONDO ID', 'Consent Code', 'Data Use Restrictions']
+        header_3 = [
+            '', 'Position', '', '', 'Collaborator Participant ID', 'Collaborator Sample ID', '', '', '', '', '', '',
+            '(i.e yes, no)', '(i.e yes, no, unknown)', 'ul', 'ng/ul', '', '', '(i.e. 
"MONDO:0031632")', '', + 'indicate study/protocol number'] + data = [ + ['Do not modify - Broad use', '', '', 'Please fill in columns D - T', '', '', '', '', '', '', '', '', '', + '', '', '', '', '', '', '', ''], + header_2, header_3, + ] + + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), { + 'errors': ['Unsupported file format'], 'warnings': [], + }) + + data[1] = header_2[:5] + header_2[7:10] + header_2[14:17] + ['Coded Phenotype'] + header_2[19:] + response = _send_request_data(data) + self.assertDictEqual(response.json(), { + 'errors': ['Unsupported file format'], 'warnings': [], + }) + + self.login_pm_user() + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'warnings': [], 'errors': [ + 'Expected vs. actual header columns: | ' + 'Sample ID| Family ID| Alias|-Alias|-Paternal Sample ID| Maternal Sample ID| Gender| Affected Status|' + '-Primary Biosample|-Analyte Type|-Tissue Affected Status|-Recontactable| Volume| Concentration| Notes|' + '-MONDO Label|-MONDO ID|+Coded Phenotype| Consent Code| Data Use Restrictions', + ]}) + + data[1] = header_2 + data[2] = header_3[:4] + header_3[5:10] + header_3[14:18] + header_3[-1:] + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'warnings': [], 'errors': [ + 'Expected vs. actual header columns: |-Collaborator Participant ID| Collaborator Sample ID|+', + ]}) + + data[2] = header_3 + data += [ + ['SK-3QVD', 'A02', 'SM-IRW6C', 'PED073', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', '', '', 'male', + 'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', '94.8', 'probably dad', '', + '', 'GMB', '1234'], + ['SK-3QVD', 'A03', 'SM-IRW69', 'PED073', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1', + 'SCO_PED073B_GA0339_1', 'SCO_PED073A_GA0338_1', 'female', 'affected', 'UBERON:0002371 (bone marrow)', + 'DNA', 'Yes', 'No', '20', '98', '', 'Perinatal death', 'MONDO:0100086', 'HMB', '', + ], + ['SK-3QVD', 'A04', 'SM-IRW61', 'PED073', 'SCO_PED073C_GA0341', 'SCO_PED073C_GA0341_1', + 'SCO_PED073B_GA0339_1', '', 'male', 'affected', 'UBERON:0002371 (bone marrow)', + 'RNA', 'No', 'No', '17', '83', 'half sib', 'Perinatal death', 'MONDO:0100086', '', '', + ]] + + expected_warning = 'SCO_PED073A_GA0338_1 is the mother of SCO_PED073C_GA0340_1 but is not included. 
' \ + 'Make sure to create an additional record with SCO_PED073A_GA0338_1 as the Individual ID' + missing_columns_error = 'SCO_PED073B_GA0339_1 is missing the following required columns: MONDO ID, MONDO Label, Tissue Affected Status' + response = _send_request_data(data) + self.assertDictEqual(response.json(), {'warnings': [expected_warning], 'errors': [ + missing_columns_error, 'Multiple consent codes specified in manifest: GMB, HMB', + ]}) + + data[4][-2] = 'GMB' + mock_no_validate_categories.resolve_expression.return_value = ['Not-used category'] + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'warnings': [expected_warning], 'errors': [ + missing_columns_error, 'Consent code in manifest "GMB" does not match project consent code "HMB"', + ]}) + + data[3][12] = 'No' + data[3][17] = 'microcephaly' + data[3][18] = 'MONDO:0001149' + data[3][-2] = '' + data[4][-2] = 'HMB' + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'warnings': [], 'errors': [expected_warning]}) + + data[4][7] = '' + response = _send_request_data(data) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertDictEqual(response_json, {'uploadedFileId': mock.ANY, 'warnings': [], 'errors': [], 'info': [ + '1 families, 3 individuals parsed from sample_manifest.tsv', + '1 new families, 3 new individuals will be added to the project', + '0 existing individuals will be updated', + ]}) + + mock_email.assert_called_with( + subject='SK-3QVD Merged Sample Pedigree File', + body=mock.ANY, + to=['test_pm_user@test.com'], + attachments=[ + ('SK-3QVD.xlsx', mock.ANY, + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + ('sample_manifest.xlsx', mock.ANY, + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + ]) + self.assertEqual( + mock_email.call_args.kwargs['body'], + '\n'.join([ + 'User test_pm_user@test.com just uploaded pedigree info to 1kg project n\xe5me with uni\xe7\xf8de.This email has 2 attached files:', + ' ', ' SK-3QVD.xlsx is the sample manifest file in a format that can be sent to GP.', ' ', + ' sample_manifest.tsv is the original merged pedigree-sample-manifest file that the user uploaded.', ' ', + ])) + mock_email.return_value.attach_alternative.assert_called_with( + """User test_pm_user@test.com just uploaded pedigree info to 1kg project n\xe5me with uni\xe7\xf8de.
This email has 2 attached files:
+
+ SK-3QVD.xlsx is the sample manifest file in a format that can be sent to GP.
+
+ sample_manifest.tsv is the original merged pedigree-sample-manifest file that the user uploaded.
+ """, 'text/html') + mock_email.return_value.send.assert_called() + + # Test sent sample manifest is correct + sample_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][0][1])) + sample_ws = sample_wb.active + sample_ws.title = 'Sample Info' + self.assertListEqual( + [[cell.value or '' for cell in row] for row in sample_ws], + [['Well', 'Sample ID', 'Alias', 'Alias', 'Gender', 'Volume', 'Concentration'], + ['Position', '', 'Collaborator Participant ID', 'Collaborator Sample ID', '', 'ul', 'ng/ul'], + ['A02', 'SM-IRW6C', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', 'male', '20', '94.8'], + ['A03', 'SM-IRW69', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1', 'female', '20', '98'], + ['A04', 'SM-IRW61', 'SCO_PED073C_GA0341', 'SCO_PED073C_GA0341_1', 'male', '17', '83']]) + + # Test original file copy is correct + original_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][1][1])) + original_ws = original_wb.active + self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], data) + + url = reverse(save_individuals_table_handler, args=[PROJECT_GUID, response_json['uploadedFileId']]) + response = self.client.post(url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertSetEqual(set(response_json.keys()), {'individualsByGuid', 'familiesByGuid'}) + self.assertEqual(len(response_json['familiesByGuid']), 1) + family_guid = next(iter(response_json['familiesByGuid'].keys())) + self.assertEqual(response_json['familiesByGuid'][family_guid]['familyId'], 'PED073') + self.assertEqual(response_json['familiesByGuid'][family_guid]['codedPhenotype'], 'Perinatal death') + self.assertEqual(response_json['familiesByGuid'][family_guid]['mondoId'], 'MONDO:0100086') + self.assertSetEqual(set( + response_json['familiesByGuid'][family_guid]['individualGuids']), + set(response_json['individualsByGuid'].keys()) + ) + self.assertSetEqual({i['familyGuid'] for i in response_json['individualsByGuid'].values()}, {family_guid}) + self.assertEqual(len(response_json['individualsByGuid']), 3) + test_keys = { + 'affected', 'sex', 'notes', 'probandRelationship', 'primaryBiosample', 'analyteType', 'tissueAffectedStatus', + 'maternalId', 'paternalId'} + indiv_1 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073B_GA0339_1') + self.assertDictEqual({k: v for k, v in indiv_1.items() if k in test_keys}, { + 'affected': 'N', 'notes': 'probably dad', 'sex': 'M', 'maternalId': None, 'paternalId': None, + 'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False, + 'probandRelationship': 'F', + }) + indiv_2 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073C_GA0341_1') + self.assertDictEqual({k: v for k, v in indiv_2.items() if k in test_keys}, { + 'affected': 'A', 'notes': 'half sib', 'sex': 'M', 'maternalId': None, 'paternalId': 'SCO_PED073B_GA0339_1', + 'primaryBiosample': 'BM', 'analyteType': 'R', 'tissueAffectedStatus': False, + 'probandRelationship': 'J', + }) + indiv_3 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073C_GA0340_1') + self.assertDictEqual({k: v for k, v in indiv_3.items() if k in test_keys}, { + 'affected': 'A', 'notes': None, 'sex': 'F', 'maternalId': None, 'paternalId': 'SCO_PED073B_GA0339_1', + 'primaryBiosample': 'BM', 'analyteType': 'D', 'tissueAffectedStatus': True, 'probandRelationship': 'S', + }) + self.assertEqual(indiv_2['paternalGuid'], 
indiv_1['individualGuid']) + self.assertEqual(indiv_3['paternalGuid'], indiv_1['individualGuid']) + + @mock.patch('seqr.views.utils.pedigree_info_utils.date') + def test_individuals_datastat_table_handler(self, mock_date): + mock_date.today.return_value = datetime.date(2020, 1, 1) + + receive_url = reverse(receive_individuals_table_handler, args=[PROJECT_GUID]) + self.check_manager_login(receive_url) + + data = [['participant_guid', 'familyId', 'RELATIONSHIP', 'RELATIONSHIP_OTHER_DETAILS', 'WEBSITE', 'DESCRIPTION', 'CLINICAL_DIAGNOSES', 'CLINICAL_DIAGNOSES_DETAILS', 'GENETIC_DIAGNOSES', 'GENETIC_DIAGNOSES_DETAILS', 'FIND_OUT_DOCTOR_DETAILS', 'PATIENT_AGE', 'CONDITION_AGE', 'PATIENT_DECEASED', 'DECEASED_AGE', 'DECEASED_CAUSE', 'DECEASED_DNA', 'PATIENT_SEX', 'RACE', 'ETHNICITY', 'DOCTOR_TYPES', 'DOCTOR_TYPES_OTHER_DETAILS', 'TESTS', 'TESTS_MICROARRAY_YEAR', 'TESTS_MICROARRAY_LAB', 'TESTS_MICROARRAY_FAMILY', 'TESTS_MICROARRAY_FAMILY_OTHER_DETAILS', 'TESTS_WEXOME_YEAR', 'TESTS_WEXOME_LAB', 'TESTS_WEXOME_FAMILY', 'TESTS_WEXOME_FAMILY_OTHER_DETAILS', 'TESTS_WGENOME_YEAR', 'TESTS_WGENOME_LAB', 'TESTS_WGENOME_FAMILY', 'TESTS_WGENOME_FAMILY_OTHER_DETAILS', 'TESTS_OTHER_DETAILS', 'BIOPSY', 'BIOPSY_OTHER_DETAILS', 'OTHER_STUDIES', 'OTHER_STUDIES_DESCRIBE', 'EXPECT_RESULTS', 'MOTHER_SAME_CONDITION', 'MOTHER_CONDITION_AGE', 'MOTHER_RACE', 'MOTHER_ETHNICITY', 'MOTHER_CAN_PARTICIPATE', 'MOTHER_DECEASED', 'MOTHER_DECEASED_DNA', 'FATHER_SAME_CONDITION', 'FATHER_CONDITION_AGE', 'FATHER_RACE', 'FATHER_ETHNICITY', 'FATHER_CAN_PARTICIPATE', 'FATHER_DECEASED', 'FATHER_DECEASED_DNA', 'NO_SIBLINGS', 'SIBLING', 'NO_CHILDREN', 'CHILD', 'NO_RELATIVE_AFFECTED', 'RELATIVE', 'FAMILY_INFO'], + ['1518231365', '123', 'OTHER', 'Grandchild', 'wwww.myblog.com', 'I have a really debilitating probably genetic condition. 
I\xe2ve seen many specialists.', 'YES', 'SMA\xe2s', 'YES', 'Dwarfism\xe2', 'Dr John Smith', '34', '21', 'YES', '33', 'heart attack', 'NO', 'MALE', 'WHITE,ASIAN,PACIFIC', 'NOT_HISPANIC', 'CLIN_GEN,NEURO,CARDIO,OTHER', 'Pediatrician', 'SINGLE_GENE,GENE_PANEL,WEXOME,WGENOME,OTHER', '', '', '', '', '2018', 'UDN\xe2s lab', 'PARENT,AUNT_UNCLE,NIECE_NEPHEW,OTHER', 'Grandmother', '', '', '', 'Grandmother', 'Blood work', 'MUSCLE,SKIN,OTHER', 'Bone\xe2s', 'YES', 'Undiagnosed Diseases Network', 'NO', 'YES', '19', 'WHITE,ASIAN', 'NOT_HISPANIC', 'YES', '', '', 'NO', '', '', 'BLACK', 'PREFER_NOT_ANSWER', 'YES', 'NO', '', '[{"SIBLING_SEX":"FEMALE","SIBLING_AGE":"21","SIBLING_RACE":"WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"YES","SIBLING_CONDITION_AGE":null,"SIBLING_CAN_PARTICIPATE":"NO"},{"SIBLING_SEX":"","SIBLING_AGE":"17","SIBLING_RACE": "WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"","SIBLING_CONDITION_AGE":"","SIBLING_CAN_PARTICIPATE":"YES"}]', 'YES', '', 'NO', '[{"RELATIVE_SEX":"MALE","RELATIVE_AGE":"44","RELATIVE_RACE": "WHITE", "RELATIVE_ETHNICITY":"NOT_HISPANIC","RELATIVE_CONDITION_AGE":null,"RELATIVE_CAN_PARTICIPATE":null}]', 'patient\xe2s uncle (dads brother) died from Fahrs disease at 70'], + ['b392fd78b440', '987', 'ADULT_CHILD', 'Grandchild', '', '', 'UNSURE', 'SMA', 'NO', 'Dwarfism', '', '47', '2', '', '33', 'heart attack', 'NO', 'PREFER_NOT_ANSWER', 'WHITE', 'UNKNOWN', '', 'Pediatrician', 'NOT_SURE,MICROARRAY,WEXOME', '', '', '', '', '2018', 'UDN', 'PARENT,AUNT_UNCLE,OTHER', 'Grandmother', '', '', '', 'Grandmother', 'Blood work', 'NONE', '', 'NO', 'Undiagnosed Diseases Network', 'NO', 'UNSURE', '19', '', 'UNKNOWN', 'NO', 'UNSURE', '', '', '', '', '', '', '', 'YES', 'YES', '[{"SIBLING_SEX":"FEMALE","SIBLING_AGE":"21","SIBLING_RACE":"WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"YES","SIBLING_CONDITION_AGE":null,"SIBLING_CAN_PARTICIPATE":"NO"}]', 'NO', '[{"CHILD_SEX":"MALE","CHILD_AGE":"12","CHILD_RACE":"WHITE","CHILD_ETHNICITY":"NOT_HISPANIC","CHILD_SAME_CONDITION":"NO","CHILD_CONDITION_AGE":null,"CHILD_CAN_PARTICIPATE":"UNSURE"}]', 'YES', '', '']] + response = self.client.post(receive_url, {'f': SimpleUploadedFile( + 'datstat.tsv', '\n'.join(['\t'.join(row) for row in data]).encode('utf-8')), + }) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertDictEqual(response_json, {'uploadedFileId': mock.ANY, 'warnings': [], 'errors': [], 'info': [ + '2 families, 6 individuals parsed from datstat.tsv', + '2 new families, 6 new individuals will be added to the project', + '0 existing individuals will be updated', + ]}) + + url = reverse(save_individuals_table_handler, args=[PROJECT_GUID, response_json['uploadedFileId']]) + response = self.client.post(url) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertSetEqual(set(response_json.keys()), {'individualsByGuid', 'familiesByGuid', 'familyNotesByGuid'}) + self.assertEqual(len(response_json['familiesByGuid']), 2) + RGP_123_guid = next(f for f in response_json['familiesByGuid'].values() if f['familyId'] == 'RGP_123')['familyGuid'] + RGP_987_guid = next(f for f in response_json['familiesByGuid'].values() if f['familyId'] == 'RGP_987')['familyGuid'] + self.assertEqual(len(response_json['individualsByGuid']), 6) + individuals = sorted(response_json['individualsByGuid'].values(), key=lambda i: i['individualId']) + self.assertListEqual( + [i['individualId'] for i in individuals], + ['RGP_123_1', 
'RGP_123_2', 'RGP_123_3', 'RGP_987_1', 'RGP_987_2', 'RGP_987_3'], + ) + self.assertSetEqual({i['familyGuid'] for i in individuals[:3]}, {RGP_123_guid}) + self.assertSetEqual({i['familyGuid'] for i in individuals[3:]}, {RGP_987_guid}) + self.assertListEqual([i['sex'] for i in individuals], ['F', 'M', 'M', 'F', 'M', 'U']) + self.assertListEqual([i['affected'] for i in individuals], ['N', 'N', 'A', 'N', 'N', 'A']) + parents = individuals[:2] + individuals[3:5] + no_parent_fields = [ + 'maternalId', 'paternalId', 'maternalGuid', 'paternalGuid', 'maternalEthnicity', 'paternalEthnicity', + 'birthYear', 'deathYear', 'onsetAge', 'affectedRelatives', + ] + for no_parent_field in no_parent_fields: + self.assertSetEqual({i[no_parent_field] for i in parents}, {None}) + + self.assertDictEqual({k: v for k, v in individuals[2].items() if k in no_parent_fields}, { + 'maternalGuid': individuals[0]['individualGuid'], 'paternalGuid': individuals[1]['individualGuid'], + 'maternalId': 'RGP_123_1', 'paternalId': 'RGP_123_2', 'paternalEthnicity': ['Black'], + 'maternalEthnicity': ['White', 'Asian', 'Not Hispanic'], 'birthYear': 1986, 'deathYear': 2019, + 'onsetAge': 'A', 'affectedRelatives': True, + }) + self.assertDictEqual({k: v for k, v in individuals[5].items() if k in no_parent_fields}, { + 'maternalGuid': individuals[3]['individualGuid'], 'paternalGuid': individuals[4]['individualGuid'], + 'maternalId': 'RGP_987_1', 'paternalId': 'RGP_987_2', 'maternalEthnicity': None, 'paternalEthnicity': None, + 'birthYear': 1973, 'deathYear': None, 'onsetAge': 'C', 'affectedRelatives': False, + }) + + self.assertEqual(len(response_json['familyNotesByGuid']), 2) + note_1 = next(n['note'] for n in response_json['familyNotesByGuid'].values() if n['familyGuid'] == RGP_123_guid) + self.assertEqual(note_1, """#### Clinical Information +* __Patient is my:__ Grandchild (male) +* __Current Age:__ Patient is deceased, age 33, due to heart attack, sample not available +* __Age of Onset:__ 21 +* __Race/Ethnicity:__ White, Asian, Pacific; Not Hispanic +* __Case Description:__ I have a really debilitating probably genetic condition. Ive seen many specialists. +* __Clinical Diagnoses:__ Yes; SMAs +* __Genetic Diagnoses:__ Yes; Dwarfism +* __Website/Blog:__ Yes +* __Additional Information:__ patients uncle (dads brother) died from Fahrs disease at 70 +#### Prior Testing +* __Referring Physician:__ Dr John Smith +* __Doctors Seen:__ Clinical geneticist, Neurologist, Cardiologist, Other: Pediatrician +* __Previous Testing:__ Yes; +* * Single gene testing +* * Gene panel testing +* * Whole exome sequencing. Year: 2018, Lab: UDNs lab, Relatives: Parent, Aunt or Uncle, Niece or Nephew, Other: Grandmother +* * Whole genome sequencing. 
Year: unspecified, Lab: unspecified, Relatives: None Specified +* * Other tests: Blood work +* __Biopsies Available:__ Muscle Biopsy, Skin Biopsy, Other Tissue Biopsy: Bones +* __Other Research Studies:__ Yes, Name of studies: Undiagnosed Diseases Network, Expecting results: No +#### Family Information +* __Mother:__ affected, onset age 19, available +* __Father:__ unaffected, unavailable, deceased, sample not available +* __Siblings:__ +* * Sister, age 21, affected, unavailable +* * Sibling (unspecified sex), age 17, unspecified affected status, available +* __Children:__ None +* __Relatives:__ +* * Male, age 44, affected, unspecified availability""") + + note_2 = next(n['note'] for n in response_json['familyNotesByGuid'].values() if n['familyGuid'] == RGP_987_guid) + self.assertEqual(note_2, """#### Clinical Information +* __Patient is my:__ Adult Child (unspecified sex) - unable to provide consent +* __Current Age:__ 47 +* __Age of Onset:__ 2 +* __Race/Ethnicity:__ White; Unknown +* __Case Description:__ +* __Clinical Diagnoses:__ Unsure +* __Genetic Diagnoses:__ No +* __Website/Blog:__ No +* __Additional Information:__ None specified +#### Prior Testing +* __Referring Physician:__ None +* __Doctors Seen:__ +* __Previous Testing:__ Not sure +* __Biopsies Available:__ None +* __Other Research Studies:__ No +#### Family Information +* __Mother:__ unknown affected status, unavailable, unknown deceased status +* __Father:__ unknown affected status, unavailable, unspecified deceased status +* __Siblings:__ None +* __Children:__ +* * Son, age 12, unaffected, unspecified availability +* __Relatives:__ None""") def _is_expected_individuals_metadata_upload(self, response, expected_families=False): self.assertEqual(response.status_code, 200) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index dad0d82af9..d5ae198e92 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -870,7 +870,7 @@ def _get_gregor_airtable_data(individuals, user): fields = ALL_AIRTABLE_COLUMNS airtable_metadata = session.fetch_records( 'GREGoR Data Model', - fields=[SMID_FIELD] + fields, + fields=[SMID_FIELD] + sorted(fields), or_filters={f'{SMID_FIELD}': {r[SMID_FIELD] for r in sample_records.values()}}, ) airtable_metadata_by_smid = {r[SMID_FIELD]: r for r in airtable_metadata.values()} diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 6b3ae8d33e..11fb5818e0 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -825,12 +825,12 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat sample_fields[0] = 'SeqrCollaboratorSampleID' self._assert_expected_airtable_call(1, secondary_sample_filter, sample_fields) metadata_fields = [ - 'SMID', 'seq_library_prep_kit_method', 'read_length', 'experiment_type', 'targeted_regions_method', - 'targeted_region_bed_file', 'date_data_generation', 'target_insert_size', 'sequencing_platform', - 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'md5sum', 'reference_assembly', - 'alignment_software', 'mean_coverage', 'analysis_details', - 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', - 'called_variants_dna_file', 'md5sum', 'caller_software', 'variant_types', 'analysis_details', + 'SMID', 'aligned_dna_short_read_file', 'aligned_dna_short_read_index_file', 'aligned_dna_short_read_set_id', + 'alignment_software', 'analysis_details', 'analysis_details', 'called_variants_dna_file', + 
'called_variants_dna_short_read_id', 'caller_software', 'date_data_generation', 'experiment_type', + 'md5sum', 'md5sum', 'mean_coverage', 'read_length', 'reference_assembly', 'seq_library_prep_kit_method', + 'sequencing_platform', 'target_insert_size', 'targeted_region_bed_file', 'targeted_regions_method', + 'variant_types', ] self._assert_expected_airtable_call(2, "OR(SMID='SM-AGHT',SMID='SM-JDBTM')", metadata_fields) diff --git a/seqr/views/react_app.py b/seqr/views/react_app.py index a8dca095c6..2490bf7c6d 100644 --- a/seqr/views/react_app.py +++ b/seqr/views/react_app.py @@ -8,6 +8,7 @@ from django.http import HttpResponse from settings import SEQR_VERSION, CSRF_COOKIE_NAME, DEBUG, LOGIN_URL, GA_TOKEN_ID, ANVIL_LOADING_DELAY_EMAIL_START_DATE from seqr.models import WarningMessage +from seqr.utils.search.utils import backend_specific_call from seqr.views.utils.orm_to_json_utils import get_json_for_user, get_json_for_current_user from seqr.views.utils.permissions_utils import login_active_required from seqr.views.utils.terra_api_utils import google_auth_enabled @@ -51,6 +52,7 @@ def render_app_html(request, additional_json=None, include_user=True, status=200 'version': '{}-{}'.format(SEQR_VERSION, ui_version), 'hijakEnabled': DEBUG or False, 'googleLoginEnabled': google_auth_enabled(), + 'elasticsearchEnabled': backend_specific_call(True, False), 'warningMessages': [message.json() for message in WarningMessage.objects.all()], 'anvilLoadingDelayDate': ANVIL_LOADING_DELAY_EMAIL_START_DATE if should_show_loading_delay else None, }} diff --git a/seqr/views/react_app_tests.py b/seqr/views/react_app_tests.py index 9429903ba8..0e7afb6701 100644 --- a/seqr/views/react_app_tests.py +++ b/seqr/views/react_app_tests.py @@ -13,7 +13,7 @@ class AppPageTest(object): databases = '__all__' fixtures = ['users'] - def _check_page_html(self, response, user, user_key='user', user_fields=None, ga_token_id=None, anvil_loading_date=None): + def _check_page_html(self, response, user, user_key='user', user_fields=None, ga_token_id=None, anvil_loading_date=None, elasticsearch_enabled=False): user_fields = user_fields or USER_FIELDS self.assertEqual(response.status_code, 200) initial_json = self.get_initial_page_json(response) @@ -24,6 +24,7 @@ def _check_page_html(self, response, user, user_key='user', user_fields=None, g 'version': mock.ANY, 'hijakEnabled': False, 'googleLoginEnabled': self.GOOGLE_ENABLED, + 'elasticsearchEnabled': elasticsearch_enabled, 'warningMessages': [{'id': 1, 'header': 'Warning!', 'message': 'A sample warning'}], 'anvilLoadingDelayDate': anvil_loading_date, }) @@ -79,9 +80,10 @@ def test_no_login_react_page(self): response = self.client.get(url) self._check_page_html(response, 'test_user') + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.views.react_app.ANVIL_LOADING_DELAY_EMAIL_START_DATE', '2022-12-01') @mock.patch('seqr.views.react_app.datetime') - def test_react_page_anvil_loading_delay(self, mock_datetime): + def test_react_page_additional_configs(self, mock_datetime): mock_datetime.strptime.side_effect = datetime.strptime mock_datetime.now.return_value = datetime(2022, 11, 1, 0, 0, 0) @@ -89,11 +91,11 @@ def test_react_page_anvil_loading_delay(self, mock_datetime): self.check_require_login_no_policies(url, login_redirect_url='/login') response = self.client.get(url) - self._check_page_html(response, 'test_user_no_policies') + self._check_page_html(response, 'test_user_no_policies', elasticsearch_enabled=True) 
mock_datetime.now.return_value = datetime(2022, 12, 30, 0, 0, 0) response = self.client.get(url) - self._check_page_html(response, 'test_user_no_policies', anvil_loading_date='2022-12-01') + self._check_page_html(response, 'test_user_no_policies', anvil_loading_date='2022-12-01', elasticsearch_enabled=True) class LocalAppPageTest(AuthenticationTestCase, AppPageTest): diff --git a/seqr/views/status.py b/seqr/views/status.py index a1a72e7de1..4775d94444 100644 --- a/seqr/views/status.py +++ b/seqr/views/status.py @@ -1,10 +1,9 @@ from django.db import connections import logging import redis -from urllib3.connectionpool import connection_from_url -from settings import SEQR_VERSION, KIBANA_SERVER, REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT, DATABASES -from seqr.utils.search.utils import ping_search_backend +from settings import SEQR_VERSION, REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT, DATABASES +from seqr.utils.search.utils import ping_search_backend, ping_search_backend_admin from seqr.views.utils.json_utils import create_json_response logger = logging.getLogger(__name__) @@ -37,14 +36,12 @@ def status_view(request): dependent_services_ok = False logger.error('Search backend connection error: {}'.format(str(e))) - # Test kibana connection + # Test search admin view connection try: - resp = connection_from_url('http://{}'.format(KIBANA_SERVER)).urlopen('HEAD', '/status', timeout=3, retries=3) - if resp.status >= 400: - raise ValueError('Error {}: {}'.format(resp.status, resp.reason)) + ping_search_backend_admin() except Exception as e: secondary_services_ok = False - logger.error('Kibana connection error: {}'.format(str(e))) + logger.error('Search Admin connection error: {}'.format(str(e))) return create_json_response( diff --git a/seqr/views/status_tests.py b/seqr/views/status_tests.py index eb01b07be1..bb08dc2576 100644 --- a/seqr/views/status_tests.py +++ b/seqr/views/status_tests.py @@ -2,61 +2,98 @@ from django.urls.base import reverse import mock from requests import HTTPError +import responses from seqr.views.status import status_view from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses -class StatusTest(TestCase): +class StatusTest(object): - def _test_status_error(self, url, mock_logger, es_error): + def _test_status_error(self, url, mock_logger): response = self.client.get(url) self.assertEqual(response.status_code, 400) self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': False, 'secondary_services_ok': False}) - mock_logger.error.assert_has_calls([ + calls = [ mock.call('Database "default" connection error: No connection'), mock.call('Database "reference_data" connection error: No connection'), mock.call('Redis connection error: Bad connection'), - mock.call(f'Search backend connection error: {es_error}'), - mock.call('Kibana connection error: Connection refused: HEAD /status'), - ]) + mock.call(f'Search backend connection error: {self.SEARCH_BACKEND_ERROR}'), + ] + if self.HAS_KIBANA: + calls.append(mock.call('Search Admin connection error: Connection refused: HEAD /status')) + mock_logger.error.assert_has_calls(calls) mock_logger.reset_mock() - @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') @mock.patch('seqr.views.status.redis.StrictRedis') @mock.patch('seqr.views.status.connections') @mock.patch('seqr.views.status.logger') @urllib3_responses.activate + @responses.activate def test_status(self, mock_logger, mock_db_connections, mock_redis): url = reverse(status_view) 
mock_db_connections.__getitem__.return_value.cursor.side_effect = Exception('No connection') mock_redis.return_value.ping.side_effect = HTTPError('Bad connection') + responses.add(responses.GET, 'http://test-hail:5000/status', status=400) - self._test_status_error(url, mock_logger, es_error='No response from elasticsearch ping') - - with mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', ''): - self._test_status_error(url, mock_logger, es_error='Elasticsearch backend is disabled') + self._test_status_error(url, mock_logger) mock_db_connections.__getitem__.return_value.cursor.side_effect = None mock_redis.return_value.ping.side_effect = None + responses.reset() + urllib3_responses.reset() + responses.add(responses.GET, 'http://test-hail:5000/status', status=200) urllib3_responses.add(urllib3_responses.HEAD, '/', status=200) urllib3_responses.add(urllib3_responses.HEAD, '/status', status=500) response = self.client.get(url) self.assertEqual(response.status_code, 200) - self.assertDictEqual( - response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': False}) - mock_logger.error.assert_has_calls([ - mock.call('Kibana connection error: Error 500: Internal Server Error'), - ]) + if self.HAS_KIBANA: + self.assertDictEqual( + response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': False}) + mock_logger.error.assert_has_calls([ + mock.call('Search Admin connection error: Kibana Error 500: Internal Server Error'), + ]) - mock_logger.reset_mock() - urllib3_responses.replace_json('/status', {'success': True}, method=urllib3_responses.HEAD, status=200) + mock_logger.reset_mock() + urllib3_responses.replace_json('/status', {'success': True}, method=urllib3_responses.HEAD, status=200) + + response = self.client.get(url) - response = self.client.get(url) self.assertEqual(response.status_code, 200) self.assertDictEqual( response.json(), {'version': 'v1.0', 'dependent_services_ok': True, 'secondary_services_ok': True}) mock_logger.error.assert_not_called() + self._assert_expected_requests() + + +class ElasticsearchStatusTest(TestCase, StatusTest): + + SEARCH_BACKEND_ERROR = 'No response from elasticsearch ping' + HAS_KIBANA = True + + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', 'testhost') + def test_status(self, *args): + super(ElasticsearchStatusTest, self).test_status(*args) + + def _assert_expected_requests(self): + self.assertEqual(len(responses.calls), 0) + self.assertListEqual([call.request.url for call in urllib3_responses.calls], ['/', '/status', '/', '/status']) + + +class HailSearchStatusTest(TestCase, StatusTest): + + SEARCH_BACKEND_ERROR = '400 Client Error: Bad Request for url: http://test-hail:5000/status' + HAS_KIBANA = False + + @mock.patch('seqr.utils.search.elasticsearch.es_utils.ELASTICSEARCH_SERVICE_HOSTNAME', '') + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', 'http://test-hail') + def test_status(self, *args): + super(HailSearchStatusTest, self).test_status(*args) + + def _assert_expected_requests(self): + self.assertEqual(len(urllib3_responses.calls), 0) + self.assertEqual(len(responses.calls), 1) + self.assertEqual(responses.calls[0].request.url, 'http://test-hail:5000/status') diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index a9f7e9aa10..8c3da67fb1 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -333,11 +333,18 @@ def 
_get_splice_id(row):
 
 
 def load_rna_seq_splice_outlier(file_path, user=None, mapping_file=None, ignore_extra_samples=False):
-    return _load_rna_seq(
+    samples_to_load, info, warnings = _load_rna_seq(
         RnaSeqSpliceOutlier, file_path, user, mapping_file, ignore_extra_samples, _parse_splice_outlier_row,
         SPLICE_OUTLIER_HEADER_COLS.values(), get_unique_key=_get_splice_id
     )
+    for sample_data_rows in samples_to_load.values():
+        sorted_data_rows = sorted(sample_data_rows.values(), key=lambda d: d[P_VALUE_COL])
+        for i, data_row in enumerate(sorted_data_rows):
+            data_row['rank'] = i
+
+    return samples_to_load, info, warnings
+
 
 def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_columns, get_unique_key):
diff --git a/seqr/views/utils/individual_utils.py b/seqr/views/utils/individual_utils.py
index 967922faea..8e58f174c8 100644
--- a/seqr/views/utils/individual_utils.py
+++ b/seqr/views/utils/individual_utils.py
@@ -6,7 +6,8 @@
 from matchmaker.models import MatchmakerSubmission, MatchmakerResult
 from seqr.models import Sample, IgvSample, Individual, Family, FamilyNote
 from seqr.utils.middleware import ErrorsWarningsException
-from seqr.views.utils.json_to_orm_utils import update_individual_from_json, update_individual_parents, create_model_from_json
+from seqr.views.utils.json_to_orm_utils import update_individual_from_json, update_individual_parents, create_model_from_json, \
+    update_family_from_json
 from seqr.views.utils.orm_to_json_utils import _get_json_for_individuals, _get_json_for_families, get_json_for_family_notes
 from seqr.views.utils.pedigree_info_utils import JsonConstants
 
@@ -143,6 +144,14 @@ def _update_from_record(record, user, families_by_id, individual_lookup, updated
             note = create_model_from_json(FamilyNote, {'note': family_notes, 'note_type': 'C', 'family': family}, user)
             updated_note_ids.append(note.id)
 
+        family_record = {
+            k: record.pop(k) for k in [JsonConstants.CODED_PHENOTYPE_COLUMN, JsonConstants.MONDO_ID_COLUMN] if k in record
+        }
+        if family_record:
+            is_updated = update_family_from_json(family, family_record, user=user)
+            if is_updated:
+                updated_family_ids.add(family.id)
+
         is_updated = update_individual_from_json(individual, record, user=user, allow_unknown_keys=True)
         if is_updated:
             updated_individuals.add(individual)
diff --git a/seqr/views/utils/json_to_orm_utils.py b/seqr/views/utils/json_to_orm_utils.py
index 6a7f42ece5..16a044ac34 100644
--- a/seqr/views/utils/json_to_orm_utils.py
+++ b/seqr/views/utils/json_to_orm_utils.py
@@ -21,7 +21,7 @@ def update_family_from_json(family, json, user, allow_unknown_keys=False, immuta
     immutable_keys = (immutable_keys or []) + ['pedigree_image', 'assigned_analyst', 'case_review_summary', 'case_review_notes', 'guid']
 
-    update_model_from_json(
+    return update_model_from_json(
         family, json, user=user, allow_unknown_keys=allow_unknown_keys, immutable_keys=immutable_keys,
     )
diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
index 6c1c6e60f6..2396f4c946 100644
--- a/seqr/views/utils/pedigree_info_utils.py
+++ b/seqr/views/utils/pedigree_info_utils.py
@@ -21,14 +21,14 @@
 RELATIONSHIP_REVERSE_LOOKUP = {v.lower(): k for k, v in Individual.RELATIONSHIP_LOOKUP.items()}
 
 
-def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False, required_columns=None):
+def parse_pedigree_table(parsed_file, filename, user, project):
     """Validates and parses pedigree information from a .fam, .tsv, or Excel file. 
Args: parsed_file (array): The parsed output from the raw file. filename (string): The original filename - used to determine the file format based on the suffix. - user (User): (optional) Django User object - project (Project): (optional) Django Project object + user (User): Django User object + project (Project): Django Project object Return: A 3-tuple that contains: @@ -39,20 +39,55 @@ def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warn warnings (list): list of warning message strings ) """ + header_string = str(parsed_file[0]) + is_merged_pedigree_sample_manifest = "do not modify" in header_string.lower() and "Broad" in header_string + if is_merged_pedigree_sample_manifest: + if not user_is_pm(user): + raise ValueError('Unsupported file format') + if not project: + raise ValueError('Project argument required for parsing sample manifest') + header, rows = _parse_merged_pedigree_sample_manifest_rows(parsed_file[1:]) + else: + header = None + rows = None - # parse rows from file - try: - rows = [row for row in parsed_file[1:] if row and not (row[0] or '').startswith('#')] + rows, header = _parse_pedigree_table_rows(parsed_file, filename, header=header, rows=rows) - header_string = str(parsed_file[0]) - is_merged_pedigree_sample_manifest = "do not modify" in header_string.lower() and "Broad" in header_string + # convert to json and validate + errors = None + column_map = None + try: if is_merged_pedigree_sample_manifest: - if not user_is_pm(user): - raise ValueError('Unsupported file format') - if not project: - raise ValueError('Project argument required for parsing sample manifest') - header, rows = _parse_merged_pedigree_sample_manifest_rows(rows) - else: + logger.info("Parsing merged pedigree-sample-manifest file", user) + sample_manifest_rows, kit_id, errors = _parse_merged_pedigree_sample_manifest_format(rows, project) + column_map = MergedPedigreeSampleManifestConstants.MERGED_PEDIGREE_COLUMN_MAP + elif 'participant_guid' in header: + logger.info("Parsing RGP DSM export file", user) + rows = _parse_rgp_dsm_export_format(rows) + header = None + except Exception as e: + raise ErrorsWarningsException(['Error while converting {} rows to json: {}'.format(filename, e)], []) + + json_records, warnings = _parse_pedigree_table_json(rows, header=header, column_map=column_map, errors=errors) + + if is_merged_pedigree_sample_manifest: + _set_proband_relationship(json_records) + _send_sample_manifest(sample_manifest_rows, kit_id, filename, parsed_file, user, project) + + return json_records, warnings + + +def parse_basic_pedigree_table(parsed_file, filename, required_columns=None): + rows, header = _parse_pedigree_table_rows(parsed_file, filename) + return _parse_pedigree_table_json(rows, header=header, fail_on_warnings=True, required_columns=required_columns) + + +def _parse_pedigree_table_rows(parsed_file, filename, header=None, rows=None): + # parse rows from file + try: + rows = rows or [row for row in parsed_file[1:] if row and not (row[0] or '').startswith('#')] + if not header: + header_string = str(parsed_file[0]) if _is_header_row(header_string): header_row = parsed_file[0] else: @@ -68,38 +103,20 @@ def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warn i + 1, len(row), ', '.join(row), len(header), ', '.join(header) )) - rows = [dict(zip(header, row)) for row in rows] + return [dict(zip(header, row)) for row in rows], header except Exception as e: raise ErrorsWarningsException(['Error while parsing file: {}. 
{}'.format(filename, e)], []) - # convert to json and validate - errors = None - try: - if is_merged_pedigree_sample_manifest: - logger.info("Parsing merged pedigree-sample-manifest file", user) - sample_manifest_rows, kit_id, errors = _parse_merged_pedigree_sample_manifest_format(rows, project) - column_map = MergedPedigreeSampleManifestConstants.MERGED_PEDIGREE_COLUMN_MAP - elif 'participant_guid' in header: - logger.info("Parsing RGP DSM export file", user) - rows = _parse_rgp_dsm_export_format(rows) - column_map = None - else: - logger.info("Parsing regular pedigree file", user) - column_map = _parse_header_columns(header) - except Exception as e: - raise ErrorsWarningsException(['Error while converting {} rows to json: {}'.format(filename, e)], []) +def _parse_pedigree_table_json(rows, header=None, column_map=None, errors=None, fail_on_warnings=False, required_columns=None): + # convert to json and validate + column_map = column_map or (_parse_header_columns(header) if header else None) if column_map: json_records = _convert_fam_file_rows_to_json(column_map, rows, required_columns=required_columns) else: json_records = rows warnings = validate_fam_file_records(json_records, fail_on_warnings=fail_on_warnings, errors=errors) - - if is_merged_pedigree_sample_manifest: - _set_proband_relationship(json_records) - _send_sample_manifest(sample_manifest_rows, kit_id, filename, parsed_file, user, project) - return json_records, warnings @@ -235,8 +252,10 @@ def validate_fam_file_records(records, fail_on_warnings=False, errors=None): errors = errors or [] warnings = [] + individual_id_counts = defaultdict(int) for r in records: individual_id = r[JsonConstants.INDIVIDUAL_ID_COLUMN] + individual_id_counts[individual_id] += 1 family_id = r.get(JsonConstants.FAMILY_ID_COLUMN) or r['family']['familyId'] # check proband relationship has valid gender @@ -286,6 +305,11 @@ def validate_fam_file_records(records, fail_on_warnings=False, errors=None): if parent_family_id != family_id: errors.append("%(parent_id)s is recorded as the %(parent_id_type)s of %(individual_id)s but they have different family ids: %(parent_family_id)s and %(family_id)s" % locals()) + errors += [ + f'{individual_id} is included as {count} separate records, but must be unique within the project' + for individual_id, count in individual_id_counts.items() if count > 1 + ] + if fail_on_warnings: errors += warnings warnings = [] @@ -404,11 +428,11 @@ def _set_proband_relationship(json_records): maternal_siblings = { r[JsonConstants.INDIVIDUAL_ID_COLUMN] for r in records - if affected[JsonConstants.MATERNAL_ID_COLUMN] == r[JsonConstants.MATERNAL_ID_COLUMN] + if affected[JsonConstants.MATERNAL_ID_COLUMN] and affected[JsonConstants.MATERNAL_ID_COLUMN] == r[JsonConstants.MATERNAL_ID_COLUMN] } paternal_siblings = { r[JsonConstants.INDIVIDUAL_ID_COLUMN] for r in records - if affected[JsonConstants.PATERNAL_ID_COLUMN] == r[JsonConstants.PATERNAL_ID_COLUMN] + if affected[JsonConstants.PATERNAL_ID_COLUMN] and affected[JsonConstants.PATERNAL_ID_COLUMN] == r[JsonConstants.PATERNAL_ID_COLUMN] } relationships.update({r_id: Individual.MATERNAL_SIBLING_RELATIONSHIP for r_id in maternal_siblings}) relationships.update({r_id: Individual.PATERNAL_SIBLING_RELATIONSHIP for r_id in paternal_siblings}) diff --git a/seqr/views/utils/pedigree_info_utils_tests.py b/seqr/views/utils/pedigree_info_utils_tests.py deleted file mode 100644 index da7894c468..0000000000 --- a/seqr/views/utils/pedigree_info_utils_tests.py +++ /dev/null @@ -1,338 +0,0 @@ -import 
datetime -import mock -from openpyxl import load_workbook -from io import BytesIO - -from seqr.models import Project -from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, ErrorsWarningsException -from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase - -FILENAME = 'test.csv' - - -class PedigreeInfoUtilsTest(object): - - def test_parse_pedigree_table(self): - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected'], - ['fam1', 'ind1', 'male']], FILENAME, self.collaborator_user) - self.assertListEqual( - ec.exception.errors, ['Error while parsing file: {}. Row 1 contains 3 columns: fam1, ind1, male, while header contains 4: family_id, individual_id, sex, affected'.format(FILENAME)]) - self.assertListEqual(ec.exception.warnings, []) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'], - ['', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user) - self.assertEqual(len(ec.exception.errors), 2) - self.assertListEqual(ec.exception.errors, ['Missing Family Id in row #1', 'Missing Individual Id in row #1']) - self.assertIsNone(ec.exception.warnings) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'], - ['fam1', '', 'male', 'u', '.', 'ind2'], - ['fam1', 'ind1', 'boy', 'u', '.', 'ind2']], FILENAME, self.collaborator_user) - self.assertListEqual( - ec.exception.errors, ['Missing Individual Id in row #1', 'Invalid value "boy" for Sex in row #2']) - self.assertIsNone(ec.exception.warnings) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'], - ['fam1', 'ind1', 'male', 'no', '.', 'ind2']], FILENAME, self.collaborator_user) - self.assertListEqual( - ec.exception.errors, ['Invalid value "no" for Affected in row #1']) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother', 'proband_relation'], - ['fam1', 'ind1', 'male', 'aff.', 'ind3', 'ind2', 'mom']], FILENAME, self.collaborator_user) - self.assertListEqual(ec.exception.errors, ['Invalid value "mom" for Proband Relationship in row #1']) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table( - [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother', 'proband_relation'], - ['fam1', 'ind1', 'male', 'aff.', 'ind3', 'ind2', 'mother'], - ['fam2', 'ind2', 'male', 'unknown', 'ind2', '.', '']], - FILENAME, self.collaborator_user) - self.assertListEqual(ec.exception.errors, [ - 'Invalid proband relationship "Mother" for ind1 with given gender Male', - 'ind2 is recorded as Male and also as the mother of ind1', - 'ind2 is recorded as the mother of ind1 but they have different family ids: fam2 and fam1', - 'ind2 is recorded as their own father', - ]) - self.assertListEqual(ec.exception.warnings, [ - "ind3 is the father of ind1 but is not included. 
Make sure to create an additional record with ind3 as the Individual ID" - ]) - - no_error_data = [['A pedigree file'], ['# Some comments'], - ['#family_id', '#individual_id', 'previous_individual_id', 'notes_for_import', 'other_data', 'sex', 'affected', 'father', 'mother', 'phenotype: coded', 'proband_relation'], - ['fam1', 'ind1', 'ind1_old_id', 'some notes', 'some more notes', 'male', 'aff.', '.', 'ind2', 'HPO:12345', ''], - ['fam1', 'ind2', '', ' ', '', 'female', 'u', '.', 'ind3', 'HPO:56789', 'mother']] - no_error_warnings = ["ind3 is the mother of ind2 but is not included. Make sure to create an additional record with ind3 as the Individual ID"] - records, warnings = parse_pedigree_table(no_error_data, FILENAME, self.collaborator_user) - self.assertListEqual(records, [ - {'familyId': 'fam1', 'individualId': 'ind1', 'sex': 'M', 'affected': 'A', 'paternalId': '', - 'maternalId': 'ind2', 'notes': 'some notes', 'codedPhenotype': 'HPO:12345', 'probandRelationship': '', - 'previousIndividualId': 'ind1_old_id'}, - {'familyId': 'fam1', 'individualId': 'ind2', 'sex': 'F', 'affected': 'N', 'paternalId': '', - 'maternalId': 'ind3', 'notes': None, 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M', - 'previousIndividualId': None}, - ]) - self.assertListEqual(warnings, no_error_warnings) - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(no_error_data, FILENAME, self.collaborator_user, fail_on_warnings=True) - self.assertListEqual(ec.exception.errors, no_error_warnings) - - def _assert_errors_warnings_exception(self, ec, error, warning=None): - self.assertListEqual(ec.exception.errors, [error]) - self.assertListEqual(ec.exception.warnings, [warning] if warning else []) - - @mock.patch('seqr.views.utils.pedigree_info_utils.NO_VALIDATE_MANIFEST_PROJECT_CATEGORIES') - @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP') - @mock.patch('seqr.utils.communication_utils.EmailMultiAlternatives') - def test_parse_sample_manifest(self, mock_email, mock_pm_group, mock_no_validate_categories): - mock_pm_group.__eq__.side_effect = lambda s: str(mock_pm_group) == s - mock_no_validate_categories.resolve_expression.return_value = ['GREGoR'] - - header_1 = [ - 'Do not modify - Broad use', '', '', 'Please fill in columns D - T', '', '', '', '', '', '', '', '', '', - '', '', '', '', '', '', '', ''] - header_2 = [ - 'Kit ID', 'Well', 'Sample ID', 'Family ID', 'Alias', 'Alias', 'Paternal Sample ID', 'Maternal Sample ID', - 'Gender', 'Affected Status', 'Primary Biosample', 'Analyte Type', 'Tissue Affected Status', 'Recontactable', - 'Volume', 'Concentration', 'Notes', 'MONDO Label', 'MONDO ID', 'Consent Code', 'Data Use Restrictions'] - header_3 = [ - '', 'Position', '', '', 'Collaborator Participant ID', 'Collaborator Sample ID', '', '', '', '', '', '', - '(i.e yes, no)', '(i.e yes, no, unknown)', 'ul', 'ng/ul', '', '', '(i.e. "MONDO:0031632")', '', 'indicate study/protocol number'] - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table([header_1], FILENAME, user=self.analyst_user) - self._assert_errors_warnings_exception(ec, f'Error while parsing file: {FILENAME}. 
Unsupported file format') - - incomplete_header_data = [ - header_1, - ['Kit ID', 'Well', 'Sample ID', 'Family ID', 'Alias', 'Maternal Sample ID', - 'Gender', 'Affected Status', 'Volume', 'Concentration', 'Notes', 'Coded Phenotype', 'Consent Code', - 'Data Use Restrictions'], - header_3, - ] - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(incomplete_header_data, FILENAME, self.pm_user) - self._assert_errors_warnings_exception(ec, f'Error while parsing file: {FILENAME}. Unsupported file format') - - mock_pm_group.__bool__.return_value = True - mock_pm_group.__str__.return_value = 'project-managers' - mock_pm_group.resolve_expression.return_value = 'project-managers' - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(incomplete_header_data, FILENAME, self.pm_user) - self._assert_errors_warnings_exception( - ec, f'Error while parsing file: {FILENAME}. Project argument required for parsing sample manifest') - - project = Project.objects.get(id=1) - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(incomplete_header_data, FILENAME, self.pm_user, project=project) - self._assert_errors_warnings_exception( - ec, f'Error while parsing file: {FILENAME}. Expected vs. actual header columns: | ' - f'Sample ID| Family ID| Alias|-Alias|-Paternal Sample ID| Maternal Sample ID| Gender| Affected Status|' - f'-Primary Biosample|-Analyte Type|-Tissue Affected Status|-Recontactable| Volume| Concentration| Notes|' - f'-MONDO Label|-MONDO ID|+Coded Phenotype| Consent Code| Data Use Restrictions') - - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table([ - header_1, header_2, ['', 'Position', '', '', 'Collaborator Sample ID', '', '', '', '', 'ul', 'ng/ul', '', - '', 'indicate study/protocol number']], FILENAME, self.pm_user, project=project) - self._assert_errors_warnings_exception( - ec, f'Error while parsing file: {FILENAME}. Expected vs. actual header columns: |-Collaborator Participant ID| Collaborator Sample ID|+') - - original_data = [ - header_1, header_2, header_3, - ['SK-3QVD', 'A02', 'SM-IRW6C', 'PED073', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', '', '', 'male', - 'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', '94.8', 'probably dad', '', - '', 'GMB', '1234'], - ['SK-3QVD', 'A03', 'SM-IRW69', 'PED073', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1', - 'SCO_PED073B_GA0339_1', 'SCO_PED073A_GA0338_1', 'female', 'affected', 'UBERON:0002371 (bone marrow)', - 'DNA', 'Yes', 'No', '20', '98', '', 'Perinatal death', 'MONDO:0100086', 'HMB', '', - ], - ['SK-3QVD', 'A04', 'SM-IRW61', 'PED073', 'SCO_PED073C_GA0341', 'SCO_PED073C_GA0341_1', - 'SCO_PED073B_GA0339_1', '', 'male', 'affected', 'UBERON:0002371 (bone marrow)', - 'RNA', 'No', 'No', '17', '83', 'half sib', 'Perinatal death', 'MONDO:0100086', '', '', - ]] - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project) - expected_warning = 'SCO_PED073A_GA0338_1 is the mother of SCO_PED073C_GA0340_1 but is not included. 
' \ - 'Make sure to create an additional record with SCO_PED073A_GA0338_1 as the Individual ID' - self._assert_errors_warnings_exception( - ec, 'Multiple consent codes specified in manifest: GMB, HMB', warning=expected_warning) - - original_data[4][-2] = 'GMB' - mock_no_validate_categories.resolve_expression.return_value = ['Not-used category'] - with self.assertRaises(ErrorsWarningsException) as ec: - parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project) - self.assertListEqual(ec.exception.errors, [ - 'SCO_PED073B_GA0339_1 is missing the following required columns: MONDO ID, MONDO Label, Tissue Affected Status', - 'Consent code in manifest "GMB" does not match project consent code "HMB"', - ]) - self.assertListEqual(ec.exception.warnings, [expected_warning]) - - original_data[3][12] = 'No' - original_data[3][17] = 'microcephaly' - original_data[3][18] = 'MONDO:0001149' - original_data[3][-2] = '' - original_data[4][-2] = 'HMB' - records, warnings = parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project) - self.assertListEqual(records, [ - {'affected': 'N', 'maternalId': '', 'notes': 'probably dad', 'individualId': 'SCO_PED073B_GA0339_1', - 'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': 'microcephaly', - 'mondoId': 'MONDO:0001149', 'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False, - 'probandRelationship': 'F',}, - {'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': None, 'individualId': 'SCO_PED073C_GA0340_1', - 'sex': 'F', 'familyId': 'PED073', 'paternalId': 'SCO_PED073B_GA0339_1', 'codedPhenotype': 'Perinatal death', - 'mondoId': 'MONDO:0100086', 'primaryBiosample': 'BM', 'analyteType': 'D', 'tissueAffectedStatus': True, - 'probandRelationship': 'S', - }, {'affected': 'A', 'maternalId': '', 'notes': 'half sib', 'individualId': 'SCO_PED073C_GA0341_1', - 'sex': 'M', 'familyId': 'PED073', 'paternalId': 'SCO_PED073B_GA0339_1', 'codedPhenotype': 'Perinatal death', - 'mondoId': 'MONDO:0100086', 'primaryBiosample': 'BM', 'analyteType': 'R', 'tissueAffectedStatus': False, - 'probandRelationship': 'J', - }]) - self.assertListEqual(warnings, [expected_warning]) - - mock_email.assert_called_with( - subject='SK-3QVD Merged Sample Pedigree File', - body=mock.ANY, - to=['test_pm_user@test.com'], - attachments=[ - ('SK-3QVD.xlsx', mock.ANY, - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), - ('test.xlsx', mock.ANY, - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), - ]) - self.assertEqual( - mock_email.call_args.kwargs['body'], - '\n'.join([ - 'User test_pm_user@test.com just uploaded pedigree info to 1kg project n\xe5me with uni\xe7\xf8de.This email has 2 attached files:', - ' ', ' SK-3QVD.xlsx is the sample manifest file in a format that can be sent to GP.', ' ', - ' test.csv is the original merged pedigree-sample-manifest file that the user uploaded.', ' ', - ])) - mock_email.return_value.attach_alternative.assert_called_with( - """User test_pm_user@test.com just uploaded pedigree info to 1kg project n\xe5me with uni\xe7\xf8de.
-This email has 2 attached files:
-
- SK-3QVD.xlsx is the sample manifest file in a format that can be sent to GP.
-
- test.csv is the original merged pedigree-sample-manifest file that the user uploaded.
- """, 'text/html') - mock_email.return_value.send.assert_called() - - # Test sent sample manifest is correct - sample_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][0][1])) - sample_ws = sample_wb.active - sample_ws.title = 'Sample Info' - self.assertListEqual( - [[cell.value or '' for cell in row] for row in sample_ws], - [['Well', 'Sample ID', 'Alias', 'Alias', 'Gender', 'Volume', 'Concentration'], - ['Position', '', 'Collaborator Participant ID', 'Collaborator Sample ID', '', 'ul', 'ng/ul'], - ['A02', 'SM-IRW6C', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', 'male', '20', '94.8'], - ['A03', 'SM-IRW69', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1', 'female', '20', '98'], - ['A04', 'SM-IRW61', 'SCO_PED073C_GA0341', 'SCO_PED073C_GA0341_1', 'male', '17', '83']]) - - # Test original file copy is correct - original_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][1][1])) - original_ws = original_wb.active - self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], original_data) - - @mock.patch('seqr.views.utils.pedigree_info_utils.date') - def test_parse_datstat_pedigree_table(self, mock_date): - mock_date.today.return_value = datetime.date(2020, 1, 1) - - records, warnings = parse_pedigree_table( - [['participant_guid', 'familyId', 'RELATIONSHIP', 'RELATIONSHIP_OTHER_DETAILS', 'WEBSITE', 'DESCRIPTION', 'CLINICAL_DIAGNOSES', 'CLINICAL_DIAGNOSES_DETAILS', 'GENETIC_DIAGNOSES', 'GENETIC_DIAGNOSES_DETAILS', 'FIND_OUT_DOCTOR_DETAILS', 'PATIENT_AGE', 'CONDITION_AGE', 'PATIENT_DECEASED', 'DECEASED_AGE', 'DECEASED_CAUSE', 'DECEASED_DNA', 'PATIENT_SEX', 'RACE', 'ETHNICITY', 'DOCTOR_TYPES', 'DOCTOR_TYPES_OTHER_DETAILS', 'TESTS', 'TESTS_MICROARRAY_YEAR', 'TESTS_MICROARRAY_LAB', 'TESTS_MICROARRAY_FAMILY', 'TESTS_MICROARRAY_FAMILY_OTHER_DETAILS', 'TESTS_WEXOME_YEAR', 'TESTS_WEXOME_LAB', 'TESTS_WEXOME_FAMILY', 'TESTS_WEXOME_FAMILY_OTHER_DETAILS', 'TESTS_WGENOME_YEAR', 'TESTS_WGENOME_LAB', 'TESTS_WGENOME_FAMILY', 'TESTS_WGENOME_FAMILY_OTHER_DETAILS', 'TESTS_OTHER_DETAILS', 'BIOPSY', 'BIOPSY_OTHER_DETAILS', 'OTHER_STUDIES', 'OTHER_STUDIES_DESCRIBE', 'EXPECT_RESULTS', 'MOTHER_SAME_CONDITION', 'MOTHER_CONDITION_AGE', 'MOTHER_RACE', 'MOTHER_ETHNICITY', 'MOTHER_CAN_PARTICIPATE', 'MOTHER_DECEASED', 'MOTHER_DECEASED_DNA', 'FATHER_SAME_CONDITION', 'FATHER_CONDITION_AGE', 'FATHER_RACE', 'FATHER_ETHNICITY', 'FATHER_CAN_PARTICIPATE', 'FATHER_DECEASED', 'FATHER_DECEASED_DNA', 'NO_SIBLINGS', 'SIBLING', 'NO_CHILDREN', 'CHILD', 'NO_RELATIVE_AFFECTED', 'RELATIVE', 'FAMILY_INFO'], - ['1518231365', '123', 'OTHER', 'Grandchild', 'wwww.myblog.com', 'I have a really debilitating probably genetic condition. 
I\xe2ve seen many specialists.', 'YES', 'SMA\xe2s', 'YES', 'Dwarfism\xe2', 'Dr John Smith', '34', '21', 'YES', '33', 'heart attack', 'NO', 'MALE', 'WHITE,ASIAN,PACIFIC', 'NOT_HISPANIC', 'CLIN_GEN,NEURO,CARDIO,OTHER', 'Pediatrician', 'SINGLE_GENE,GENE_PANEL,WEXOME,WGENOME,OTHER', '', '', '', '', '2018', 'UDN\xe2s lab', 'PARENT,AUNT_UNCLE,NIECE_NEPHEW,OTHER', 'Grandmother', '', '', '', 'Grandmother', 'Blood work', 'MUSCLE,SKIN,OTHER', 'Bone\xe2s', 'YES', 'Undiagnosed Diseases Network', 'NO', 'YES', '19', 'WHITE,ASIAN', 'NOT_HISPANIC', 'YES', '', '', 'NO', '', '', 'BLACK', 'PREFER_NOT_ANSWER', 'YES', 'NO', '', '[{"SIBLING_SEX":"FEMALE","SIBLING_AGE":"21","SIBLING_RACE":"WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"YES","SIBLING_CONDITION_AGE":null,"SIBLING_CAN_PARTICIPATE":"NO"},{"SIBLING_SEX":"","SIBLING_AGE":"17","SIBLING_RACE": "WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"","SIBLING_CONDITION_AGE":"","SIBLING_CAN_PARTICIPATE":"YES"}]', 'YES', '', 'NO', '[{"RELATIVE_SEX":"MALE","RELATIVE_AGE":"44","RELATIVE_RACE": "WHITE", "RELATIVE_ETHNICITY":"NOT_HISPANIC","RELATIVE_CONDITION_AGE":null,"RELATIVE_CAN_PARTICIPATE":null}]', 'patient\xe2s uncle (dads brother) died from Fahrs disease at 70'], - ['b392fd78b440', '987', 'ADULT_CHILD', 'Grandchild', '', '', 'UNSURE', 'SMA', 'NO', 'Dwarfism', '', '47', '2', '', '33', 'heart attack', 'NO', 'PREFER_NOT_ANSWER', 'WHITE', 'UNKNOWN', '', 'Pediatrician', 'NOT_SURE,MICROARRAY,WEXOME', '', '', '', '', '2018', 'UDN', 'PARENT,AUNT_UNCLE,OTHER', 'Grandmother', '', '', '', 'Grandmother', 'Blood work', 'NONE', '', 'NO', 'Undiagnosed Diseases Network', 'NO', 'UNSURE', '19', '', 'UNKNOWN', 'NO', 'UNSURE', '', '', '', '', '', '', '', 'YES', 'YES', '[{"SIBLING_SEX":"FEMALE","SIBLING_AGE":"21","SIBLING_RACE":"WHITE","SIBLING_ETHNICITY":"NOT_HISPANIC","SIBLING_SAME_CONDITION":"YES","SIBLING_CONDITION_AGE":null,"SIBLING_CAN_PARTICIPATE":"NO"}]', 'NO', '[{"CHILD_SEX":"MALE","CHILD_AGE":"12","CHILD_RACE":"WHITE","CHILD_ETHNICITY":"NOT_HISPANIC","CHILD_SAME_CONDITION":"NO","CHILD_CONDITION_AGE":null,"CHILD_CAN_PARTICIPATE":"UNSURE"}]', 'YES', '', '']], - FILENAME, self.collaborator_user) - - self.assertListEqual(warnings, []) - - note_1 = """#### Clinical Information -* __Patient is my:__ Grandchild (male) -* __Current Age:__ Patient is deceased, age 33, due to heart attack, sample not available -* __Age of Onset:__ 21 -* __Race/Ethnicity:__ White, Asian, Pacific; Not Hispanic -* __Case Description:__ I have a really debilitating probably genetic condition. Ive seen many specialists. -* __Clinical Diagnoses:__ Yes; SMAs -* __Genetic Diagnoses:__ Yes; Dwarfism -* __Website/Blog:__ Yes -* __Additional Information:__ patients uncle (dads brother) died from Fahrs disease at 70 -#### Prior Testing -* __Referring Physician:__ Dr John Smith -* __Doctors Seen:__ Clinical geneticist, Neurologist, Cardiologist, Other: Pediatrician -* __Previous Testing:__ Yes; -* * Single gene testing -* * Gene panel testing -* * Whole exome sequencing. Year: 2018, Lab: UDNs lab, Relatives: Parent, Aunt or Uncle, Niece or Nephew, Other: Grandmother -* * Whole genome sequencing. 
Year: unspecified, Lab: unspecified, Relatives: None Specified -* * Other tests: Blood work -* __Biopsies Available:__ Muscle Biopsy, Skin Biopsy, Other Tissue Biopsy: Bones -* __Other Research Studies:__ Yes, Name of studies: Undiagnosed Diseases Network, Expecting results: No -#### Family Information -* __Mother:__ affected, onset age 19, available -* __Father:__ unaffected, unavailable, deceased, sample not available -* __Siblings:__ -* * Sister, age 21, affected, unavailable -* * Sibling (unspecified sex), age 17, unspecified affected status, available -* __Children:__ None -* __Relatives:__ -* * Male, age 44, affected, unspecified availability""" - - note_2 = """#### Clinical Information -* __Patient is my:__ Adult Child (unspecified sex) - unable to provide consent -* __Current Age:__ 47 -* __Age of Onset:__ 2 -* __Race/Ethnicity:__ White; Unknown -* __Case Description:__ -* __Clinical Diagnoses:__ Unsure -* __Genetic Diagnoses:__ No -* __Website/Blog:__ No -* __Additional Information:__ None specified -#### Prior Testing -* __Referring Physician:__ None -* __Doctors Seen:__ -* __Previous Testing:__ Not sure -* __Biopsies Available:__ None -* __Other Research Studies:__ No -#### Family Information -* __Mother:__ unknown affected status, unavailable, unknown deceased status -* __Father:__ unknown affected status, unavailable, unspecified deceased status -* __Siblings:__ None -* __Children:__ -* * Son, age 12, unaffected, unspecified availability -* __Relatives:__ None""" - - self.assertListEqual(records, [ - {'familyId': 'RGP_123', 'individualId': 'RGP_123_1', 'sex': 'F', 'affected': 'N'}, - {'familyId': 'RGP_123', 'individualId': 'RGP_123_2', 'sex': 'M', 'affected': 'N'}, - { - 'familyId': 'RGP_123', 'individualId': 'RGP_123_3', 'sex': 'M', 'affected': 'A', - 'maternalId': 'RGP_123_1', 'paternalId': 'RGP_123_2', 'familyNotes': note_1, - 'maternalEthnicity': ['White', 'Asian', 'Not Hispanic'], 'paternalEthnicity': ['Black'], - 'birthYear': 1986, 'deathYear': 2019, 'onsetAge': 'A', 'affectedRelatives': True, - }, - {'familyId': 'RGP_987', 'individualId': 'RGP_987_1', 'sex': 'F', 'affected': 'N'}, - {'familyId': 'RGP_987', 'individualId': 'RGP_987_2', 'sex': 'M', 'affected': 'N'}, - { - 'familyId': 'RGP_987', 'individualId': 'RGP_987_3', 'sex': 'U', 'affected': 'A', - 'maternalId': 'RGP_987_1', 'paternalId': 'RGP_987_2', 'familyNotes': note_2, - 'maternalEthnicity': None, 'paternalEthnicity': None, 'birthYear': 1973, 'deathYear': None, - 'onsetAge': 'C', 'affectedRelatives': False, - }, - ]) - - -class LocalPedigreeInfoUtilsTest(AuthenticationTestCase, PedigreeInfoUtilsTest): - fixtures = ['users', '1kg_project'] - - -class AnvilPedigreeInfoUtilsTest(AnvilAuthenticationTestCase, PedigreeInfoUtilsTest): - fixtures = ['users', 'social_auth', '1kg_project'] diff --git a/ui/pages/AnvilWorkspace/LoadWorkspaceData.jsx b/ui/pages/AnvilWorkspace/LoadWorkspaceData.jsx index c0142e9544..78e4a139fc 100644 --- a/ui/pages/AnvilWorkspace/LoadWorkspaceData.jsx +++ b/ui/pages/AnvilWorkspace/LoadWorkspaceData.jsx @@ -2,7 +2,7 @@ import React from 'react' import PropTypes from 'prop-types' import { Message, Segment } from 'semantic-ui-react' -import LoadWorkspaceDataForm from 'shared/components/panel/LoadWorkspaceDataForm' +import LoadWorkspaceDataForm, { WORKSPACE_REQUIREMENTS } from 'shared/components/panel/LoadWorkspaceDataForm' export const WorkspaceAccessError = ({ match }) => ( @@ -13,15 +13,7 @@ export const WorkspaceAccessError = ({ match }) => ( To submit the initial request to load data to 
       seqr, users require:
-        "Writer" or "Owner" level access to the workspace
-        The "Can Share" permission enabled for the workspace
-
-        No &nbsp;
-
-        authorization domains
-
-        &nbsp; to be associated with the workspace
-
+      {WORKSPACE_REQUIREMENTS.map(item => <List.Item key={item}>{item}</List.Item>)}
diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx
index 1ee3378f32..65e6cda934 100644
--- a/ui/pages/DataManagement/DataManagement.jsx
+++ b/ui/pages/DataManagement/DataManagement.jsx
@@ -3,8 +3,9 @@ import PropTypes from 'prop-types'
 import { connect } from 'react-redux'
 import { Route, Switch } from 'react-router-dom'
 
-import { getUser } from 'redux/selectors'
+import { getUser, getElasticsearchEnabled } from 'redux/selectors'
 import { Error404, Error401 } from 'shared/components/page/Errors'
+import { SimplePageHeader } from 'shared/components/page/PageHeaderLayout'
 
 import ElasticsearchStatus from './components/ElasticsearchStatus'
 import RnaSeq from './components/RnaSeq'
@@ -15,12 +16,7 @@ import WritePedigree from './components/WritePedigree'
 
 const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' }
 
-export const DATA_MANAGEMENT_PAGES = [
-  { path: 'elasticsearch_status', component: ElasticsearchStatus },
-  {
-    path: 'kibana',
-    component: () =>