From 1e0c8643b767ca3f0158f464cc05768c8d70497e Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Thu, 2 May 2024 15:38:24 -0400
Subject: [PATCH 01/51] accept drs file path

---
 seqr/views/apis/igv_api.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 58fc92c1f0..6d0ae529fe 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -147,12 +147,13 @@ def update_individual_igv_sample(request, individual_guid):
         if not file_path:
             raise ValueError('request must contain fields: filePath')
 
-        sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None)
+        file_name = _get_valid_file_name(file_path, request.user)
+        if not file_name:
+            raise Exception('Error accessing "{}"'.format(file_path))
+        sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_name.endswith(suffix)), None)
         if not sample_type:
             raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format(
-                file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP])))
-        if not does_file_exist(file_path, user=request.user):
-            raise Exception('Error accessing "{}"'.format(file_path))
+                file_name, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP])))
 
         sample, created = get_or_create_model_from_json(
             IgvSample, create_json={'individual': individual, 'sample_type': sample_type},
@@ -172,6 +173,21 @@ def update_individual_igv_sample(request, individual_guid):
         return create_json_response({'error': error}, status=400, reason=error)
 
 
+def _get_valid_file_name(file_path, user):
+    if not file_path.startswith('drs://'):
+        return file_path if does_file_exist(file_path, user=user) else None
+
+    drs_path = file_path.split('/')
+    response = requests.get(
+        f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}',
+        headers=_get_gs_auth_api_headers(user),
+    )
+    if response.status_code != 200:
+        return None
+    #access = next((a['access_url'] for a in drs_info['access_methods'] if a.get('type') == 'https'), None)
+    return response.json()['name']
+
+
 @login_and_policies_required
 def fetch_igv_track(request, project_guid, igv_track_path):
 
@@ -198,8 +214,12 @@ def _stream_gs(request, gs_path):
                                  content_type='application/octet-stream')
 
 
+def _get_gs_auth_api_headers(user):
+    return {'Authorization': 'Bearer {}'.format(_get_access_token(user))}
+
+
 def _get_gs_rest_api_headers(range_header, gs_path, user=None):
-    headers = {'Authorization': 'Bearer {}'.format(_get_access_token(user))}
+    headers = _get_gs_auth_api_headers(user)
     if range_header:
         headers['Range'] = range_header
     google_project = get_google_project(gs_path)

From 944cd05d234175797336cccfcc174e8dc41ea7d7 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Thu, 2 May 2024 15:58:52 -0400
Subject: [PATCH 02/51] stream drs uri for igv

---
 seqr/views/apis/igv_api.py | 54 +++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 6d0ae529fe..262e354a9d 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -173,19 +173,26 @@ def update_individual_igv_sample(request, individual_guid):
         return create_json_response({'error': error}, status=400, reason=error)
 
 
-def _get_valid_file_name(file_path, user):
-    if not file_path.startswith('drs://'):
-        return file_path if does_file_exist(file_path, user=user) else None
+def _is_drs_uri_path(file_path):
+    return file_path.startswith('drs://')
+
 
+def _get_drs_info(file_path, user):
     drs_path = file_path.split('/')
     response = requests.get(
         f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}',
         headers=_get_gs_auth_api_headers(user),
     )
-    if response.status_code != 200:
-        return None
-    #access = next((a['access_url'] for a in drs_info['access_methods'] if a.get('type') == 'https'), None)
-    return response.json()['name']
+
+    return response.json() if response.status_code == 200 else None
+
+
+def _get_valid_file_name(file_path, user):
+    if _is_drs_uri_path(file_path):
+        drs_info = _get_drs_info(file_path, user)
+        return None if drs_info is None else drs_info['name']
+
+    return file_path if does_file_exist(file_path, user=user) else None
 
 
 @login_and_policies_required
@@ -199,16 +206,35 @@ def fetch_igv_track(request, project_guid, igv_track_path):
     if is_google_bucket_file_path(igv_track_path):
         return _stream_gs(request, igv_track_path)
 
+    if _is_drs_uri_path(igv_track_path):
+        return _stream_drs(request, igv_track_path)
+
     return _stream_file(request, igv_track_path)
 
 
 def _stream_gs(request, gs_path):
-    headers = _get_gs_rest_api_headers(request.META.get('HTTP_RANGE'), gs_path, user=request.user)
+    return _stream_response(
+        url=f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}",
+        headers=_get_gs_rest_api_headers(gs_path, user=request.user),
+        request=request)
 
-    response = requests.get(
-        f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}",
-        headers=headers,
-        stream=True)
+
+def _stream_drs(request, file_path):
+    drs_info = _get_drs_info(file_path, request.user)
+    https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https')
+
+    return _stream_response(
+        https_access['url'],
+        headers=dict([h.split(': ') for h in https_access['headers']]),
+        request=request)
+
+
+def _stream_response(url, headers, request):
+    range_header = request.META.get('HTTP_RANGE')
+    if range_header:
+        headers['Range'] = range_header
+
+    response = requests.get(url, headers=headers, stream=True)
     return StreamingHttpResponse(response.iter_content(chunk_size=65536), status=response.status_code,
                                  content_type='application/octet-stream')
 
@@ -218,10 +244,8 @@ def _get_gs_auth_api_headers(user):
     return {'Authorization': 'Bearer {}'.format(_get_access_token(user))}
 
 
-def _get_gs_rest_api_headers(range_header, gs_path, user=None):
+def _get_gs_rest_api_headers(gs_path, user):
     headers = _get_gs_auth_api_headers(user)
-    if range_header:
-        headers['Range'] = range_header
     google_project = get_google_project(gs_path)
     if google_project:
         headers['x-goog-user-project'] = get_google_project(gs_path)

From c615b272e7bfff4dbe6f1ac868c80179b17a6057 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Thu, 2 May 2024 16:32:40 -0400
Subject: [PATCH 03/51] actually store file type

---
 seqr/views/apis/igv_api.py | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 262e354a9d..41f9845a50 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -147,13 +147,14 @@ def update_individual_igv_sample(request, individual_guid):
         if not file_path:
             raise ValueError('request must contain fields: filePath')
 
-        file_name = _get_valid_file_name(file_path, request.user)
-        if not file_name:
-            raise Exception('Error accessing "{}"'.format(file_path))
-        sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_name.endswith(suffix)), None)
+        original_file_path = file_path
+        file_path = _get_valid_file_path(file_path, request.user)
+        if not file_path:
+            raise Exception('Error accessing "{}"'.format(original_file_path))
+        sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None)
         if not sample_type:
             raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format(
-                file_name, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP])))
+                file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP])))
 
         sample, created = get_or_create_model_from_json(
             IgvSample, create_json={'individual': individual, 'sample_type': sample_type},
@@ -180,17 +181,17 @@ def _is_drs_uri_path(file_path):
 def _get_drs_info(file_path, user):
     drs_path = file_path.split('/')
     response = requests.get(
-        f'https://{drs_path[-2]}/ga4gh/drs/v1/objects/{drs_path[-1]}',
+        f'https://{drs_path[2]}/ga4gh/drs/v1/objects/{drs_path[3]}',
         headers=_get_gs_auth_api_headers(user),
     )
 
     return response.json() if response.status_code == 200 else None
 
 
-def _get_valid_file_name(file_path, user):
+def _get_valid_file_path(file_path, user):
     if _is_drs_uri_path(file_path):
         drs_info = _get_drs_info(file_path, user)
-        return None if drs_info is None else drs_info['name']
+        return None if drs_info is None else f"{file_path}/{drs_info['name']}"
 
     return file_path if does_file_exist(file_path, user=user) else None
 
@@ -221,12 +222,16 @@ def _stream_gs(request, gs_path):
 
 def _stream_drs(request, file_path):
     drs_info = _get_drs_info(file_path, request.user)
+
     https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https')
+    url = https_access['url']
+    headers = dict([h.split(': ') for h in https_access['headers']])
 
-    return _stream_response(
-        https_access['url'],
-        headers=dict([h.split(': ') for h in https_access['headers']]),
-        request=request)
+    # TODO this does not actually result in a valid index file
+    if file_path.endswith('.cram.crai'):
+        url = url.replace('.cram', '.crai')
+
+    return _stream_response(url, headers, request)

From 24de48d9b00b6335003bc409b63f723d0c54e7f5 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Thu, 2 May 2024 17:12:25 -0400
Subject: [PATCH 04/51] use provided index url

---
 .../0064_igvsample_index_file_path.py         | 18 ++++++++++++++++
 seqr/models.py                                |  3 ++-
 seqr/views/apis/igv_api.py                    | 21 ++++++++++++-------
 ui/shared/components/form/IGVUploadField.jsx  |  2 +-
 .../components/panel/family/FamilyReads.jsx   |  8 +++----
 5 files changed, 38 insertions(+), 14 deletions(-)
 create mode 100644 seqr/migrations/0064_igvsample_index_file_path.py

diff --git a/seqr/migrations/0064_igvsample_index_file_path.py b/seqr/migrations/0064_igvsample_index_file_path.py
new file mode 100644
index 0000000000..437cabb23f
--- /dev/null
+++ b/seqr/migrations/0064_igvsample_index_file_path.py
@@ -0,0 +1,18 @@
+# Generated by Django 3.2.23 on 2024-05-02 20:48
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('seqr', '0063_dynamicanalysisgroup'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='igvsample',
+            name='index_file_path',
+            field=models.TextField(blank=True, null=True),
+        ),
+    ]

diff --git a/seqr/models.py b/seqr/models.py
index aa89945fd6..46765f6702 100644
--- a/seqr/models.py
+++ b/seqr/models.py
@@ -742,6 +742,7 @@ class IgvSample(ModelWithGUID):
     individual = models.ForeignKey('Individual', on_delete=models.PROTECT)
     sample_type = models.CharField(max_length=15, choices=SAMPLE_TYPE_CHOICES)
     file_path = models.TextField()
+    index_file_path = models.TextField(null=True, blank=True)
     sample_id = models.TextField(null=True)
 
     def __unicode__(self):
@@ -753,7 +754,7 @@ def _compute_guid(self):
 
     class Meta:
         unique_together = ('individual', 'sample_type')
 
-    json_fields = ['guid', 'file_path', 'sample_type', 'sample_id']
+    json_fields = ['guid', 'file_path', 'index_file_path', 'sample_type', 'sample_id']
 
 
 class SavedVariant(ModelWithGUID):

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 41f9845a50..3c14fd9e3a 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -31,7 +31,14 @@ def _process_alignment_records(rows, num_id_cols=1, **kwargs):
     parsed_records = defaultdict(list)
     for row in rows:
         row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
-        parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None})
+        sample_id = None
+        index_file_path = None
+        if len(row) > num_cols:
+            if _is_drs_uri_path(row[num_cols]):
+                index_file_path = row[num_cols]
+            else:
+                sample_id = row[num_cols]
+        parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': sample_id, 'indexFilePath': index_file_path})
     return parsed_records
 
@@ -158,7 +165,10 @@ def update_individual_igv_sample(request, individual_guid):
         sample, created = get_or_create_model_from_json(
             IgvSample, create_json={'individual': individual, 'sample_type': sample_type},
-            update_json={'file_path': file_path, 'sample_id': request_json.get('sampleId')}, user=request.user)
+            update_json={
+                'file_path': file_path,
+                **{field: request_json.get(field) for field in ['sampleId', 'indexFilePath']}
+            }, user=request.user)
 
         response = {
             'igvSamplesByGuid': {
@@ -224,14 +234,9 @@ def _stream_drs(request, file_path):
     drs_info = _get_drs_info(file_path, request.user)
 
     https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https')
-    url = https_access['url']
     headers = dict([h.split(': ') for h in https_access['headers']])
 
-    # TODO this does not actually result in a valid index file
-    if file_path.endswith('.cram.crai'):
-        url = url.replace('.cram', '.crai')
-
-    return _stream_response(url, headers, request)
+    return _stream_response(https_access['url'], headers, request)

diff --git a/ui/shared/components/form/IGVUploadField.jsx b/ui/shared/components/form/IGVUploadField.jsx
index dc2f412fa6..af244d2e96 100644
--- a/ui/shared/components/form/IGVUploadField.jsx
+++ b/ui/shared/components/form/IGVUploadField.jsx
@@ -39,7 +39,7 @@ IgvDropzoneLabel.propTypes = {
 const NO_PROJECT_COLUMNS = [
   'Individual ID',
   'IGV Track File Path',
-  'gCNV Sample ID, to identify the sample in the gCNV batch path. Not used for other track types',
+  'For gCNV data: Sample ID, to identify the sample in the gCNV batch path. For other track types: Index File Path',
 ]
 
 // eslint-disable-next-line react-perf/jsx-no-new-array-as-prop

diff --git a/ui/shared/components/panel/family/FamilyReads.jsx b/ui/shared/components/panel/family/FamilyReads.jsx
index ddea5e7f07..bba09bd91f 100644
--- a/ui/shared/components/panel/family/FamilyReads.jsx
+++ b/ui/shared/components/panel/family/FamilyReads.jsx
@@ -30,6 +30,8 @@ const IGV = React.lazy(() => import('../../graph/IGV'))
 
 const MIN_LOCUS_RANGE_SIZE = 100
 
+const igvUrl = (sample, field = 'filePath') => `/api/project/${sample.projectGuid}/igv_track/${encodeURIComponent(sample[field])}`
+
 const getTrackOptions = (type, sample, individual) => {
   const name = ReactDOMServer.renderToString(
@@ -38,9 +40,7 @@ const getTrackOptions = (type, sample, individual) => {
     </span>,
   )
 
-  const url = `/api/project/${sample.projectGuid}/igv_track/${encodeURIComponent(sample.filePath)}`
-
-  return { url, name, type, ...TRACK_OPTIONS[type] }
+  return { url: igvUrl(sample), name, type, ...TRACK_OPTIONS[type] }
 }
 
 const getSampleColor = individual => (individual.affected === AFFECTED ? 'red' : 'blue')
@@ -75,7 +75,7 @@ const getIgvTracks = (igvSampleIndividuals, sortedIndividuals, sampleTypes) => {
     if (sample.filePath.endsWith('.cram')) {
       Object.assign(track, {
         format: 'cram',
-        indexURL: `${track.url}.crai`,
+        indexURL: sample.indexFilePath ? igvUrl(sample, 'indexFilePath') : `${track.url}.crai`,
       })
     } else {
       Object.assign(track, BAM_TRACK_OPTIONS)

From 59a04ba3d0825ee656dc9719983993334c066a00 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Thu, 2 May 2024 17:13:26 -0400
Subject: [PATCH 05/51] add validation for index files

---
 seqr/views/apis/igv_api.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 3c14fd9e3a..31f84eae7c 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -162,6 +162,8 @@ def update_individual_igv_sample(request, individual_guid):
         if not sample_type:
             raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format(
                 file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP])))
+        if _is_drs_uri_path(file_path) and not request_json.get('indexFilePath'):
+            raise Exception('Index File Path is required for DRS URIs')
 
         sample, created = get_or_create_model_from_json(
             IgvSample, create_json={'individual': individual, 'sample_type': sample_type},

From 3207fce3572dce069ce9c2cfb52f61ce4f067b58 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 3 Jun 2024 11:25:51 -0400
Subject: [PATCH 06/51] fix migration

---
 CHANGELOG.md                                                  | 1 +
 ...e_index_file_path.py => 0067_igvsample_index_file_path.py} | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)
 rename seqr/migrations/{0064_igvsample_index_file_path.py => 0067_igvsample_index_file_path.py} (74%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 64626f8078..56a31c866d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 # _seqr_ Changes
 
 ## dev
+* Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION)
 
 ## 5/24/24
 * Adds external_data to Family model (REQUIRES DB MIGRATION)

diff --git a/seqr/migrations/0064_igvsample_index_file_path.py b/seqr/migrations/0067_igvsample_index_file_path.py
similarity index 74%
rename from seqr/migrations/0064_igvsample_index_file_path.py
rename to seqr/migrations/0067_igvsample_index_file_path.py
index 437cabb23f..56fd82555f 100644
--- a/seqr/migrations/0064_igvsample_index_file_path.py
+++ b/seqr/migrations/0067_igvsample_index_file_path.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.23 on 2024-05-02 20:48
+# Generated by Django 3.2.23 on 2024-06-03 15:25
 
 from django.db import migrations, models
 
@@ -6,7 +6,7 @@ class Migration(migrations.Migration):
 
     dependencies = [
-        ('seqr', '0063_dynamicanalysisgroup'),
+        ('seqr', '0066_family_post_discovery_mondo_id'),
     ]
 
     operations = [

From ac9ac079c5de55d2017ad32a20efa4048be48773 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 3 Jun 2024 11:32:53 -0400
Subject: [PATCH 07/51] allow index URI for non-DRS alignment files

---
 seqr/views/apis/igv_api.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 822ebcc124..dff66d2e4d 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -32,13 +32,14 @@ def _process_alignment_records(rows, num_id_cols=1, **kwargs):
     parsed_records = defaultdict(list)
     for row in rows:
         row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
+        file_path = row[num_id_cols]
         sample_id = None
         index_file_path = None
         if len(row) > num_cols:
-            if _is_drs_uri_path(row[num_cols]):
-                index_file_path = row[num_cols]
-            else:
+            if file_path.endswith(GCNV_FILE_EXTENSIONS):
                 sample_id = row[num_cols]
+            else:
+                index_file_path = row[num_cols]
         parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': sample_id, 'indexFilePath': index_file_path})
     return parsed_records
 
@@ -141,6 +142,8 @@ def _get_valid_matched_individuals(individual_dataset_mapping):
     ('bed.gz', IgvSample.SAMPLE_TYPE_GCNV),
 ]
 
+GCNV_FILE_EXTENSIONS = tuple(ext for ext, sample_type in SAMPLE_TYPE_MAP if sample_type == IgvSample.SAMPLE_TYPE_GCNV)
+
 
 @pm_or_data_manager_required
 def update_individual_igv_sample(request, individual_guid):

From dd2740bfc22a8c9bc65084205174f2857ac91b2f Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Fri, 19 Jul 2024 16:29:22 -0400
Subject: [PATCH 08/51] shared sample ID utility

---
 seqr/views/utils/airtable_utils.py       | 18 ++++++++++++++++++
 seqr/views/utils/anvil_metadata_utils.py | 18 +-----------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/seqr/views/utils/airtable_utils.py b/seqr/views/utils/airtable_utils.py
index 63a8767611..eb1a4f8d1b 100644
--- a/seqr/views/utils/airtable_utils.py
+++ b/seqr/views/utils/airtable_utils.py
@@ -104,3 +104,21 @@ def _populate_records(self, record_type, records, offset=None):
 
         if response_json.get('offset'):
             self._populate_records(record_type, records, offset=response_json['offset'])
+
+    def _get_samples_for_id_field(self, sample_ids, id_field, fields):
+        raw_records = self.fetch_records(
+            'Samples', fields=[id_field] + fields,
+            or_filters={f'{{{id_field}}}': sample_ids},
+        )
+
+        records_by_id = defaultdict(list)
+        for airtable_id, record in raw_records.items():
+            records_by_id[record[id_field]].append({**record, 'airtable_id': airtable_id})
+        return records_by_id
+
+    def get_samples_for_sample_ids(self, sample_ids, fields):
+        records_by_id = self._get_samples_for_id_field(sample_ids, 'CollaboratorSampleID', fields)
+        missing = set(sample_ids) - set(records_by_id.keys())
+        if missing:
+            records_by_id.update(self._get_samples_for_id_field(missing, 'SeqrCollaboratorSampleID', fields))
+        return records_by_id

diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py
index 2f47aca5d0..f3a71193e1 100644
--- a/seqr/views/utils/anvil_metadata_utils.py
+++ b/seqr/views/utils/anvil_metadata_utils.py
@@ -550,27 +550,11 @@ def _get_variant_inheritance(individual, genotypes):
 LIST_SAMPLE_FIELDS = ['SequencingProduct', 'dbgap_submission']
 
 
-def _get_airtable_samples_for_id_field(sample_ids, id_field, fields, session):
-    raw_records = session.fetch_records(
-        'Samples', fields=[id_field] + fields,
-        or_filters={f'{{{id_field}}}': sample_ids},
-    )
-
-    records_by_id = defaultdict(list)
-    for airtable_id, record in raw_records.items():
-        records_by_id[record[id_field]].append({**record, 'airtable_id': airtable_id})
-    return records_by_id
-
-
 def _get_sample_airtable_metadata(sample_ids, user, airtable_fields):
     fields, list_fields = airtable_fields or [SINGLE_SAMPLE_FIELDS, LIST_SAMPLE_FIELDS]
     all_fields = fields + list_fields
 
-    session = AirtableSession(user)
-    records_by_id = _get_airtable_samples_for_id_field(sample_ids, 'CollaboratorSampleID', all_fields, session)
-    missing = set(sample_ids) - set(records_by_id.keys())
-    if missing:
-        records_by_id.update(_get_airtable_samples_for_id_field(missing, 'SeqrCollaboratorSampleID', all_fields, session))
+    records_by_id = AirtableSession(user).get_samples_for_sample_ids(sample_ids, all_fields)
 
     sample_records = {}
     for record_id, records in records_by_id.items():

From d0646073ac8101254537a10e2f3f14ed975829e3 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Fri, 19 Jul 2024 17:11:16 -0400
Subject: [PATCH 09/51] check if missing samples already loaded

---
 seqr/views/apis/data_manager_api.py | 34 +++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py
index acbf6720b6..837790f091 100644
--- a/seqr/views/apis/data_manager_api.py
+++ b/seqr/views/apis/data_manager_api.py
@@ -451,6 +451,10 @@ def write_pedigree(request, project_guid):
     'On hold for phenotips, but ready to load',
     'Methods (Loading)',
 ]
+AVAILABLE_PDO_STATUSES = {
+    'Available in seqr',
+    'Historic',
+}
 
 
 @pm_or_data_manager_required
@@ -518,7 +522,7 @@ def load_data(request):
     additional_project_files = None
     individual_ids = None
     if dataset_type == Sample.DATASET_TYPE_VARIANT_CALLS:
-        individual_ids = _get_valid_project_samples(project_samples, sample_type)
+        individual_ids = _get_valid_project_samples(project_samples, sample_type, request.user)
         additional_project_files = {
            project_guid: (f'{project_guid}_ids', ['s'], [{'s': sample_id} for sample_id in sample_ids])
            for project_guid, sample_ids in project_samples.items()
        }
@@ -534,7 +538,7 @@ def load_data(request):
 
     return create_json_response({'success': True})
 
 
-def _get_valid_project_samples(project_samples, sample_type):
+def _get_valid_project_samples(project_samples, sample_type, user):
     individuals = {
         (i['project'], i['individual_id']): i for i in Individual.objects.filter(family__project__guid__in=project_samples).values(
             'id', 'individual_id',
@@ -560,11 +564,20 @@ def _get_valid_project_samples(project_samples, sample_type):
     if missing_samples:
         errors.append(f'The following samples are included in airtable but missing from seqr: {", ".join(missing_samples)}')
 
-    missing_family_samples = defaultdict(list)
+    missing_samples = {}
     for (project, sample_id), individual in individuals.items():
         family_key = (project, individual['family_name'])
         if sample_id not in project_samples[project] and family_key in airtable_families and individual['sampleCount']:
-            missing_family_samples[family_key].append(sample_id)
+            missing_samples[(project, sample_id)] = individual
+
+    loaded_samples = _get_loaded_samples(missing_samples.keys(), user) if missing_samples else []
+
+    missing_family_samples = defaultdict(list)
+    for (project, sample_id), individual in missing_samples.items():
+        if (project, sample_id) in loaded_samples:
+            individual_ids.append(individual['id'])
+        else:
+            missing_family_samples[(project, individual['family_name'])].append(sample_id)
 
     if missing_family_samples:
         family_errors = [
@@ -578,6 +591,19 @@ def _get_valid_project_samples(project_samples, sample_type):
     return individual_ids
 
 
+def _get_loaded_samples(sample_keys, user):
+    sample_ids = [sample_id for _, sample_id in sample_keys]
+    samples_by_id = AirtableSession(user).get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject'])
+    return [(project, sample_id) for project, sample_id in sample_keys if any(
+        _is_loaded_airtable_sample(s, project) for s in samples_by_id.get(sample_id, [])
+    )]
+
+
+def _is_loaded_airtable_sample(sample, project_guid):
+    return f'https://seqr.broadinstitute.org/project/{project_guid}/project_page' in sample['SeqrProject'] and any(
+        status in AVAILABLE_PDO_STATUSES for status in sample['PDOStatus'])
+
+
 # Hop-by-hop HTTP response headers shouldn't be forwarded.
 # More info at: http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.5.1
 EXCLUDE_HTTP_RESPONSE_HEADERS = {

From aef856e2a75e5a9469f7bbdc0cd3ef99daae86d0 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 11:09:42 -0400
Subject: [PATCH 10/51] merge separate class for load data test

---
 seqr/views/apis/data_manager_api_tests.py | 42 +++++++++++------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
index 6c9290c0cb..57847aeef0 100644
--- a/seqr/views/apis/data_manager_api_tests.py
+++ b/seqr/views/apis/data_manager_api_tests.py
@@ -11,7 +11,7 @@
     update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \
     get_loaded_projects, load_data
 from seqr.views.utils.orm_to_json_utils import _get_json_for_models
-from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, AirflowTestCase, AirtableTest
+from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase, AirtableTest
 from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses
 from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization
 from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL
@@ -1495,9 +1495,15 @@ def _add_file_iter(self, stdout):
         self.mock_file_iter.return_value += stdout
 
 
-class AnvilDataManagerAPITest(AnvilAuthenticationTestCase, DataManagerAPITest):
+@mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
+class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest):
     fixtures = ['users', '1kg_project', 'reference_data']
 
+    DAG_NAME = 'v03_pipeline-MITO'
+    SECOND_DAG_NAME = 'v03_pipeline-GCNV'
+    LOADING_PROJECT_GUID = 'R0004_non_analyst_project'
+    PROJECTS = [PROJECT_GUID, LOADING_PROJECT_GUID]
+
     def setUp(self):
         patcher = mock.patch('seqr.utils.file_utils.subprocess.Popen')
         self.mock_subprocess = patcher.start()
@@ -1547,16 +1553,6 @@ def test_get_loaded_projects(self, *args, **kwargs):
         # Test relies on the local-only project data, and has no real difference for local/ non-local behavior
         pass
 
-
-@mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
-class LoadDataAPITest(AirflowTestCase):
-    fixtures = ['users', 'social_auth', '1kg_project']
-
-    DAG_NAME = 'v03_pipeline-MITO'
-    SECOND_DAG_NAME = 'v03_pipeline-GCNV'
-    LOADING_PROJECT_GUID = 'R0004_non_analyst_project'
-    PROJECTS = [PROJECT_GUID, LOADING_PROJECT_GUID]
-
     @staticmethod
     def _get_dag_variable_overrides(*args, **kwargs):
         return {
@@ -1568,14 +1564,16 @@ def _get_dag_variable_overrides(*args, **kwargs):
     @responses.activate
     @mock.patch('seqr.views.utils.export_utils.open')
     @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory')
-    @mock.patch('seqr.utils.file_utils.subprocess.Popen')
-    def test_load_data(self, mock_subprocess, mock_temp_dir, mock_open):
+    def test_load_data(self, mock_temp_dir, mock_open):
         url = reverse(load_data)
         self.check_pm_login(url)
 
         mock_temp_dir.return_value.__enter__.return_value = '/mock/tmp'
-        mock_subprocess.return_value.wait.return_value = 0
-        mock_subprocess.return_value.communicate.return_value = b'', b'File not found'
+        mock_subprocess = mock.MagicMock()
+        self.mock_subprocess.side_effect = None
+        self.mock_subprocess.return_value = mock_subprocess
+        mock_subprocess.wait.return_value = 0
+        mock_subprocess.communicate.return_value = b'', b'File not found'
         body = {'filePath': 'gs://test_bucket/mito_callset.mt', 'datasetType': 'MITO', 'sampleType': 'WGS', 'projects': [
             json.dumps({'projectGuid': 'R0001_1kg'}), json.dumps(PROJECT_OPTION), json.dumps({'projectGuid': 'R0005_not_project'}),
        ]}
@@ -1589,7 +1587,7 @@ def test_load_data(self, mock_temp_dir, mock_open):
         self.assertDictEqual(response.json(), {'success': True})
         self.assert_airflow_calls()
-        self._has_expected_gs_calls(mock_subprocess, mock_open, 'MITO')
+        self._has_expected_gs_calls(mock_open, 'MITO')
 
         dag_json = """{
     "projects_to_run": [
@@ -1618,7 +1616,7 @@ def test_load_data(self, mock_temp_dir, mock_open):
         mock_open.reset_mock()
         responses.calls.reset()
         mock_subprocess.reset_mock()
-        mock_subprocess.return_value.communicate.return_value = b'gs://seqr-datasets/v02/GRCh38/RDG_WES_Broad_Internal_SV/\ngs://seqr-datasets/v02/GRCh38/RDG_WGS_Broad_Internal_SV/v01/\ngs://seqr-datasets/v02/GRCh38/RDG_WES_Broad_Internal_GCNV/v02/', b''
+        mock_subprocess.communicate.return_value = b'gs://seqr-datasets/v02/GRCh38/RDG_WES_Broad_Internal_SV/\ngs://seqr-datasets/v02/GRCh38/RDG_WGS_Broad_Internal_SV/v01/\ngs://seqr-datasets/v02/GRCh38/RDG_WES_Broad_Internal_GCNV/v02/', b''
 
         body.update({'datasetType': 'SV', 'filePath': 'gs://test_bucket/sv_callset.vcf', 'sampleType': 'WES'})
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
@@ -1626,7 +1624,7 @@ def test_load_data(self, mock_temp_dir, mock_open):
         self.assertDictEqual(response.json(), {'success': True})
         self.assert_airflow_calls(trigger_error=True, secondary_dag_name=self.SECOND_DAG_NAME)
 
-        self._has_expected_gs_calls(mock_subprocess, mock_open, 'SV', is_second_dag=True, sample_type='WES')
+        self._has_expected_gs_calls(mock_open, 'SV', is_second_dag=True, sample_type='WES')
         self.mock_airflow_logger.warning.assert_not_called()
         self.mock_airflow_logger.error.assert_called_with(mock.ANY, self.pm_user)
         errors = [call.args[0] for call in self.mock_airflow_logger.error.call_args_list]
@@ -1669,9 +1667,9 @@ def test_load_data(self, mock_temp_dir, mock_open):
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
         self.assertEqual(response.status_code, 200)
         self.assertDictEqual(response.json(), {'success': True})
-        self._has_expected_gs_calls(mock_subprocess, mock_open, 'SNV_INDEL', has_project_subset=True)
+        self._has_expected_gs_calls(mock_open, 'SNV_INDEL', has_project_subset=True)
 
-    def _has_expected_gs_calls(self, mock_subprocess, mock_open, dataset_type, sample_type='WGS', has_project_subset=False, **kwargs):
+    def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has_project_subset=False, **kwargs):
         projects = self.PROJECTS[1:] if has_project_subset else self.PROJECTS
         mock_open.assert_has_calls([
             mock.call(f'/mock/tmp/{project}_pedigree.tsv', 'w') for project in projects
@@ -1698,7 +1696,7 @@ def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has
             ['R0004_non_analyst_project', 'F000014_14', '14', 'NA21987', '', '', 'M'],
         ])
 
-        mock_subprocess.assert_has_calls([
+        self.mock_subprocess.assert_has_calls([
             mock.call(
                 f'gsutil mv /mock/tmp/* gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/',
                 stdout=-1, stderr=-2, shell=True,  # nosec

From 95265c210baa56363a3de9346c7df1401603592f Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 11:52:30 -0400
Subject: [PATCH 11/51] test check airtable for loaded samples

---
 seqr/fixtures/social_auth.json              | 16 ++++++
 seqr/views/apis/data_manager_api_tests.py   | 58 +++++++++++++++++++--
 seqr/views/apis/variant_search_api_tests.py | 15 +++---
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/seqr/fixtures/social_auth.json b/seqr/fixtures/social_auth.json
index e092f257fa..b4642e5919 100644
--- a/seqr/fixtures/social_auth.json
+++ b/seqr/fixtures/social_auth.json
@@ -95,5 +95,21 @@
     "created": "2020-03-12T23:09:54.180Z",
     "modified": "2020-03-12T23:09:54.180Z"
   }
+}, {
+  "model": "social_django.usersocialauth",
+  "pk": 7,
+  "fields": {
+    "user": 16,
+    "provider": "google-oauth2",
+    "uid": "test_data_manager@broadinstitute.org",
+    "extra_data": {
+      "expires": 3599,
+      "auth_time": 1603287741,
+      "token_type": "Bearer",
+      "access_token": "ya29.EXAMPLE"
+    },
+    "created": "2020-03-12T23:09:54.180Z",
+    "modified": "2020-03-12T23:09:54.180Z"
+  }
 }
 ]

diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
index 57847aeef0..ace672dd41 100644
--- a/seqr/views/apis/data_manager_api_tests.py
+++ b/seqr/views/apis/data_manager_api_tests.py
@@ -435,6 +435,30 @@
     },
   ]
 }
+AIRTABLE_SAMPLE_RECORDS = {
+    'records': [
+        {
+            'id': 'recW24C2CJW5lT64K',
+            'fields': {
+                'CollaboratorSampleID': 'NA19678',
+                'SeqrProject': ['https://seqr.broadinstitute.org/project/R0001_1kg/project_page'],
+                'PDOStatus': ['Available in seqr'],
+            }
+        },
+    ],
+}
+AIRTABLE_SECONDARY_SAMPLE_RECORDS = {
+    'records': [
+        {
+            'id': 'recW24C2CJW5lT64K',
+            'fields': {
+                'SeqrCollaboratorSampleID': 'NA21234',
+                'SeqrProject': ['https://seqr.broadinstitute.org/project/R0004_non_analyst_project/project_page'],
+                'PDOStatus': ['Hold for phenotips'],
+            }
+        },
+    ],
+}
 
 
 @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
@@ -1497,7 +1521,7 @@ def _add_file_iter(self, stdout):
 @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP', 'project-managers')
 class AnvilDataManagerAPITest(AirflowTestCase, DataManagerAPITest):
-    fixtures = ['users', '1kg_project', 'reference_data']
+    fixtures = ['users', 'social_auth', '1kg_project', 'reference_data']
 
     DAG_NAME = 'v03_pipeline-MITO'
     SECOND_DAG_NAME = 'v03_pipeline-GCNV'
     LOADING_PROJECT_GUID = 'R0004_non_analyst_project'
@@ -1653,21 +1677,49 @@ def test_load_data(self, mock_temp_dir, mock_open):
         sample_ids = PROJECT_SAMPLES_OPTION['sampleIds']
         body['projects'] = [json.dumps({**PROJECT_OPTION, 'sampleIds': [sample_ids[1]]})]
+        airtable_samples_url = 'https://api.airtable.com/v0/app3Y97xtbbaOopVR/Samples'
+        responses.add(responses.GET, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS, status=200)
+        responses.add(responses.GET, airtable_samples_url, json=AIRTABLE_SECONDARY_SAMPLE_RECORDS, status=200)
+
+        # Non-Broad users can not access airtable
+        response = self.client.post(url, content_type='application/json', data=json.dumps(body))
+        self.assertEqual(response.status_code, 403)
+
+        responses.calls.reset()
+        self.login_data_manager_user()
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
         self.assertEqual(response.status_code, 400)
         self.assertDictEqual(response.json(), {
             'warnings': None,
             'errors': ['The following families have previously loaded samples absent from airtable: 14 (NA21234)'],
         })
+        self.assert_expected_airtable_call(
+            call_index=0,
+            filter_formula="OR({CollaboratorSampleID}='NA21234')",
+            fields=['CollaboratorSampleID', 'PDOStatus', 'SeqrProject'],
+        )
+        self.assert_expected_airtable_call(
+            call_index=1,
+            filter_formula="OR({SeqrCollaboratorSampleID}='NA21234')",
+            fields=['SeqrCollaboratorSampleID', 'PDOStatus', 'SeqrProject'],
+        )
 
-        body['projects'] = [
-            json.dumps({'projectGuid': 'R0001_1kg', 'sampleIds': ['NA19675_1', 'NA19678', 'NA19679']}),
+        responses.calls.reset()
+        responses.add(responses.GET, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS, status=200)
+        body['projects'] = [ # TODO
+            json.dumps({'projectGuid': 'R0001_1kg', 'sampleIds': ['NA19675_1', 'NA19679']}),
             json.dumps({**PROJECT_OPTION, 'sampleIds': sample_ids[:2]}),
         ]
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
         self.assertEqual(response.status_code, 200)
         self.assertDictEqual(response.json(), {'success': True})
         self._has_expected_gs_calls(mock_open, 'SNV_INDEL', has_project_subset=True)
+        self.assert_expected_airtable_call(
+            call_index=0,
+            filter_formula="OR({CollaboratorSampleID}='NA19678')",
+            fields=['CollaboratorSampleID', 'PDOStatus', 'SeqrProject'],
+        )

diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py
index 5142b2154f..0cf449d4f0 100644
--- a/seqr/views/apis/variant_search_api_tests.py
+++ b/seqr/views/apis/variant_search_api_tests.py
@@ -1025,15 +1025,18 @@ def assert_no_list_ws_has_al(self, acl_call_count, group_call_count, workspace_n
     assert_ws_has_al(self, acl_call_count, group_call_count, workspace_name)
 
 
-def assert_has_list_ws(self):
-    self.mock_list_workspaces.assert_has_calls([
+def assert_has_list_ws(self, has_data_manager=False):
+    calls = [
         mock.call(self.no_access_user),
         mock.call(self.collaborator_user),
-    ])
+    ]
+    if has_data_manager:
+        calls.insert(1, mock.call(self.data_manager_user))
+    self.mock_list_workspaces.assert_has_calls(calls)
 
 
-def assert_no_al_has_list_ws(self, group_count=1):
-    assert_has_list_ws(self)
+def assert_no_al_has_list_ws(self, group_count=1, has_data_manager=False):
+    assert_has_list_ws(self, has_data_manager)
     self.mock_get_ws_access_level.assert_not_called()
     assert_workspace_calls(self, group_count)
 
@@ -1067,7 +1070,7 @@ def test_query_variants(self, *args):
 
     def test_query_all_projects_variants(self, *args):
         super(AnvilVariantSearchAPITest, self).test_query_all_projects_variants(*args)
-        assert_no_al_has_list_ws(self, group_count=3)
+        assert_no_al_has_list_ws(self, group_count=3, has_data_manager=True)
 
     def test_query_all_project_families_variants(self, *args):
         super(AnvilVariantSearchAPITest, self).test_query_all_project_families_variants(*args)

From c9bb84569b13a440dbc3bdf112bbc62abc1013f9 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 11:53:03 -0400
Subject: [PATCH 12/51] test check airtable for loaded samples

---
 seqr/views/apis/data_manager_api.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py
index 837790f091..f2bf9e5b8e 100644
--- a/seqr/views/apis/data_manager_api.py
+++ b/seqr/views/apis/data_manager_api.py
@@ -527,6 +527,7 @@ def load_data(request):
             project_guid: (f'{project_guid}_ids', ['s'], [{'s': sample_id} for sample_id in sample_ids])
             for project_guid, sample_ids in project_samples.items()
         }
+    import pdb; pdb.set_trace()
 
     success_message = f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects'
     trigger_data_loading(

From 17b78d8a3fa3112b781bfc53d5d03918609a9b85 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 12:02:49 -0400
Subject: [PATCH 13/51] correctly update sample subset

---
 seqr/views/apis/data_manager_api.py       |  2 +-
 seqr/views/apis/data_manager_api_tests.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py
index f2bf9e5b8e..d6bc359e08 100644
--- a/seqr/views/apis/data_manager_api.py
+++ b/seqr/views/apis/data_manager_api.py
@@ -527,7 +527,6 @@ def load_data(request):
             project_guid: (f'{project_guid}_ids', ['s'], [{'s': sample_id} for sample_id in sample_ids])
             for project_guid, sample_ids in project_samples.items()
         }
-    import pdb; pdb.set_trace()
 
     success_message = f'*{request.user.email}* triggered loading internal {sample_type} {dataset_type} data for {len(projects)} projects'
     trigger_data_loading(
@@ -577,6 +576,7 @@ def _get_valid_project_samples(project_samples, sample_type, user):
     for (project, sample_id), individual in missing_samples.items():
         if (project, sample_id) in loaded_samples:
             individual_ids.append(individual['id'])
+            project_samples[project].append(sample_id)
         else:
             missing_family_samples[(project, individual['family_name'])].append(sample_id)

diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
index ace672dd41..f3b8bb130e 100644
--- a/seqr/views/apis/data_manager_api_tests.py
+++ b/seqr/views/apis/data_manager_api_tests.py
@@ -1704,9 +1704,10 @@ def test_load_data(self, mock_temp_dir, mock_open):
             fields=['SeqrCollaboratorSampleID', 'PDOStatus', 'SeqrProject'],
         )
 
+        mock_subprocess.reset_mock()
         responses.calls.reset()
         responses.add(responses.GET, airtable_samples_url, json=AIRTABLE_SAMPLE_RECORDS, status=200)
-        body['projects'] = [ # TODO
+        body['projects'] = [
             json.dumps({'projectGuid': 'R0001_1kg', 'sampleIds': ['NA19675_1', 'NA19679']}),
             json.dumps({**PROJECT_OPTION, 'sampleIds': sample_ids[:2]}),
         ]
+        body['sampleType'] = 'WES'
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
         self.assertEqual(response.status_code, 200)
         self.assertDictEqual(response.json(), {'success': True})
-        self._has_expected_gs_calls(mock_open, 'SNV_INDEL', has_project_subset=True)
+        self._has_expected_gs_calls(mock_open, 'SNV_INDEL', sample_type='WES', has_project_subset=True)
         self.assert_expected_airtable_call(
             call_index=0,
             filter_formula="OR({CollaboratorSampleID}='NA19678')",
@@ -1722,9 +1723,8 @@ def test_load_data(self, mock_temp_dir, mock_open):
         )
 
     def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has_project_subset=False, **kwargs):
-        projects = self.PROJECTS[1:] if has_project_subset else self.PROJECTS
         mock_open.assert_has_calls([
-            mock.call(f'/mock/tmp/{project}_pedigree.tsv', 'w') for project in projects
+            mock.call(f'/mock/tmp/{project}_pedigree.tsv', 'w') for project in self.PROJECTS
         ], any_order=True)
         files = [
             [row.split('\t') for row in write_call.args[0].split('\n')]
@@ -1733,7 +1733,7 @@ def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has
         self.assertEqual(len(files), 4 if has_project_subset else 2)
         if has_project_subset:
             self.assertEqual(len(files[1]), 4)
-            self.assertListEqual(files[1], [['s'], ['NA19675_1'], ['NA19678'], ['NA19679']])
+            self.assertListEqual(files[1], [['s'], ['NA19675_1'], ['NA19679'], ['NA19678']])
             self.assertEqual(len(files[3]), 3)
             self.assertListEqual(files[3], [['s'], ['NA21234'], ['NA21987']])
@@ -1752,10 +1752,10 @@ def _has_expected_gs_calls(self, mock_open, dataset_type, sample_type='WGS', has
             mock.call(
                 f'gsutil mv /mock/tmp/* gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/',
                 stdout=-1, stderr=-2, shell=True,  # nosec
-            ) for project in projects
+            ) for project in self.PROJECTS
         ] + [
             mock.call(
                 f'gsutil rsync -r gs://seqr-datasets/v02/GRCh38/RDG_{sample_type}_Broad_Internal/base/projects/{project}/ gs://seqr-loading-temp/v03/GRCh38/{sample_type}/{dataset_type}/pedigrees/',
                 stdout=-1, stderr=-2, shell=True,  # nosec
-            ) for project in projects
+            ) for project in self.PROJECTS
         ], any_order=True)

From 47c55d3a67c4031eb02f03734d9477511b9c43ca Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 14:59:04 -0400
Subject: [PATCH 14/51] add external data to bulk edit ui

---
 seqr/views/apis/project_api.py | 2 +-
 ui/pages/Project/constants.js  | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/seqr/views/apis/project_api.py b/seqr/views/apis/project_api.py
index e232f8fb58..3da4765471 100644
--- a/seqr/views/apis/project_api.py
+++ b/seqr/views/apis/project_api.py
@@ -189,7 +189,7 @@ def project_families(request, project_guid):
     families = family_models.values(
         'id', 'description',
         **{_to_camel_case(field): F(field) for field in [
-            'family_id', 'analysis_status', 'created_date', 'coded_phenotype', 'mondo_id',
+            'family_id', 'analysis_status', 'created_date', 'coded_phenotype', 'mondo_id', 'external_data',
         ]},
         familyGuid=F('guid'),
         projectGuid=Value(project_guid),

diff --git a/ui/pages/Project/constants.js b/ui/pages/Project/constants.js
index f63465019d..fd3f07640e 100644
--- a/ui/pages/Project/constants.js
+++ b/ui/pages/Project/constants.js
@@ -20,6 +20,7 @@ import {
   FAMILY_FIELD_MONDO_ID,
   FAMILY_FIELD_SAVED_VARIANTS,
   FAMILY_FIELD_NAME_LOOKUP,
+  FAMILY_FIELD_EXTERNAL_DATA,
   INDIVIDUAL_FIELD_ID,
   INDIVIDUAL_FIELD_PATERNAL_ID,
   INDIVIDUAL_FIELD_MATERNAL_ID,
@@ -37,6 +38,7 @@ import {
   FAMILY_NOTES_FIELDS,
   SNP_DATA_TYPE,
   MME_TAG_NAME,
+  FAMILY_EXTERNAL_DATA_LOOKUP,
 } from 'shared/utils/constants'
 
 export const CASE_REVIEW_TABLE_NAME = 'Case Review'
@@ -421,6 +423,10 @@ const FAMILY_FIELD_CONFIGS = Object.entries({
   [FAMILY_FIELD_ANALYSED_BY]: { format: analysedBy => analysedBy.map(o => o.createdBy).join(',') },
   [FAMILY_FIELD_CODED_PHENOTYPE]: { width: 4, description: "High level summary of the family's phenotype/disease" },
   [FAMILY_FIELD_MONDO_ID]: { width: 3, description: 'MONDO Disease Ontology ID' },
+  [FAMILY_FIELD_EXTERNAL_DATA]: {
+    description: 'Data types available external to seqr',
+    format: externalData => externalData.map(dataType => FAMILY_EXTERNAL_DATA_LOOKUP[dataType]?.name || dataType).join('; '),
+  },
   ...FAMILY_NOTES_FIELDS.reduce((acc, { id }) => ({ ...acc, [id]: { format: formatNotes } }), {}),
 }).reduce((acc, [field, config]) => ({ ...acc, [field]: { label: FAMILY_FIELD_NAME_LOOKUP[field], ...config } }), {})
 
@@ -447,6 +453,7 @@ export const FAMILY_BULK_EDIT_EXPORT_DATA = [
   FAMILY_FIELD_DESCRIPTION,
   FAMILY_FIELD_CODED_PHENOTYPE,
   FAMILY_FIELD_MONDO_ID,
+  FAMILY_FIELD_EXTERNAL_DATA,
 ].map(exportConfigForField(FAMILY_FIELD_CONFIGS))
 
 export const INDIVIDUAL_FIELDS = [

From dbc4a32551b8a71aced855ad1937becae460989c Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 15:27:42 -0400
Subject: [PATCH 15/51] support external data in bulk upload

---
 seqr/views/apis/family_api.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py
index dcae71adeb..3e15337cb2 100644
--- a/seqr/views/apis/family_api.py
+++ b/seqr/views/apis/family_api.py
@@ -395,6 +395,12 @@ def update_family_analysis_groups(request, family_guid):
     })
 
 
+EXTERNAL_DATA_LOOKUP = {v: k for k, v in Family.EXTERNAL_DATA_CHOICES}
+PARSE_FAMILY_TABLE_FIELDS = {
+    'externalData': lambda data_type: [EXTERNAL_DATA_LOOKUP[dt.strip()] for dt in (data_type or '').split(';')],
+}
+
+
 @login_and_policies_required
 def receive_families_table_handler(request, project_guid):
     """Handler for the initial upload of an Excel or .tsv table of families. This handler
@@ -425,10 +431,12 @@ def _process_records(records, filename=''):
                 column_map['mondoId'] = i
             elif 'description' in key:
                 column_map['description'] = i
+            elif 'external' in key and 'data' in key:
+                column_map['externalData'] = i
         if FAMILY_ID_FIELD not in column_map:
             raise ValueError('Invalid header, missing family id column')
 
-        return [{column: row[index] if isinstance(index, int) else next((row[i] for i in index if row[i]), None)
+        return [{column: PARSE_FAMILY_TABLE_FIELDS.get(column, lambda v: v)(row[index])
                  for column, index in column_map.items()} for row in records[1:]]
 
     try:

From 8110d995b3336c0e78b5c2b8cab1477e7d73c91f Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Mon, 22 Jul 2024 16:00:38 -0400
Subject: [PATCH 16/51] update tests

---
 seqr/views/apis/family_api.py        |  2 +-
 seqr/views/apis/family_api_tests.py  | 22 +++++++++++++++++++---
 seqr/views/apis/project_api_tests.py |  4 +++-
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py
index 3e15337cb2..9f2820dffc 100644
--- a/seqr/views/apis/family_api.py
+++ b/seqr/views/apis/family_api.py
@@ -397,7 +397,7 @@ def update_family_analysis_groups(request, family_guid):
 
 EXTERNAL_DATA_LOOKUP = {v: k for k, v in Family.EXTERNAL_DATA_CHOICES}
 PARSE_FAMILY_TABLE_FIELDS = {
-    'externalData': lambda data_type: [EXTERNAL_DATA_LOOKUP[dt.strip()] for dt in (data_type or '').split(';')],
+    'externalData': lambda data_type: [EXTERNAL_DATA_LOOKUP[dt.strip()] for dt in (data_type or '').split(';') if dt],
 }

diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py
index bd43d8d697..c4175eb150 100644
--- a/seqr/views/apis/family_api_tests.py
+++ b/seqr/views/apis/family_api_tests.py
@@ -469,9 +469,9 @@ def test_receive_families_table_handler(self, mock_pm_group):
         self.assertSetEqual(set(response_json.keys()), {'info', 'errors', 'warnings', 'uploadedFileId'})
 
-        url = reverse(edit_families_handler, args=[PROJECT_GUID])
+        edit_url = reverse(edit_families_handler, args=[PROJECT_GUID])
 
-        response = self.client.post(url, content_type='application/json',
+        response = self.client.post(edit_url, content_type='application/json',
                                     data=json.dumps({'uploadedFileId': response_json['uploadedFileId']}))
         self.assertEqual(response.status_code, 200)
         response_json = response.json()
@@ -487,8 +487,18 @@ def test_receive_families_table_handler(self, mock_pm_group):
         self.assertEqual(family_2['description'], 'family two description')
         self.assertEqual(family_2['familyId'], '2')
 
+        internal_field_data = b'Family ID External Data\n\
+"11" ""\n\
+"12" "ONT lrGS; BioNano"'
+        response = self.client.post(url, {'f': SimpleUploadedFile('families.tsv', internal_field_data)})
+        self.assertEqual(response.status_code, 200)
+        response = self.client.post(
+            edit_url, content_type='application/json', data=json.dumps({'uploadedFileId': response.json()['uploadedFileId']}))
+        self.assertEqual(response.status_code, 403)
+
         # Test PM permission
         url = reverse(receive_families_table_handler, args=[PM_REQUIRED_PROJECT_GUID])
+        edit_url = reverse(edit_families_handler, args=[PM_REQUIRED_PROJECT_GUID])
         response = self.client.post(url)
         self.assertEqual(response.status_code, 403)
@@ -498,8 +508,14 @@ def test_receive_families_table_handler(self, mock_pm_group):
         mock_pm_group.__bool__.return_value = True
         mock_pm_group.resolve_expression.return_value = 'project-managers'
 
-        response = self.client.post(url, {'f': SimpleUploadedFile('families.tsv', 'Family ID\n1'.encode('utf-8'))})
+        response = self.client.post(url, {'f': SimpleUploadedFile('families.tsv', internal_field_data)})
+        self.assertEqual(response.status_code, 200)
+        response = self.client.post(
+            edit_url, content_type='application/json', data=json.dumps({'uploadedFileId': response.json()['uploadedFileId']}))
         self.assertEqual(response.status_code, 200)
+        response_json = response.json()
+        self.assertListEqual(response_json['familiesByGuid']['F000011_11']['externalData'], [])
+        self.assertListEqual(response_json['familiesByGuid']['F000012_12']['externalData'], ['L', 'B'])
 
     def test_create_update_and_delete_family_note(self):
         # create the note

diff --git a/seqr/views/apis/project_api_tests.py b/seqr/views/apis/project_api_tests.py
index 7c5c821542..a86b472dae 100644
--- a/seqr/views/apis/project_api_tests.py
+++ b/seqr/views/apis/project_api_tests.py
@@ -382,7 +382,7 @@ def test_project_families(self):
         family_3 = response_json['familiesByGuid']['F000003_3']
         family_fields = {
             'individualGuids', 'discoveryTags', 'caseReviewStatuses', 'caseReviewStatusLastModified', 'hasRequiredMetadata',
-            'parents', 'hasPhenotypePrioritization',
+            'parents', 'hasPhenotypePrioritization', 'externalData',
         }
         family_fields.update(SUMMARY_FAMILY_FIELDS)
         self.assertSetEqual(set(family_1.keys()), family_fields)
@@ -399,6 +399,8 @@ def test_project_families(self):
         self.assertListEqual(family_3['parents'], [])
         self.assertEqual(family_1['hasPhenotypePrioritization'], True)
         self.assertFalse(family_3['hasPhenotypePrioritization'], False)
+        self.assertListEqual(family_1['externalData'], ['M'])
+        self.assertListEqual(family_3['externalData'], [])
 
         self.assertListEqual(family_3['discoveryTags'], [])
         self.assertSetEqual({tag['variantGuid'] for tag in family_1['discoveryTags']}, {'SV0000001_2103343353_r0390_100'})

From a39d4023f1125fef63d182461e3359b9584748f9 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Tue, 23 Jul 2024 13:51:03 -0400
Subject: [PATCH 17/51] validate families have affected individuals

---
 seqr/views/apis/anvil_workspace_api.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
index 9a0f006c14..4682064bc8 100644
--- a/seqr/views/apis/anvil_workspace_api.py
+++ b/seqr/views/apis/anvil_workspace_api.py
@@ -13,7 +13,7 @@
 from django.shortcuts import redirect
 
 from reference_data.models import GENOME_VERSION_LOOKUP
-from seqr.models import Project, CAN_EDIT, Sample
+from seqr.models import Project, CAN_EDIT, Sample, Individual
 from seqr.views.react_app import render_app_html
 from seqr.views.utils.airtable_utils import AirtableSession, ANVIL_REQUEST_TRACKING_TABLE
 from seqr.utils.search.constants import VCF_FILE_EXTENSIONS
@@ -250,10 +250,26 @@ def _parse_uploaded_pedigree(request_json, project=None):
     missing_samples = [record['individualId'] for record in pedigree_records
                        if record['individualId'] not in request_json['vcfSamples']]
 
+    errors = []
     if missing_samples:
-        error = 'The following samples are included in the pedigree file but are missing from the VCF: {}'.format(
-            ', '.join(missing_samples))
-        raise ErrorsWarningsException([error], [])
+        errors.append('The following samples are included in the pedigree file but are missing from the VCF: {}'.format(
+            ', '.join(missing_samples)))
+
+    records_by_family = defaultdict(list)
+    for record in pedigree_records:
+        records_by_family[record[JsonConstants.FAMILY_ID_COLUMN]].append(record)
+
+    no_affected_families = []
+    for family_id, records in records_by_family.items():
+        affected = [record for record in records if record[JsonConstants.AFFECTED_COLUMN] == Individual.AFFECTED_STATUS_AFFECTED]
+        if not affected:
+            no_affected_families.append(family_id)
+
+    if no_affected_families:
+        errors.append('The following families do not have any affected individuals: {}'.format(', '.join(no_affected_families)))
+
+    if errors:
+        raise ErrorsWarningsException(errors, [])
 
     return pedigree_records

From 2e77253acf3214325cc1b25a59b9bda39f681deb Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Tue, 23 Jul 2024 14:20:36 -0400
Subject: [PATCH 18/51] update ui to require HPO terms

---
 .../components/panel/LoadWorkspaceDataForm.jsx | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
index b134d57132..0ee64e81c2 100644
--- a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
+++ b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
@@ -12,6 +12,8 @@ import {
   FILE_FORMATS,
   INDIVIDUAL_CORE_EXPORT_DATA,
   INDIVIDUAL_ID_EXPORT_DATA,
+  INDIVIDUAL_HPO_EXPORT_DATA,
+  INDIVIDUAL_FIELD_FEATURES,
   INDIVIDUAL_FIELD_SEX,
   INDIVIDUAL_FIELD_AFFECTED,
   SAMPLE_TYPE_OPTIONS,
@@ -43,16 +45,19 @@ export const WORKSPACE_REQUIREMENTS = [
 ]
 
 const NON_ID_REQUIRED_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED]
+const HPO_FIELD = { ...INDIVIDUAL_HPO_EXPORT_DATA[0], header: 'HPO Terms' }
 
 const FIELD_DESCRIPTIONS = {
   [FAMILY_FIELD_ID]: 'Family ID',
   [INDIVIDUAL_FIELD_ID]: 'Individual ID (needs to match the VCF ids)',
   [INDIVIDUAL_FIELD_SEX]: 'Male, Female, or Unknown',
   [INDIVIDUAL_FIELD_AFFECTED]: 'Affected, Unaffected, or Unknown',
+  [INDIVIDUAL_FIELD_FEATURES]: 'Semi-colon separated list of HPO terms. Required for affected individuals only.',
 }
 const REQUIRED_FIELDS = [
   ...INDIVIDUAL_ID_EXPORT_DATA,
   ...INDIVIDUAL_CORE_EXPORT_DATA.filter(({ field }) => NON_ID_REQUIRED_FIELDS.includes(field)),
+  HPO_FIELD,
 ].map(config => ({ ...config, description: FIELD_DESCRIPTIONS[config.field] }))
 
 const OPTIONAL_FIELDS = INDIVIDUAL_CORE_EXPORT_DATA.filter(({ field }) => !NON_ID_REQUIRED_FIELDS.includes(field))
 
 const BLANK_EXPORT = {
   filename: 'individuals_template',
   rawData: [],
-  headers: [...INDIVIDUAL_ID_EXPORT_DATA, ...INDIVIDUAL_CORE_EXPORT_DATA].map(config => config.header),
+  headers: [...INDIVIDUAL_ID_EXPORT_DATA, ...INDIVIDUAL_CORE_EXPORT_DATA, HPO_FIELD].map(config => config.header),
   processRow: val => val,
 }
@@ -68,11 +73,11 @@ const DEMO_EXPORT = {
   ...BLANK_EXPORT,
   filename: 'demo_individuals',
   rawData: [
-    ['FAM1', 'FAM1_1', 'FAM1_2', 'FAM1_3', 'Male', 'Affected', ''],
-    ['FAM1', 'FAM1_4', 'FAM1_2', 'FAM1_3', '', 'Affected', 'an affected sibling'],
-    ['FAM1', 'FAM1_2', '', '', 'Male', 'Unaffected', ''],
-    ['FAM1', 'FAM1_3', '', '', 'Female', '', 'affected status of mother unknown'],
-    ['FAM2', 'FAM2_1', '', '', 'Female', 'Affected', 'a proband-only family'],
+    ['FAM1', 'FAM1_1', 'FAM1_2', 'FAM1_3', 'Male', 'Affected', '', 'HP:0001324 (Muscle weakness)'],
+    ['FAM1', 'FAM1_4', 'FAM1_2', 'FAM1_3', '', 'Affected', 'an affected sibling', 'HP:0001250 (Seizure); HP:0001324 (Muscle weakness)'],
+    ['FAM1', 'FAM1_2', '', '', 'Male', 'Unaffected', '', ''],
+    ['FAM1', 'FAM1_3', '', '', 'Female', 'Unknown', 'affected status of mother unknown', ''],
+    ['FAM2', 'FAM2_1', '', '', 'Female', 'Affected', 'a proband-only family', 'HP:0001263 (Global developmental delay)'],
   ],
 }

From dc57e074428ebf55c7ebcb7e0994606c85c3be1c Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Tue, 23 Jul 2024 16:12:19 -0400
Subject: [PATCH 19/51] update hpo parsing for anvil upload

---
 seqr/views/apis/anvil_workspace_api.py  | 12 ++---
 seqr/views/apis/individual_api.py       | 50 ++++++++-----------
 seqr/views/utils/pedigree_info_utils.py | 60 ++++++++++++++++++-----
 3 files changed, 73 insertions(+), 49 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
index 4682064bc8..5cf6af9036 100644
--- a/seqr/views/apis/anvil_workspace_api.py
+++ b/seqr/views/apis/anvil_workspace_api.py
@@ -243,7 +243,7 @@ def _parse_uploaded_pedigree(request_json, project=None):
     # Parse families/individuals in the uploaded pedigree file
     json_records = load_uploaded_file(request_json['uploadedFileId'])
     pedigree_records, _ = parse_basic_pedigree_table(
-        project, json_records, 'uploaded pedigree file', required_columns=[
+        project, json_records, 'uploaded pedigree file', update_features=True, required_columns=[
             JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN,
         ])
 
@@ -259,11 +259,10 @@ def _parse_uploaded_pedigree(request_json, project=None):
     for record in pedigree_records:
         records_by_family[record[JsonConstants.FAMILY_ID_COLUMN]].append(record)
 
-    no_affected_families = []
-    for family_id, records in records_by_family.items():
-        affected = [record for record in records if record[JsonConstants.AFFECTED_COLUMN] == Individual.AFFECTED_STATUS_AFFECTED]
-        if not affected:
-            no_affected_families.append(family_id)
+    no_affected_families = [
+        family_id for family_id, records in records_by_family.items()
+        if not any(record[JsonConstants.AFFECTED_COLUMN] == Individual.AFFECTED_STATUS_AFFECTED for record in records)
+    ]
 
     if no_affected_families:
         errors.append('The following families do not have any affected individuals: {}'.format(', '.join(no_affected_families)))
@@ -278,6 +277,7 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp
     # add families and individuals according to the uploaded individual records
     pedigree_json, sample_ids = add_or_update_individuals_and_families(
         project, individual_records=pedigree_records, user=user, get_update_json=get_pedigree_json, get_updated_individual_ids=True,
+        allow_features_update=True,
     )
     num_updated_individuals = len(sample_ids)
     sample_ids.update(previous_loaded_ids or [])

diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py
index 7b6d6f0c59..cef3f8d1f9 100644
--- a/seqr/views/apis/individual_api.py
+++ b/seqr/views/apis/individual_api.py
@@ -20,7 +20,8 @@
 from seqr.views.utils.orm_to_json_utils import _get_json_for_model, _get_json_for_individuals, add_individual_hpo_details, \
     _get_json_for_families, get_json_for_rna_seq_outliers, get_project_collaborators_by_username, INDIVIDUAL_DISPLAY_NAME_EXPR, \
     GREGOR_FINDING_TAG_TYPE
-from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, validate_fam_file_records, JsonConstants, ErrorsWarningsException
+from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, validate_fam_file_records, parse_hpo_terms, \
+    get_valid_hpo_terms, JsonConstants, ErrorsWarningsException
 from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \
     get_project_and_check_pm_permissions, login_and_policies_required, has_project_permissions, project_has_anvil, \
     is_internal_anvil_project, pm_or_data_manager_required, check_workspace_perm
@@ -387,7 +388,7 @@ def _set_parent_relationships(record, parents_by_guid, guid_key, parent_key, par
 INDIVIDUAL_GUID_COL = 'individual_guid'
 HPO_TERM_NUMBER_COL = 'hpo_number'
 AFFECTED_FEATURE_COL = 'affected'
-FEATURES_COL = 'features'
+FEATURES_COL = JsonConstants.FEATURES
 ABSENT_FEATURES_COL = 'absent_features'
 BIRTH_COL = 'birth_year'
 DEATH_COL = 'death_year'
@@ -440,8 +441,8 @@ def _gene_list_value(val):
 
 INDIVIDUAL_METADATA_FIELDS = {
-    FEATURES_COL: lambda val: [{'id': feature} for feature in set(val)],
-    ABSENT_FEATURES_COL: lambda val: [{'id': feature} for feature in val],
+    FEATURES_COL: list,
+    ABSENT_FEATURES_COL: list,
     BIRTH_COL: int,
     DEATH_COL: int,
     ONSET_AGE_COL: lambda val: Individual.ONSET_AGE_REVERSE_LOOKUP[val],
@@ -471,7 +472,7 @@ def _nested_val(nested_key):
 
 def _get_phenotips_features(observed):
     def get_observed_features(features):
-        return [feature['id'] for feature in features if feature['observed'] == observed]
+        return [{'id': feature['id']} for feature in features if feature['observed'] == observed]
     return get_observed_features
 
 PHENOTIPS_JSON_FIELD_MAP = {
@@ -592,8 +593,8 @@ def _process_hpo_records(records, filename, project, user):
 
     if FEATURES_COL in column_map or ABSENT_FEATURES_COL in column_map:
         for row in row_dicts:
-            row[FEATURES_COL] = _parse_hpo_terms(row.get(FEATURES_COL))
-            row[ABSENT_FEATURES_COL] = _parse_hpo_terms(row.get(ABSENT_FEATURES_COL))
+            row[FEATURES_COL] = parse_hpo_terms(row.get(FEATURES_COL))
+            row[ABSENT_FEATURES_COL] = parse_hpo_terms(row.get(ABSENT_FEATURES_COL))
 
     elif HPO_TERM_NUMBER_COL in column_map:
         aggregate_rows = defaultdict(lambda: {FEATURES_COL: set(), ABSENT_FEATURES_COL: set()})
@@ -608,30 +609,20 @@ def _process_hpo_records(records, filename, project, user):
                 aggregate_entry.update({k: v for k, v in row.items() if v})
 
         row_dicts = [
-            {**entry, FEATURES_COL: list(entry[FEATURES_COL]), ABSENT_FEATURES_COL: list(entry[ABSENT_FEATURES_COL])}
+            {**entry, **{col: [{'id': feature} for feature in entry[col]] for col in [FEATURES_COL, ABSENT_FEATURES_COL]}}
             for entry in aggregate_rows.values()
         ]
 
     return _parse_individual_hpo_terms(row_dicts, project, user)
 
 
-def _parse_hpo_terms(hpo_term_string):
-    if not hpo_term_string:
-        return []
-    return [hpo_term.strip() for hpo_term in re.sub(r'\(.*?\)', '', hpo_term_string).replace(',', ';').split(';')]
-
-
 def _has_same_features(individual, present_features, absent_features):
-    return {feature['id'] for feature in individual.features or []} == set(present_features or []) and \
-        {feature['id'] for feature in individual.absent_features or []} == set(absent_features or [])
+    return {feature['id'] for feature in individual.features or []} == {feature['id'] for feature in present_features or []} and \
+        {feature['id'] for feature in individual.absent_features or []} == {feature['id'] for feature in absent_features or []}
 
 
 def _get_valid_hpo_terms(json_records):
-    all_hpo_terms = set()
-    for record in json_records:
-        all_hpo_terms.update(record.get(FEATURES_COL, []))
-        all_hpo_terms.update(record.get(ABSENT_FEATURES_COL, []))
-    return set(HumanPhenotypeOntology.objects.filter(hpo_id__in=all_hpo_terms).values_list('hpo_id', flat=True))
+    return get_valid_hpo_terms(json_records, additional_feature_columns=[ABSENT_FEATURES_COL])
 
 
 def _parse_individual_hpo_terms(json_records, project, user):
@@ -704,14 +695,11 @@ def _get_record_individual(record, individual_lookup):
 
 def _remove_invalid_hpo_terms(record, hpo_terms):
     invalid_terms = set()
-    for feature in record.get(FEATURES_COL, []):
-        if feature not in hpo_terms:
-            invalid_terms.add(feature)
-            record[FEATURES_COL].remove(feature)
-    for feature in record.get(ABSENT_FEATURES_COL, []):
-        if feature not in hpo_terms:
-            invalid_terms.add(feature)
-            record[ABSENT_FEATURES_COL].remove(feature)
+    for col in [FEATURES_COL, ABSENT_FEATURES_COL]:
+        for feature in record.get(col, []):
+            if feature['id'] not in hpo_terms:
+                invalid_terms.add(feature['id'])
+                record[col].remove(feature)
     return invalid_terms
@@ -858,12 +846,12 @@ def import_gregor_metadata(request, project_guid):
             lambda r: r['participant_id'] in individuals_by_participant and r['ontology'] == 'HPO' and r['presence'] in {'Present', 'Absent'},
     ):
         col = FEATURES_COL if row['presence'] == 'Present' else ABSENT_FEATURES_COL
-        individuals_by_participant[row['participant_id']][col].append(row['term_id'])
+        individuals_by_participant[row['participant_id']][col].append({'id': row['term_id']})
 
     hpo_terms = _get_valid_hpo_terms(individuals)
     invalid_hpo_terms = set()
     for row in individuals:
         invalid_hpo_terms.update(_remove_invalid_hpo_terms(row, hpo_terms))
-        row.update({k: INDIVIDUAL_METADATA_FIELDS[k](v) for k, v in row.items() if k in [FEATURES_COL, ABSENT_FEATURES_COL]})
+        row.update({k: row[k] for k in [FEATURES_COL, ABSENT_FEATURES_COL] if k in row})
     if invalid_hpo_terms:
         warnings.append(f"Skipped the following unrecognized HPO terms: {', '.join(sorted(invalid_hpo_terms))}")

diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
index df2b0f026e..7472dbfd5f 100644
--- a/seqr/views/utils/pedigree_info_utils.py
+++ b/seqr/views/utils/pedigree_info_utils.py
@@ -2,11 +2,13 @@
 import difflib
 import os
 import json
+import re
 import tempfile
 import openpyxl as xl
 from collections import defaultdict
 from datetime import date
 from
reference_data.models import HumanPhenotypeOntology from seqr.utils.communication_utils import send_html_email from seqr.utils.logging_utils import SeqrLogger from seqr.utils.middleware import ErrorsWarningsException @@ -77,9 +79,12 @@ def parse_pedigree_table(parsed_file, filename, user, project): return json_records, warnings -def parse_basic_pedigree_table(project, parsed_file, filename, required_columns=None): +def parse_basic_pedigree_table(project, parsed_file, filename, required_columns=None, update_features=False): rows, header = _parse_pedigree_table_rows(parsed_file, filename) - return _parse_pedigree_table_json(project, rows, header=header, fail_on_warnings=True, required_columns=required_columns, allow_id_update=False) + return _parse_pedigree_table_json( + project, rows, header=header, fail_on_warnings=True, allow_id_update=False, + required_columns=required_columns, update_features=update_features, + ) def _parse_pedigree_table_rows(parsed_file, filename, header=None, rows=None): @@ -110,15 +115,15 @@ def _parse_pedigree_table_rows(parsed_file, filename, header=None, rows=None): raise ErrorsWarningsException(['Error while parsing file: {}. {}'.format(filename, e)], []) -def _parse_pedigree_table_json(project, rows, header=None, column_map=None, errors=None, fail_on_warnings=False, required_columns=None, allow_id_update=True): +def _parse_pedigree_table_json(project, rows, header=None, column_map=None, errors=None, fail_on_warnings=False, required_columns=None, allow_id_update=True, update_features=False): # convert to json and validate - column_map = column_map or (_parse_header_columns(header, allow_id_update) if header else None) + column_map = column_map or (_parse_header_columns(header, allow_id_update, update_features) if header else None) if column_map: - json_records = _convert_fam_file_rows_to_json(column_map, rows, required_columns=required_columns) + json_records = _convert_fam_file_rows_to_json(column_map, rows, required_columns=required_columns, update_features=update_features) else: json_records = rows - warnings = validate_fam_file_records(project, json_records, fail_on_warnings=fail_on_warnings, errors=errors) + warnings = validate_fam_file_records(project, json_records, fail_on_warnings=fail_on_warnings, errors=errors, update_features=update_features) return json_records, warnings @@ -142,7 +147,14 @@ def _parse_affected(affected): return None -def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None): +def parse_hpo_terms(hpo_term_string): + if not hpo_term_string: + return [] + terms = {hpo_term.strip() for hpo_term in re.sub(r'\(.*?\)', '', hpo_term_string).replace(',', ';').split(';')} + return[{'id': term for term in terms if term}] + + +def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None, update_features=False): """Parse the values in rows and convert them to a json representation. 
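
    A minimal illustration (hypothetical header keys and values; the real json keys
    come from JsonConstants and the real header from the uploaded file):

        column_map = {'family id': 'familyId', 'individual id': 'individualId', 'hpo terms': 'features'}
        rows = [{'family id': 'F1', 'individual id': 'F1_1', 'hpo terms': 'HP:0001250 (Seizure)'}]
        # expected output, with parse_hpo_terms applied to the features column:
        # [{'familyId': 'F1', 'individualId': 'F1_1', 'features': [{'id': 'HP:0001250'}]}]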
Args: @@ -170,10 +182,11 @@ def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None): ValueError: if there are unexpected values or row sizes """ required_columns = [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN] + (required_columns or []) - missing_cols = set(required_columns) - set(column_map.values()) + missing_cols = [_to_title_case(_to_snake_case(col)) for col in set(required_columns) - set(column_map.values())] + if update_features and JsonConstants.FEATURES not in column_map.values(): + missing_cols.append('HPO Terms') if missing_cols: - raise ErrorsWarningsException( - [f"Missing required columns: {', '.join([_to_title_case(_to_snake_case(col)) for col in sorted(missing_cols)])}"]) + raise ErrorsWarningsException([f"Missing required columns: {', '.join(sorted(missing_cols))}"]) json_results = [] errors = [] @@ -200,7 +213,7 @@ def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None): return json_results -def _parse_header_columns(header, allow_id_update): +def _parse_header_columns(header, allow_id_update, update_features): column_map = {} for key in header: column = None @@ -215,6 +228,8 @@ def _parse_header_columns(header, allow_id_update): elif 'indiv' in key and 'previous' in key: if allow_id_update: column = JsonConstants.PREVIOUS_INDIVIDUAL_ID_COLUMN + elif update_features and 'hpo' in key and 'term' in key: + column = JsonConstants.FEATURES else: column = next(( col for col, substrings in JsonConstants.COLUMN_SUBSTRINGS @@ -238,7 +253,7 @@ def _format_value(value, column): return value -def validate_fam_file_records(project, records, fail_on_warnings=False, errors=None, clear_invalid_values=False): +def validate_fam_file_records(project, records, fail_on_warnings=False, errors=None, clear_invalid_values=False, update_features=False): """Basic validation such as checking that parents have the same family id as the child, etc. 
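
    For example (illustrative records, assuming update_features=True and the
    affected/feature checks added in this patch):

        {'individualId': 'A1', 'affected': 'A', 'features': []}
        # -> error: 'A1 is affected but has no HPO terms'
        {'individualId': 'A2', 'affected': 'A', 'features': [{'id': 'HP:9999999'}]}
        # -> error: 'A2 has invalid HPO terms: HP:9999999' (id not in HumanPhenotypeOntology)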
Args: @@ -259,6 +274,8 @@ def validate_fam_file_records(project, records, fail_on_warnings=False, errors=N loaded_individual_families = dict(Individual.objects.filter( family__project=project, sample__is_active=True).values_list('individual_id', 'family__family_id')) + hpo_terms = get_valid_hpo_terms(records) if update_features else None + errors = errors or [] warnings = [] individual_id_counts = defaultdict(int) @@ -298,6 +315,14 @@ def validate_fam_file_records(project, records, fail_on_warnings=False, errors=N ]: _validate_parent(r, *parent, individual_id, family_id, records_by_id, warnings, errors, clear_invalid_values) + if update_features: + features = r[JsonConstants.FEATURES] or [] + if not features and r[JsonConstants.AFFECTED_COLUMN] == Individual.AFFECTED_STATUS_AFFECTED: + errors.append(f'{individual_id} is affected but has no HPO terms') + invalid_features = {feature['id'] for feature in features if feature['id'] not in hpo_terms} + if invalid_features: + errors.append(f'{individual_id} has invalid HPO terms: {", ".join(sorted(invalid_features))}') + errors += [ f'{individual_id} is included as {count} separate records, but must be unique within the project' for individual_id, count in individual_id_counts.items() if count > 1 @@ -311,6 +336,15 @@ def validate_fam_file_records(project, records, fail_on_warnings=False, errors=N return warnings +def get_valid_hpo_terms(records, additional_feature_columns=None): + all_hpo_terms = set() + for record in records: + all_hpo_terms.update({feature['id'] for feature in record.get(JsonConstants.FEATURES, [])}) + for col in (additional_feature_columns or []): + all_hpo_terms.update({feature['id'] for feature in record.get(col, [])}) + return set(HumanPhenotypeOntology.objects.filter(hpo_id__in=all_hpo_terms).values_list('hpo_id', flat=True)) + + def _validate_parent(row, parent_id_type, parent_id_field, expected_sex, individual_id, family_id, records_by_id, warnings, errors, clear_invalid_values): parent_id = row.get(parent_id_field) if not parent_id: @@ -808,6 +842,7 @@ class JsonConstants: PRIMARY_BIOSAMPLE = 'primaryBiosample' ANALYTE_TYPE = 'analyteType' TISSUE_AFFECTED_STATUS = 'tissueAffectedStatus' + FEATURES = 'features' JSON_COLUMNS = {MATERNAL_ETHNICITY, PATERNAL_ETHNICITY, BIRTH_YEAR, DEATH_YEAR, ONSET_AGE, AFFECTED_RELATIVES} NULLABLE_COLUMNS = {TISSUE_AFFECTED_STATUS} @@ -823,6 +858,7 @@ class JsonConstants: (code for code, uberon_code in Individual.BIOSAMPLE_CHOICES if value.startswith(uberon_code)), None), ANALYTE_TYPE: Individual.ANALYTE_REVERSE_LOOKUP.get, TISSUE_AFFECTED_STATUS: lambda value: {'Yes': True, 'No': False, 'Unknown': None}[value], + FEATURES: lambda value: parse_hpo_terms(value), } FORMAT_COLUMNS.update({col: json.loads for col in JSON_COLUMNS}) From a7ddf14165cc7bcfd32177aaebf2e9f5fdcb9b34 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 23 Jul 2024 16:17:00 -0400 Subject: [PATCH 20/51] fix typo --- seqr/views/utils/pedigree_info_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index 7472dbfd5f..9d7d131bc1 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -151,7 +151,7 @@ def parse_hpo_terms(hpo_term_string): if not hpo_term_string: return [] terms = {hpo_term.strip() for hpo_term in re.sub(r'\(.*?\)', '', hpo_term_string).replace(',', ';').split(';')} - return[{'id': term for term in terms if term}] + return[{'id': term} for term in terms if 
term]


def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None, update_features=False):

From ff65b4a914ff2efb021c2244015d5837935f4246 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Tue, 23 Jul 2024 16:52:14 -0400
Subject: [PATCH 21/51] update tests

---
 seqr/views/apis/anvil_workspace_api_tests.py | 33 ++++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
index e1b081adeb..d8bdf7e9e7 100644
--- a/seqr/views/apis/anvil_workspace_api_tests.py
+++ b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -15,17 +15,19 @@
 LOAD_SAMPLE_DATA = [
     ["Family ID", "Individual ID", "Previous Individual ID", "Paternal ID", "Maternal ID", "Sex", "Affected Status",
-     "Notes", "familyNotes"],
-    ["1", " NA19675_1 ", "NA19675_1 ", "NA19678 ", "", "Female", "Affected", "A affected individual, test1-zsf", ""],
-    ["1", "NA19678", "", "", "", "Male", "Unaffected", "a individual note", ""],
-    ["21", " HG00735", "", "", "", "Unknown", "Unknown", "", "a new family"]]
+     "HPO Terms", "Notes", "familyNotes"],
+    ["1", " NA19675_1 ", "NA19675_1 ", "NA19678 ", "", "Female", "Affected", "HP:0012469 (Infantile spasms); HP:0011675 (Arrhythmia)", "A affected individual, test1-zsf", ""],
+    ["1", "NA19678", "", "", "", "Male", "Unaffected", "", "a individual note", ""],
+    ["21", " HG00735", "", "", "", "Unknown", "Affected", "HP:0001508,HP:0001508", "", "a new family"]]

-BAD_SAMPLE_DATA = [["1", "NA19674", "NA19674_1", "NA19678", "NA19679", "Female", "Affected", "A affected individual, test1-zsf", ""]]
-INVALID_ADDED_SAMPLE_DATA = [['22', 'HG00731', 'HG00731', '', '', 'Female', 'Affected', '', '']]
+BAD_SAMPLE_DATA = [["1", "NA19674", "NA19674_1", "NA19678", "NA19679", "Female", "Affected", "", "A affected individual, test1-zsf", ""],
+                   ["1", "NA19681", "", "", "", "Male", "Affected", "HP:0100258", "", ""]]
+INVALID_ADDED_SAMPLE_DATA = [['22', 'HG00731', 'HG00731', '', '', 'Female', 'Affected', 'HP:0011675', '', '']]

-MISSING_REQUIRED_SAMPLE_DATA = [["21", "HG00736", "", "", "", "", "", "", ""]]
+MISSING_REQUIRED_SAMPLE_DATA = [["21", "HG00736", "", "", "", "", "", "", "", ""]]

-LOAD_SAMPLE_DATA_EXTRA_SAMPLE = LOAD_SAMPLE_DATA + [["1", "NA19679", "", "", "", "Male", "Affected", "", ""]]
+LOAD_SAMPLE_DATA_EXTRA_SAMPLE = LOAD_SAMPLE_DATA + [["1", "NA19679", "", "", "", "Male", "Affected", "HP:0011675", "", ""],
+                                                    ["22", "HG00736", "", "", "", "Unknown", "Unknown", "", "", ""]]

 FILE_DATA = [
     '##fileformat=VCFv4.2\n',

@@ -468,7 +470,7 @@
 class LoadAnvilDataAPITest(AirflowTestCase):
-    fixtures = ['users', 'social_auth', '1kg_project']
+    fixtures = ['users', 'social_auth', 'reference_data', '1kg_project']

     LOADING_PROJECT_GUID = f'P_{TEST_NO_PROJECT_WORKSPACE_NAME}'
     DAG_NAME = 'v03_pipeline-SNV_INDEL'

@@ -676,7 +678,7 @@ def _test_errors(self, url, fields, workspace_name):
         response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
         self.assertEqual(response.status_code, 400)
         response_json = response.json()
-        self.assertListEqual(response_json['errors'], ['Missing required columns: Affected, Sex'])
+        self.assertListEqual(response_json['errors'], ['Missing required columns: Affected, HPO Terms, Sex'])

         self.mock_load_file.return_value = LOAD_SAMPLE_DATA + MISSING_REQUIRED_SAMPLE_DATA
         response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
@@ -690,6
+692,8 @@ def _test_errors(self, url, fields, workspace_name): self.assertEqual(response.status_code, 400) response_json = response.json() self.assertListEqual(response_json['errors'], [ + 'NA19674 is affected but has no HPO terms', + 'NA19681 has invalid HPO terms: HP:0100258', 'NA19679 is the mother of NA19674 but is not included. Make sure to create an additional record with NA19679 as the Individual ID', ]) @@ -699,7 +703,8 @@ def _test_errors(self, url, fields, workspace_name): self.assertEqual(response.status_code, 400) response_json = response.json() self.assertEqual(response_json['errors'], - ['The following samples are included in the pedigree file but are missing from the VCF: NA19679']) + ['The following samples are included in the pedigree file but are missing from the VCF: NA19679, HG00736', + 'The following families do not have any affected individuals: 22']) def _assert_valid_operation(self, project, test_add_data=True): genome_version = 'GRCh37' if test_add_data else 'GRCh38' @@ -798,19 +803,21 @@ def _assert_valid_operation(self, project, test_add_data=True): individual_model_data = list(Individual.objects.filter(family__project=project).values( 'family__family_id', 'individual_id', 'mother__individual_id', 'father__individual_id', 'sex', 'affected', 'notes', + 'features', )) self.assertEqual(len(individual_model_data), 15 if test_add_data else 3) self.assertIn({ 'family__family_id': '21', 'individual_id': 'HG00735', 'mother__individual_id': None, - 'father__individual_id': None, 'sex': 'U', 'affected': 'U', 'notes': None, + 'father__individual_id': None, 'sex': 'U', 'affected': 'A', 'notes': None, 'features': [{'id': 'HP:0001508'}], }, individual_model_data) self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19675_1', 'mother__individual_id': None, 'father__individual_id': 'NA19678', 'sex': 'F', 'affected': 'A', 'notes': 'A affected individual, test1-zsf', + 'features': [{'id': 'HP:0012469'}, {'id': 'HP:0011675'}], }, individual_model_data) self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19678', 'mother__individual_id': None, - 'father__individual_id': None, 'sex': 'M', 'affected': 'N', 'notes': 'a individual note' + 'father__individual_id': None, 'sex': 'M', 'affected': 'N', 'notes': 'a individual note', 'features': [], }, individual_model_data) def _test_mv_file_and_triggering_dag_exception(self, url, workspace, sample_data, genome_version, request_body, num_samples=None): From 948bf457dcb611233642f27ea21ce0f7bd4a48af Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 23 Jul 2024 16:53:43 -0400 Subject: [PATCH 22/51] fix empty features formatting --- seqr/views/utils/pedigree_info_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index 9d7d131bc1..1d3135d5b0 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -244,7 +244,7 @@ def _parse_header_columns(header, allow_id_update, update_features): def _format_value(value, column): format_func = JsonConstants.FORMAT_COLUMNS.get(column) if format_func: - if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}): + if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN, JsonConstants.FEATURES}): value = format_func(value) if value is None and column not in JsonConstants.NULLABLE_COLUMNS: raise ValueError() From 74e997470b31745cd151fb861cdf3c5283241346 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: 
Wed, 24 Jul 2024 10:10:47 -0400 Subject: [PATCH 23/51] codacy clean up --- seqr/views/utils/pedigree_info_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index 1d3135d5b0..fffe0426c7 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -83,7 +83,7 @@ def parse_basic_pedigree_table(project, parsed_file, filename, required_columns= rows, header = _parse_pedigree_table_rows(parsed_file, filename) return _parse_pedigree_table_json( project, rows, header=header, fail_on_warnings=True, allow_id_update=False, - required_columns=required_columns, update_features=update_features, + required_columns=required_columns, update_features=update_features, ) @@ -858,7 +858,7 @@ class JsonConstants: (code for code, uberon_code in Individual.BIOSAMPLE_CHOICES if value.startswith(uberon_code)), None), ANALYTE_TYPE: Individual.ANALYTE_REVERSE_LOOKUP.get, TISSUE_AFFECTED_STATUS: lambda value: {'Yes': True, 'No': False, 'Unknown': None}[value], - FEATURES: lambda value: parse_hpo_terms(value), + FEATURES: parse_hpo_terms, } FORMAT_COLUMNS.update({col: json.loads for col in JSON_COLUMNS}) From a7dd1d6e763c78ad0bfab5a36a4f81133c188a92 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 10:34:50 -0400 Subject: [PATCH 24/51] migration --- ...e_index_file_path.py => 0071_igvsample_index_file_path.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename seqr/migrations/{0068_igvsample_index_file_path.py => 0071_igvsample_index_file_path.py} (71%) diff --git a/seqr/migrations/0068_igvsample_index_file_path.py b/seqr/migrations/0071_igvsample_index_file_path.py similarity index 71% rename from seqr/migrations/0068_igvsample_index_file_path.py rename to seqr/migrations/0071_igvsample_index_file_path.py index 8840dccc59..5d29558631 100644 --- a/seqr/migrations/0068_igvsample_index_file_path.py +++ b/seqr/migrations/0071_igvsample_index_file_path.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2024-06-03 15:25 +# Generated by Django 4.2.13 on 2024-07-24 14:34 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ('seqr', '0067_alter_variantfunctionaldata_functional_data_tag'), + ('seqr', '0070_remove_rnasample_dataset_type_and_more'), ] operations = [ From 7294c4c895f3ed4b56f1081350716f573bb99c6e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 10:35:28 -0400 Subject: [PATCH 25/51] fix changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0881fecb9..6a9587770b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,13 @@ # _seqr_ Changes ## dev +* Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) ## 7/8/24 * Add VLM contact for Projects (REQUIRES DB MIGRATION) ## 6/11/24 * Add "Partial Phenotype Contribution" functional tag (REQUIRES DB MIGRATION) -* Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) ## 5/24/24 * Adds external_data to Family model (REQUIRES DB MIGRATION) From bf3b4066ffbd440541ea8e9d1afa9520b8058783 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 10:47:22 -0400 Subject: [PATCH 26/51] remove drs code --- seqr/views/apis/igv_api.py | 68 ++++++-------------------------------- 1 file changed, 11 insertions(+), 57 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index dff66d2e4d..7bf9320112 100644 --- 
a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -158,16 +158,12 @@ def update_individual_igv_sample(request, individual_guid): if not file_path: raise ValueError('request must contain fields: filePath') - original_file_path = file_path - file_path = _get_valid_file_path(file_path, request.user) - if not file_path: - raise Exception('Error accessing "{}"'.format(original_file_path)) sample_type = next((st for suffix, st in SAMPLE_TYPE_MAP if file_path.endswith(suffix)), None) if not sample_type: raise Exception('Invalid file extension for "{}" - valid extensions are {}'.format( file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) - if _is_drs_uri_path(file_path) and not request_json.get('indexFilePath'): - raise Exception('Index File Path is required for DRS URIs') + if not does_file_exist(file_path, user=request.user): + raise Exception('Error accessing "{}"'.format(file_path)) sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, @@ -190,28 +186,6 @@ def update_individual_igv_sample(request, individual_guid): return create_json_response({'error': error}, status=400, reason=error) -def _is_drs_uri_path(file_path): - return file_path.startswith('drs://') - - -def _get_drs_info(file_path, user): - drs_path = file_path.split('/') - response = requests.get( - f'https://{drs_path[2]}/ga4gh/drs/v1/objects/{drs_path[3]}', - headers=_get_gs_auth_api_headers(user), - ) - - return response.json() if response.status_code == 200 else None - - -def _get_valid_file_path(file_path, user): - if _is_drs_uri_path(file_path): - drs_info = _get_drs_info(file_path, user) - return None if drs_info is None else f"{file_path}/{drs_info['name']}" - - return file_path if does_file_exist(file_path, user=user) else None - - @login_and_policies_required def fetch_igv_track(request, project_guid, igv_track_path): @@ -223,45 +197,25 @@ def fetch_igv_track(request, project_guid, igv_track_path): if is_google_bucket_file_path(igv_track_path): return _stream_gs(request, igv_track_path) - if _is_drs_uri_path(igv_track_path): - return _stream_drs(request, igv_track_path) - return _stream_file(request, igv_track_path) def _stream_gs(request, gs_path): - return _stream_response( - url=f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", - headers=_get_gs_rest_api_headers(gs_path, user=request.user), - request=request) - + headers = _get_gs_rest_api_headers(request.META.get('HTTP_RANGE'), gs_path, user=request.user) -def _stream_drs(request, file_path): - drs_info = _get_drs_info(file_path, request.user) - - https_access = next(a['access_url'] for a in drs_info['access_methods'] if a['type'] == 'https') - headers = dict([h.split(': ') for h in https_access['headers']]) - - return _stream_response(https_access['url'], headers, request) - - -def _stream_response(url, headers, request): - range_header = request.META.get('HTTP_RANGE') - if range_header: - headers['Range'] = range_header - - response = requests.get(url, headers=headers, stream=True, timeout=TIMEOUT) + response = requests.get( + f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", + headers=headers, + stream=True, timeout=TIMEOUT) return StreamingHttpResponse(response.iter_content(chunk_size=65536), status=response.status_code, content_type='application/octet-stream') -def _get_gs_auth_api_headers(user): - return {'Authorization': 'Bearer {}'.format(_get_access_token(user))} - - -def _get_gs_rest_api_headers(gs_path, user): - headers = 
_get_gs_auth_api_headers(user) +def _get_gs_rest_api_headers(range_header, gs_path, user=None): + headers = {'Authorization': 'Bearer {}'.format(_get_access_token(user))} + if range_header: + headers['Range'] = range_header google_project = get_google_project(gs_path) if google_project: headers['x-goog-user-project'] = get_google_project(gs_path) From 140c7213db86f160dbb4ba71cc01a18d82044e09 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 11:19:07 -0400 Subject: [PATCH 27/51] validate new index file --- seqr/views/apis/igv_api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 7bf9320112..e74e26856a 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -58,8 +58,11 @@ def _process_igv_table_handler(parse_uploaded_file, get_valid_matched_individual info.append(message) existing_sample_files = defaultdict(set) + existing_sample_index_files = defaultdict(set) for sample in IgvSample.objects.select_related('individual').filter(individual__in=matched_individuals.keys()): existing_sample_files[sample.individual].add(sample.file_path) + if sample.index_file_path: + existing_sample_index_files[sample.individual].add(sample.index_file_path) num_unchanged_rows = 0 all_updates = [] @@ -68,6 +71,7 @@ def _process_igv_table_handler(parse_uploaded_file, get_valid_matched_individual dict(individualGuid=individual.guid, individualId=individual.individual_id, **update) for update in updates if update['filePath'] not in existing_sample_files[individual] + or (update['indexFilePath'] and update['indexFilePath'] not in existing_sample_index_files) ] all_updates += changed_updates num_unchanged_rows += len(updates) - len(changed_updates) @@ -164,6 +168,8 @@ def update_individual_igv_sample(request, individual_guid): file_path, ', '.join([suffix for suffix, _ in SAMPLE_TYPE_MAP]))) if not does_file_exist(file_path, user=request.user): raise Exception('Error accessing "{}"'.format(file_path)) + if request_json.get('indexFilePath') and not does_file_exist(request_json['indexFilePath'], user=request.user): + raise Exception('Error accessing "{}"'.format(request_json['indexFilePath'])) sample, created = get_or_create_model_from_json( IgvSample, create_json={'individual': individual, 'sample_type': sample_type}, From fa7a969ec13c3eb2dd632749b6301a56e9d1f8a1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 12:21:37 -0400 Subject: [PATCH 28/51] update tests --- seqr/views/apis/igv_api_tests.py | 52 ++++++++++++++++++++------------ seqr/views/utils/test_utils.py | 2 +- 2 files changed, 34 insertions(+), 20 deletions(-) diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index 4de3bd6df5..2080f655cb 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -124,8 +124,8 @@ def test_receive_alignment_table_handler(self): self.assertListEqual( response_json['info'], ['Parsed 3 rows in 2 individuals from samples.csv', 'No change detected for 1 rows']) self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), [ - {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675'}, - {'individualGuid': 'I000003_na19679', 'individualId': 'NA19679', 'filePath': 'gs://readviz/NA19679.bam', 'sampleId': None}, + {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'indexFilePath': None, 
'sampleId': 'NA19675'}, + {'individualGuid': 'I000003_na19679', 'individualId': 'NA19679', 'filePath': 'gs://readviz/NA19679.bam', 'indexFilePath': None, 'sampleId': None}, ]) # test data manager access @@ -147,7 +147,7 @@ def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file): request_data = json.dumps({'mappingFile': {'uploadedFileId': uploaded_file_id}}) pm_projects_rows = [ ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/batch_10.dcr.bed.gz', 'NA19675'], - ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/NA19675_1.bam'], + ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/NA19675_1.bam', 'gs://readviz-index/NA19675_1.bai'], ['1kg project nåme with uniçøde', 'NA20870', 'gs://readviz/NA20870.cram'], ['Test Reprocessed Project', 'NA20885', 'gs://readviz/NA20885.cram'], ] @@ -177,21 +177,26 @@ def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file): self.assertListEqual(response_json['warnings'], []) self.assertListEqual(response_json['info'], ['Parsed 4 rows in 3 individuals', 'No change detected for 1 rows']) updates = [ - {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675'}, - {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/NA19675_1.bam', 'sampleId': None}, - {'individualGuid': 'I000015_na20885', 'individualId': 'NA20885', 'filePath': 'gs://readviz/NA20885.cram', 'sampleId': None}, + {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'indexFilePath': None, 'sampleId': 'NA19675'}, + {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/NA19675_1.bam', + 'indexFilePath': 'gs://readviz-index/NA19675_1.bai', 'sampleId': None}, + {'individualGuid': 'I000015_na20885', 'individualId': 'NA20885', 'filePath': 'gs://readviz/NA20885.cram', 'indexFilePath': None, 'sampleId': None}, ] self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates) # test data manager access self.login_data_manager_user() + rows[2].append('gs://readviz-index/NA20870.crai') mock_load_uploaded_file.return_value = rows response = self.client.post(url, content_type='application/json', data=request_data) self.assertEqual(response.status_code, 200) response_json = response.json() - self.assertListEqual(response_json['info'], ['Parsed 5 rows in 4 individuals', 'No change detected for 1 rows']) - self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates + [ - {'individualGuid': 'I000018_na21234', 'individualId': 'NA21234', 'filePath': 'gs://readviz/NA21234.cram', 'sampleId': None} + self.assertListEqual(response_json['info'], ['Parsed 5 rows in 4 individuals']) + self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates[:2] + [ + {'individualGuid': 'I000007_na20870', 'individualId': 'NA20870', 'sampleId': None, + 'filePath': 'gs://readviz/NA20870.cram', 'indexFilePath': 'gs://readviz-index/NA20870.crai'}, + updates[2], + {'individualGuid': 'I000018_na21234', 'individualId': 'NA21234', 'filePath': 'gs://readviz/NA21234.cram', 'indexFilePath': None, 'sampleId': None} ]) @mock.patch('seqr.utils.file_utils.subprocess.Popen') @@ -206,7 +211,7 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): self.assertEqual(response.reason_phrase, 'request must contain fields: filePath') 
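        # (Editor's note, illustrative: the requests below exercise the validation
        # order in update_individual_igv_sample from the preceding patches -- the
        # file extension is checked first, then filePath via does_file_exist, and
        # finally indexFilePath, when provided, via the same does_file_exist call.)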
response = self.client.post(url, content_type='application/json', data=json.dumps({ - 'filePath': 'invalid_path.txt', + 'filePath': 'invalid_path.txt', 'indexFilePath': None, })) self.assertEqual(response.status_code, 400) self.assertEqual( @@ -216,31 +221,40 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): mock_local_file_exists.return_value = False mock_subprocess.return_value.wait.return_value = 1 response = self.client.post(url, content_type='application/json', data=json.dumps({ - 'filePath': '/readviz/NA19675_new.cram', + 'filePath': '/readviz/NA19675_new.cram', 'indexFilePath': None, })) self.assertEqual(response.status_code, 400) self.assertEqual(response.reason_phrase, 'Error accessing "/readviz/NA19675_new.cram"') response = self.client.post(url, content_type='application/json', data=json.dumps({ - 'filePath': 'gs://readviz/NA19675_new.cram', + 'filePath': 'gs://readviz/NA19675_new.cram', 'indexFilePath': None, })) self.assertEqual(response.status_code, 400) self.assertEqual(response.reason_phrase, 'Error accessing "gs://readviz/NA19675_new.cram"') - # Send valid request mock_local_file_exists.return_value = True + response = self.client.post(url, content_type='application/json', data=json.dumps({ + 'filePath': '/readviz/NA19675.new.cram', 'indexFilePath': 'gs://readviz/NA19675_new.crai', + })) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.reason_phrase, 'Error accessing "gs://readviz/NA19675_new.crai"') + + # Send valid request mock_subprocess.return_value.wait.return_value = 0 response = self.client.post(url, content_type='application/json', data=json.dumps({ - 'filePath': '/readviz/NA19675.new.cram', + 'filePath': '/readviz/NA19675.new.cram', 'indexFilePath': '/readviz-index/NA19675.cram.crai', })) self.assertEqual(response.status_code, 200) self.assertDictEqual(response.json(), {'igvSamplesByGuid': {'S000145_na19675': { 'projectGuid': PROJECT_GUID, 'individualGuid': 'I000001_na19675', 'sampleGuid': 'S000145_na19675', - 'familyGuid': 'F000001_1', 'filePath': '/readviz/NA19675.new.cram', 'sampleId': None, 'sampleType': 'alignment'}}}) - mock_local_file_exists.assert_called_with('/readviz/NA19675.new.cram') + 'familyGuid': 'F000001_1', 'filePath': '/readviz/NA19675.new.cram', + 'indexFilePath': '/readviz-index/NA19675.cram.crai', 'sampleId': None, 'sampleType': 'alignment'}}}) + mock_local_file_exists.assert_has_calls([ + mock.call('/readviz/NA19675.new.cram'), mock.call('/readviz-index/NA19675.cram.crai'), + ]) response = self.client.post(url, content_type='application/json', data=json.dumps({ - 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675', + 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675', 'indexFilePath': None, })) self.assertEqual(response.status_code, 200) response_json = response.json() @@ -250,7 +264,7 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): sample_guid = next(iter(response_json['igvSamplesByGuid'])) self.assertDictEqual(response_json['igvSamplesByGuid'][sample_guid], { 'projectGuid': PROJECT_GUID, 'individualGuid': 'I000001_na19675', 'sampleGuid': sample_guid, - 'familyGuid': 'F000001_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675', 'sampleType': 'gcnv'}) + 'familyGuid': 'F000001_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'indexFilePath': None, 'sampleId': 'NA19675', 'sampleType': 'gcnv'}) self.assertListEqual(list(response_json['individualsByGuid'].keys()), ['I000001_na19675']) 
self.assertSetEqual( set(response_json['individualsByGuid']['I000001_na19675']['igvSampleGuids']), @@ -269,7 +283,7 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): self.assertDictEqual(response_json['igvSamplesByGuid'][junctions_sample_guid], { 'projectGuid': PROJECT_GUID, 'individualGuid': 'I000001_na19675', 'sampleGuid': junctions_sample_guid, 'familyGuid': 'F000001_1', 'filePath': 'gs://readviz/batch_10.junctions.bed.gz', 'sampleId': 'NA19675', - 'sampleType': 'spliceJunctions'}) + 'indexFilePath': None, 'sampleType': 'spliceJunctions'}) # test data manager access self.login_data_manager_user() diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index 4b1b3780af..bc9fe1ffb9 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -797,7 +797,7 @@ def _get_list_param(call, param): } IGV_SAMPLE_FIELDS = { - 'projectGuid', 'familyGuid', 'individualGuid', 'sampleGuid', 'filePath', 'sampleId', 'sampleType', + 'projectGuid', 'familyGuid', 'individualGuid', 'sampleGuid', 'filePath', 'indexFilePath', 'sampleId', 'sampleType', } SAVED_VARIANT_FIELDS = {'variantGuid', 'variantId', 'familyGuids', 'xpos', 'ref', 'alt', 'selectedMainTranscriptId', 'acmgClassification'} From 0c7e2be24530f5cc0f989084378f30b89cce8c3b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 15:12:37 -0400 Subject: [PATCH 29/51] add family name edit button for anvil projects --- ui/shared/components/panel/family/Family.jsx | 31 +++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/ui/shared/components/panel/family/Family.jsx b/ui/shared/components/panel/family/Family.jsx index b6036941f6..b11876a74b 100644 --- a/ui/shared/components/panel/family/Family.jsx +++ b/ui/shared/components/panel/family/Family.jsx @@ -35,6 +35,8 @@ import { import { FirstSample, AnalystEmailDropdown, AnalysedBy, AnalysisGroups, analysisStatusIcon } from './FamilyFields' import FamilyLayout from './FamilyLayout' +const FAMILY_NAME_FIELD_PROPS = { label: 'Name' } + const ASSIGNED_ANALYST_EDIT_FIELDS = [ { name: 'assigned_analyst_username', @@ -221,25 +223,38 @@ class Family extends React.PureComponent { }) } + familyHeader = (displayName) => { + const { family, showFamilyPageLink } = this.props + const content = showFamilyPageLink ? + {displayName} : + displayName + return + } + render() { const { project, family, fields, rightContent, compact, useFullWidth, disablePedigreeZoom, disableEdit, - showFamilyPageLink, annotation, hidePedigree, toggleDetails, + annotation, hidePedigree, toggleDetails, updateFamily: dispatchUpdateFamily, } = this.props if (!family) { return
Family Not Found
} + const isEditable = !disableEdit && project.canEdit + let leftContent = null if (!hidePedigree) { const familyHeader = ( - {family.displayName} : - family.displayName} + ) leftContent = ( @@ -256,7 +271,7 @@ class Family extends React.PureComponent { key="pedigree" family={family} disablePedigreeZoom={disablePedigreeZoom} - isEditable={!disableEdit && project.canEdit} + isEditable={isEditable} /> )} From 1167b1d3868b958c988f534a0a8a1a4a416c768b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 15:17:32 -0400 Subject: [PATCH 30/51] edit underlyin id field --- ui/shared/components/panel/family/Family.jsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/shared/components/panel/family/Family.jsx b/ui/shared/components/panel/family/Family.jsx index b11876a74b..efa5dd5dc9 100644 --- a/ui/shared/components/panel/family/Family.jsx +++ b/ui/shared/components/panel/family/Family.jsx @@ -223,11 +223,11 @@ class Family extends React.PureComponent { }) } - familyHeader = (displayName) => { + familyHeader = () => { const { family, showFamilyPageLink } = this.props const content = showFamilyPageLink ? - {displayName} : - displayName + {family.displayName} : + family.displayName return } @@ -247,7 +247,7 @@ class Family extends React.PureComponent { if (!hidePedigree) { const familyHeader = ( Date: Wed, 24 Jul 2024 15:36:11 -0400 Subject: [PATCH 31/51] actually edit family id --- seqr/views/apis/family_api.py | 13 +++++++++---- seqr/views/apis/individual_api.py | 13 ++++--------- seqr/views/utils/permissions_utils.py | 5 +++++ 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/seqr/views/apis/family_api.py b/seqr/views/apis/family_api.py index d12530007c..7e9bb08fbf 100644 --- a/seqr/views/apis/family_api.py +++ b/seqr/views/apis/family_api.py @@ -25,7 +25,7 @@ from seqr.models import Family, FamilyAnalysedBy, Individual, FamilyNote, Sample, VariantTag, AnalysisGroup, RnaSeqTpm, \ PhenotypePrioritization, Project, RnaSeqOutlier, RnaSeqSpliceOutlier, RnaSample from seqr.views.utils.permissions_utils import check_project_permissions, get_project_and_check_pm_permissions, \ - login_and_policies_required, user_is_analyst, has_case_review_permissions + login_and_policies_required, user_is_analyst, has_case_review_permissions, external_anvil_project_can_edit from seqr.views.utils.variant_utils import get_phenotype_prioritization, get_omim_intervals_query, DISCOVERY_CATEGORY from seqr.utils.xpos_utils import get_chrom_pos @@ -269,15 +269,20 @@ def update_family_fields_handler(request, family_guid): check_project_permissions(family.project, request.user) request_json = json.loads(request.body) + immutable_keys = [] if external_anvil_project_can_edit(family.project, request.user) else ['family_id'] update_family_from_json(family, request_json, user=request.user, allow_unknown_keys=True, immutable_keys=[ - 'family_id', 'display_name', - ]) + 'display_name', + ] + immutable_keys) return create_json_response({ - family.guid: _get_json_for_model(family, user=request.user) + family.guid: _get_json_for_model(family, user=request.user, process_result=_set_display_name) }) +def _set_display_name(family_json, family_model): + family_json['displayName'] = family_model.display_name or family_model.family_id + + @login_and_policies_required def update_family_assigned_analyst(request, family_guid): """Updates the specified field in the Family model. 
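
(Editor's sketch: the immutable_keys gate above, restated as a standalone helper for
clarity. _immutable_family_keys is a hypothetical name, not part of the patch;
external_anvil_project_can_edit is the real helper added to permissions_utils.py below.)

    def _immutable_family_keys(project, user):
        # display_name is always server-managed; family_id becomes editable only
        # for collaborators with can_edit access on an external AnVIL project
        keys = ['display_name']
        if not external_anvil_project_can_edit(project, user):
            keys.append('family_id')
        return keys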
diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index 7b6d6f0c59..1224dd16ad 100644 --- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -22,8 +22,8 @@ GREGOR_FINDING_TAG_TYPE from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, validate_fam_file_records, JsonConstants, ErrorsWarningsException from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \ - get_project_and_check_pm_permissions, login_and_policies_required, has_project_permissions, project_has_anvil, \ - is_internal_anvil_project, pm_or_data_manager_required, check_workspace_perm + get_project_and_check_pm_permissions, login_and_policies_required, has_project_permissions, external_anvil_project_can_edit, \ + pm_or_data_manager_required, check_workspace_perm from seqr.views.utils.project_context_utils import add_project_tag_type_counts from seqr.views.utils.individual_utils import delete_individuals, add_or_update_individuals_and_families from seqr.views.utils.variant_utils import bulk_create_tagged_variants @@ -118,11 +118,6 @@ def _get_parsed_features(features): return list(parsed_features.values()) -def _anvil_project_can_edit_pedigree(project, user): - return project_has_anvil(project) and has_project_permissions(project, user, can_edit=True) and not \ - is_internal_anvil_project(project) - - @login_and_policies_required def edit_individuals_handler(request, project_guid): """Modify one or more Individual records. @@ -153,7 +148,7 @@ def edit_individuals_handler(request, project_guid): """ project = get_project_and_check_pm_permissions(project_guid, request.user, - override_permission_func=_anvil_project_can_edit_pedigree) + override_permission_func=external_anvil_project_can_edit) request_json = json.loads(request.body) @@ -230,7 +225,7 @@ def delete_individuals_handler(request, project_guid): # validate request project = get_project_and_check_pm_permissions(project_guid, request.user, - override_permission_func=_anvil_project_can_edit_pedigree) + override_permission_func=external_anvil_project_can_edit) request_json = json.loads(request.body) individuals_list = request_json.get('individuals') diff --git a/seqr/views/utils/permissions_utils.py b/seqr/views/utils/permissions_utils.py index 035df27b13..b440915052 100644 --- a/seqr/views/utils/permissions_utils.py +++ b/seqr/views/utils/permissions_utils.py @@ -145,6 +145,11 @@ def project_has_anvil(project): return anvil_enabled() and bool(project.workspace_namespace and project.workspace_name) +def external_anvil_project_can_edit(project, user): + return project_has_anvil(project) and has_project_permissions(project, user, can_edit=True) and not \ + is_internal_anvil_project(project) + + def _map_anvil_seqr_permission(anvil_permission): if anvil_permission.get('pending'): return None From 57b4bd74246a319d315ed2d23af89ac25eef1fd0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 15:53:16 -0400 Subject: [PATCH 32/51] add tests --- seqr/views/apis/family_api_tests.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index d638e07f7a..7c93eded24 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -11,7 +11,8 @@ update_family_fields_handler, update_family_analysed_by, edit_families_handler, delete_families_handler, \ receive_families_table_handler, create_family_note, 
update_family_note, delete_family_note, family_page_data, \ family_variant_tag_summary, update_family_analysis_groups, get_family_rna_seq_data, get_family_phenotype_gene_scores -from seqr.views.utils.test_utils import AuthenticationTestCase, FAMILY_NOTE_FIELDS, FAMILY_FIELDS, IGV_SAMPLE_FIELDS, \ +from seqr.views.utils.test_utils import AuthenticationTestCase, AnvilAuthenticationTestCase, \ + FAMILY_NOTE_FIELDS, FAMILY_FIELDS, IGV_SAMPLE_FIELDS, \ SAMPLE_FIELDS, INDIVIDUAL_FIELDS, INTERNAL_INDIVIDUAL_FIELDS, INTERNAL_FAMILY_FIELDS, CASE_REVIEW_FAMILY_FIELDS, \ MATCHMAKER_SUBMISSION_FIELDS, TAG_TYPE_FIELDS, CASE_REVIEW_INDIVIDUAL_FIELDS from seqr.models import FamilyAnalysedBy, AnalysisGroup @@ -35,8 +36,7 @@ SAMPLE_GUIDS = ['S000129_na19675', 'S000130_na19678', 'S000131_na19679'] -class FamilyAPITest(AuthenticationTestCase): - fixtures = ['users', '1kg_project', 'reference_data'] +class FamilyAPITest(object): def test_family_page_data(self): url = reverse(family_page_data, args=[FAMILY_GUID]) @@ -253,6 +253,7 @@ def test_edit_families_handler(self, mock_pm_group): self.assertEqual(response.status_code, 403) mock_pm_group.__bool__.return_value = True mock_pm_group.resolve_expression.return_value = 'project-managers' + mock_pm_group.__eq__.side_effect = lambda s: s == 'project-managers' response = self.client.post(url, content_type='application/json', data=json.dumps({ 'families': [{'familyGuid': 'F000012_12'}]})) @@ -307,6 +308,7 @@ def test_delete_families_handler(self, mock_pm_group): self.assertEqual(response.status_code, 403) mock_pm_group.__bool__.return_value = True mock_pm_group.resolve_expression.return_value = 'project-managers' + mock_pm_group.__eq__.side_effect = lambda s: s == 'project-managers' response = self.client.post(url, content_type='application/json', data=json.dumps({ 'families': [{'familyGuid': 'F000012_12'}]})) @@ -436,6 +438,17 @@ def test_update_family_fields(self): self.assertEqual(response.status_code, 200) self.assertEqual(response.json()[FAMILY_GUID]['analysisStatusLastModifiedBy'], 'Test Collaborator User') + # Test External AnVIL projects + external_family_url = reverse(update_family_fields_handler, args=['F000014_14']) + response = self.client.post(external_family_url, content_type='application/json', data=json.dumps(body)) + self.assertEqual(response.status_code, 200) + response_json = response.json() + self.assertEqual(response_json['F000014_14']['description'], 'Updated description') + self.assertEqual(response_json['F000014_14'][FAMILY_ID_FIELD], 'new_id' if self._anvil_enabled() else '14') + + def _anvil_enabled(self): + return not self.ES_HOSTNAME + @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP') def test_receive_families_table_handler(self, mock_pm_group): url = reverse(receive_families_table_handler, args=[PROJECT_GUID]) @@ -499,6 +512,7 @@ def test_receive_families_table_handler(self, mock_pm_group): self.assertEqual(response.status_code, 403) mock_pm_group.__bool__.return_value = True mock_pm_group.resolve_expression.return_value = 'project-managers' + mock_pm_group.__eq__.side_effect = lambda s: s == 'project-managers' response = self.client.post(url, {'f': SimpleUploadedFile('families.tsv', 'Family ID\n1'.encode('utf-8'))}) self.assertEqual(response.status_code, 200) @@ -607,3 +621,11 @@ def test_get_family_phenotype_gene_scores(self): } } }) + + +class LocalFamilyAPITest(AuthenticationTestCase, FamilyAPITest): + fixtures = ['users', '1kg_project', 'reference_data'] + + +class AnvilFamilyAPITest(AnvilAuthenticationTestCase, 
FamilyAPITest): + fixtures = ['users', '1kg_project', 'reference_data'] From 4d8c599d93c582eae7d9bf254dad8826e2e9ae86 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 15:58:33 -0400 Subject: [PATCH 33/51] test display name response --- seqr/views/apis/family_api_tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 7c93eded24..8d2a527c9a 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -428,6 +428,7 @@ def test_update_family_fields(self): response_json = response.json() self.assertEqual(response_json[FAMILY_GUID]['description'], 'Updated description') self.assertEqual(response_json[FAMILY_GUID][FAMILY_ID_FIELD], '1') + self.assertEqual(response_json[FAMILY_GUID]['displayName'], '1') self.assertEqual(response_json[FAMILY_GUID]['analysisStatus'], 'C') self.assertEqual(response_json[FAMILY_GUID]['analysisStatusLastModifiedBy'], 'Test Collaborator User') self.assertEqual(response_json[FAMILY_GUID]['analysisStatusLastModifiedDate'], '2020-01-01T00:00:00') @@ -444,7 +445,9 @@ def test_update_family_fields(self): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertEqual(response_json['F000014_14']['description'], 'Updated description') - self.assertEqual(response_json['F000014_14'][FAMILY_ID_FIELD], 'new_id' if self._anvil_enabled() else '14') + expected_id = 'new_id' if self._anvil_enabled() else '14' + self.assertEqual(response_json['F000014_14'][FAMILY_ID_FIELD], expected_id) + self.assertEqual(response_json['F000014_14']['displayName'], expected_id) def _anvil_enabled(self): return not self.ES_HOSTNAME From 7c008863a69ee71025322b2acabb15d936025e24 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 16:52:04 -0400 Subject: [PATCH 34/51] mock persist file to gs --- seqr/views/apis/family_api_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 8d2a527c9a..6d11edca20 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -452,6 +452,7 @@ def test_update_family_fields(self): def _anvil_enabled(self): return not self.ES_HOSTNAME + @mock.patch('seqr.views.utils.file_utils.mv_file_to_gs', lambda *args: True) @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP') def test_receive_families_table_handler(self, mock_pm_group): url = reverse(receive_families_table_handler, args=[PROJECT_GUID]) From 9d74524ee557fc42bb40747c33c5280586e99b8b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 24 Jul 2024 17:14:59 -0400 Subject: [PATCH 35/51] fix test persist file --- seqr/views/apis/family_api_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 6d11edca20..0544812056 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -452,7 +452,7 @@ def test_update_family_fields(self): def _anvil_enabled(self): return not self.ES_HOSTNAME - @mock.patch('seqr.views.utils.file_utils.mv_file_to_gs', lambda *args: True) + @mock.patch('seqr.views.utils.file_utils.anvil_enabled', lambda: False) @mock.patch('seqr.views.utils.permissions_utils.PM_USER_GROUP') def test_receive_families_table_handler(self, mock_pm_group): url = reverse(receive_families_table_handler, args=[PROJECT_GUID]) From 4c0c6563407e322e748008e1bb1e8c1f708b5275 Mon Sep 17 
00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 25 Jul 2024 15:56:20 -0400 Subject: [PATCH 36/51] Add batch size to rna seq load route (#4257) * Add batch size to rna seq load * Pass kwargs down --- seqr/models.py | 8 ++++---- seqr/views/apis/data_manager_api.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 4514644634..a3b6cbc36e 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -124,13 +124,13 @@ def delete_model(self, user, user_can_delete=False): log_model_update(logger, self, user, 'delete') @classmethod - def bulk_create(cls, user, new_models): + def bulk_create(cls, user, new_models, **kwargs): """Helper bulk create method that logs the creation""" for model in new_models: model.created_by = user model.created_date = timezone.now() model.guid = model._format_guid(random.randint(10**(cls.GUID_PRECISION-1), 10**cls.GUID_PRECISION)) # nosec - models = cls.objects.bulk_create(new_models) + models = cls.objects.bulk_create(new_models, **kwargs) log_model_bulk_update(logger, models, user, 'create') return models @@ -1139,11 +1139,11 @@ def log_model_no_guid_bulk_update(cls, models, user, update_type): logger.info(f'{update_type} {db_entity}s', user, db_update=db_update) @classmethod - def bulk_create(cls, user, new_models): + def bulk_create(cls, user, new_models, **kwargs): """Helper bulk create method that logs the creation""" for model in new_models: model.created_by = user - models = cls.objects.bulk_create(new_models) + models = cls.objects.bulk_create(new_models, **kwargs) cls.log_model_no_guid_bulk_update(models, user, 'create') return models diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 0960c2d3eb..4b2f9d1e1b 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -336,7 +336,7 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'error': error}, status=400) model_cls = config['model_class'] - model_cls.bulk_create(request.user, [model_cls(sample=sample, **data) for data in data_rows]) + model_cls.bulk_create(request.user, [model_cls(sample=sample, **data) for data in data_rows], batch_size=1000) update_model_from_json(sample, {'is_active': True}, user=request.user) return create_json_response({'success': True}) From 23fbddd497d4300d738c31ed565bf4b78a6c760e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Jul 2024 16:05:37 -0400 Subject: [PATCH 37/51] pr feedback --- seqr/views/apis/data_manager_api.py | 10 +++++----- seqr/views/apis/data_manager_api_tests.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index c268250fb0..1ad54f5d99 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -498,7 +498,7 @@ def _fetch_airtable_loadable_project_samples(user): project_samples = defaultdict(set) for pdo in pdos.values(): project_guid = re.match( - 'https://seqr.broadinstitute.org/project/([^/]+)/project_page', pdo['SeqrProjectURL'], + f'{BASE_URL}project/([^/]+)/project_page', pdo['SeqrProjectURL'], ).group(1) project_samples[project_guid].update([ sample_id for sample_id in pdo['PassingCollaboratorSampleIDs'] + pdo['SeqrIDs'] if sample_id @@ -592,16 +592,16 @@ def _get_valid_project_samples(project_samples, sample_type, user): return individual_ids -def _get_loaded_samples(sample_keys, user): - sample_ids = [sample_id for _, sample_id in sample_keys] +def 
_get_loaded_samples(project_samples, user): + sample_ids = [sample_id for _, sample_id in project_samples] samples_by_id = AirtableSession(user).get_samples_for_sample_ids(sample_ids, ['PDOStatus', 'SeqrProject']) - return [(project, sample_id) for project, sample_id in sample_keys if any( + return [(project, sample_id) for project, sample_id in project_samples if any( _is_loaded_airtable_sample(s, project) for s in samples_by_id.get(sample_id, []) )] def _is_loaded_airtable_sample(sample, project_guid): - return f'https://seqr.broadinstitute.org/project/{project_guid}/project_page' in sample['SeqrProject'] and any( + return f'{BASE_URL}project/{project_guid}/project_page' in sample['SeqrProject'] and any( status in AVAILABLE_PDO_STATUSES for status in sample['PDOStatus']) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 42cf958262..81e77f0fac 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -1460,6 +1460,7 @@ def test_validate_callset(self, mock_subprocess): response = self.client.post(url, content_type='application/json', data=json.dumps(body)) self.assertEqual(response.status_code, 200) + @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.utils.airtable_utils.is_google_authenticated', lambda x: True) @responses.activate def test_get_loaded_projects(self): @@ -1589,6 +1590,7 @@ def _get_dag_variable_overrides(*args, **kwargs): } @responses.activate + @mock.patch('seqr.views.apis.data_manager_api.BASE_URL', 'https://seqr.broadinstitute.org/') @mock.patch('seqr.views.utils.export_utils.open') @mock.patch('seqr.views.utils.export_utils.TemporaryDirectory') def test_load_data(self, mock_temp_dir, mock_open): From f3d2cf5dc037c43499c0efa9b623d55136c6759b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 25 Jul 2024 17:18:21 -0400 Subject: [PATCH 38/51] no max families for hpo search --- ui/pages/Search/VariantSearch.jsx | 2 +- ui/pages/Search/components/PageHeader.jsx | 18 ++++++++-- ui/pages/Search/selectors.js | 8 +++-- ui/pages/SummaryData/components/Hpo.jsx | 40 +++++++++++++++-------- ui/redux/rootReducer.js | 5 +-- ui/redux/selectors.js | 1 + ui/redux/utils/configureStore.js | 2 +- 7 files changed, 53 insertions(+), 23 deletions(-) diff --git a/ui/pages/Search/VariantSearch.jsx b/ui/pages/Search/VariantSearch.jsx index 843f9f79db..59e4274206 100644 --- a/ui/pages/Search/VariantSearch.jsx +++ b/ui/pages/Search/VariantSearch.jsx @@ -28,7 +28,7 @@ const VariantSearch = ({ match }) => ( `${match.url}/${pagePath}`)} component={VariantSearchForm} /> - + diff --git a/ui/pages/Search/components/PageHeader.jsx b/ui/pages/Search/components/PageHeader.jsx index 58b2b74ca0..a8beb47253 100644 --- a/ui/pages/Search/components/PageHeader.jsx +++ b/ui/pages/Search/components/PageHeader.jsx @@ -2,7 +2,7 @@ import React from 'react' import PropTypes from 'prop-types' import { connect } from 'react-redux' -import { getProjectsByGuid, getFamiliesByGuid, getAnalysisGroupsByGuid, getSearchesByHash } from 'redux/selectors' +import { getProjectsByGuid, getFamiliesByGuid, getAnalysisGroupsByGuid, getSearchesByHash, getSearchFamiliesByHash } from 'redux/selectors' import PageHeaderLayout from 'shared/components/page/PageHeaderLayout' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' @@ -19,7 +19,14 @@ const PAGE_CONFIGS = { entity: analysisGroupsByGuid[entityGuid], entityUrlPath: `analysis_group/${entityGuid}`, }), - 
families: entityGuid => ({ description: `Searching in ${entityGuid.split(/[,:]/).length} Families` }), + families: (entityGuid, p, f, a, s, searchFamiliesByHash) => { + const numFamilies = Object.values(searchFamiliesByHash[entityGuid] || {}).reduce( + (acc, familyGuids) => acc + familyGuids.length, 0, + ) + return { + description: `Searching in ${numFamilies} Families`, + } + }, results: (entityGuid, projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash) => { const { projectFamilies } = searchesByHash[entityGuid] || {} let pageType @@ -62,12 +69,15 @@ const PAGE_CONFIGS = { variant: entityGuid => ({ entity: { name: entityGuid } }), } -const getPageHeaderProps = ({ projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash, match }) => { +const getPageHeaderProps = ( + { projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash, searchFamiliesByHash, match }, +) => { const { pageType, entityGuid, subPageType, subEntityGuid } = match.params const breadcrumbIdSections = [] const { entity, entityUrlPath, actualPageType, description } = PAGE_CONFIGS[subPageType || pageType]( subEntityGuid || entityGuid, projectsByGuid, familiesByGuid, analysisGroupsByGuid, searchesByHash, + searchFamiliesByHash, ) if (entity) { const project = projectsByGuid[entity.projectGuid || entityGuid] @@ -90,6 +100,7 @@ PageHeader.propTypes = { familiesByGuid: PropTypes.object, analysisGroupsByGuid: PropTypes.object, searchesByHash: PropTypes.object, + searchFamiliesByHash: PropTypes.object, match: PropTypes.object, } @@ -98,6 +109,7 @@ const mapStateToProps = state => ({ familiesByGuid: getFamiliesByGuid(state), analysisGroupsByGuid: getAnalysisGroupsByGuid(state), searchesByHash: getSearchesByHash(state), + searchFamiliesByHash: getSearchFamiliesByHash(state), }) export default connect(mapStateToProps)(PageHeader) diff --git a/ui/pages/Search/selectors.js b/ui/pages/Search/selectors.js index 04b5f042d2..929871dd3d 100644 --- a/ui/pages/Search/selectors.js +++ b/ui/pages/Search/selectors.js @@ -10,6 +10,7 @@ import { getCurrentSearchParams, getUser, getProjectDatasetTypes, + getSearchFamiliesByHash, } from 'redux/selectors' import { FAMILY_ANALYSIS_STATUS_LOOKUP } from 'shared/utils/constants' import { compareObjects } from 'shared/utils/sortUtils' @@ -66,9 +67,10 @@ export const getProjectFamilies = createSelector( export const getMultiProjectFamilies = createSelector( (state, props) => props.match.params, - params => ({ - projectFamilies: params.families.split(':').map(f => f.split(';')).map( - ([projectGuid, familyGuids]) => ({ projectGuid, familyGuids: familyGuids.split(',') }), + getSearchFamiliesByHash, + (params, searchFamiliesByHash) => ({ + projectFamilies: Object.entries(searchFamiliesByHash[params.familiesHash] || {}).map( + ([projectGuid, familyGuids]) => ({ projectGuid, familyGuids }), ), }), ) diff --git a/ui/pages/SummaryData/components/Hpo.jsx b/ui/pages/SummaryData/components/Hpo.jsx index 14bf7d0f18..97e4610e14 100644 --- a/ui/pages/SummaryData/components/Hpo.jsx +++ b/ui/pages/SummaryData/components/Hpo.jsx @@ -1,7 +1,9 @@ import React from 'react' -import { NavLink } from 'react-router-dom' +import { connect } from 'react-redux' +import PropTypes from 'prop-types' import { Divider, Button, Header } from 'semantic-ui-react' +import { navigateSavedHashedSearch } from 'redux/rootReducer' import { NoHoverFamilyLink } from 'shared/components/buttons/FamilyLink' import AwesomeBar from 'shared/components/page/AwesomeBar' import { Phenotypes } from 
'shared/components/panel/MatchmakerPanel' @@ -12,7 +14,6 @@ import { HttpRequestHelper } from 'shared/utils/httpRequestHelper' import { GENOME_VERSION_LOOKUP } from 'shared/utils/constants' const SEARCH_CATEGORIES = ['hpo_terms'] -const MAX_SEARCH_FAMILIES = 500 const ID_FIELD = 'individualGuid' const COLUMNS = [ { @@ -30,7 +31,9 @@ const COLUMNS = [ class Hpo extends React.PureComponent { - static propTypes = {} + static propTypes = { + navigateSearch: PropTypes.func.isRequired, + } state = { data: [], @@ -71,6 +74,7 @@ class Hpo extends React.PureComponent { render() { const { terms, data, loading, error } = this.state + const { navigateSearch } = this.props const familiesByGenomeVersion = data.reduce((acc, { familyData }) => { if (!acc[familyData.genomeVersion]) { @@ -84,9 +88,9 @@ class Hpo extends React.PureComponent { (acc, families) => acc + Object.keys(families).length, 0, ) - const genomeSearchPaths = Object.entries(familiesByGenomeVersion).map(([genomeVersion, familyAcc]) => { + const genomeProjectFamilies = Object.entries(familiesByGenomeVersion).map(([genomeVersion, familyAcc]) => { const families = Object.entries(familyAcc) - const searchPath = families.length < MAX_SEARCH_FAMILIES ? Object.entries(families.reduce( + const projectFamilies = families.reduce( (acc, [familyGuid, projectGuid]) => { if (!acc[projectGuid]) { acc[projectGuid] = [] @@ -94,8 +98,8 @@ class Hpo extends React.PureComponent { acc[projectGuid].push(familyGuid) return acc }, {}, - )).map(([projectGuid, familyGuids]) => `${projectGuid};${familyGuids.join(',')}`).join(':') : '' - return [genomeVersion, searchPath] + ) + return [genomeVersion, projectFamilies] }) return ( @@ -124,14 +128,12 @@ class Hpo extends React.PureComponent {
{`${numFamilies} Families, ${data.length} Individuals`} - {genomeSearchPaths.map(([genomeVersion, searchPath]) => ( + {genomeProjectFamilies.map(([genomeVersion, projectFamilies]) => ( {`${GENOME_VERSION_LOOKUP[genomeVersion]}: `} {`Variant Search - ${Object.keys(familiesByGenomeVersion[genomeVersion]).length} Families`} @@ -155,4 +157,16 @@ class Hpo extends React.PureComponent { } -export default Hpo +const mapDispatchToProps = dispatch => ({ + navigateSearch: (e, { projectFamilies }) => { + e.stopPropagation() + dispatch(navigateSavedHashedSearch( + projectFamilies, + resultsLink => window.open(resultsLink, '_blank'), + '/variant_search/families', + 'searchFamiliesByHash', + )) + }, +}) + +export default connect(null, mapDispatchToProps)(Hpo) diff --git a/ui/redux/rootReducer.js b/ui/redux/rootReducer.js index 8570df4b8e..8053d84340 100644 --- a/ui/redux/rootReducer.js +++ b/ui/redux/rootReducer.js @@ -187,11 +187,11 @@ export const updateGeneNote = values => updateEntity( values, RECEIVE_DATA, `/api/gene_info/${values.geneId || values.gene_id}/note`, 'noteGuid', ) -export const navigateSavedHashedSearch = (search, navigateSearch, resultsPath) => (dispatch) => { +export const navigateSavedHashedSearch = (search, navigateSearch, resultsPath, hashKey) => (dispatch) => { // lazy load object-hash library as it is not used anywhere else import('object-hash').then((hash) => { const searchHash = hash.default.MD5(search) - dispatch({ type: RECEIVE_SAVED_SEARCHES, updatesById: { searchesByHash: { [searchHash]: search } } }) + dispatch({ type: RECEIVE_SAVED_SEARCHES, updatesById: { [hashKey || 'searchesByHash']: { [searchHash]: search } } }) navigateSearch(`${resultsPath || '/variant_search/results'}/${searchHash}`) }) } @@ -343,6 +343,7 @@ const rootReducer = combineReducers({ variantNotesByGuid: createObjectsByIdReducer(RECEIVE_DATA, 'variantNotesByGuid'), variantFunctionalDataByGuid: createObjectsByIdReducer(RECEIVE_DATA, 'variantFunctionalDataByGuid'), searchesByHash: createObjectsByIdReducer(RECEIVE_SAVED_SEARCHES, 'searchesByHash'), + searchFamiliesByHash: createObjectsByIdReducer(RECEIVE_SAVED_SEARCHES, 'searchFamiliesByHash'), searchedVariants: createSingleValueReducer(RECEIVE_SEARCHED_VARIANTS, []), searchedVariantsLoading: loadingReducer(REQUEST_SEARCHED_VARIANTS, RECEIVE_SEARCHED_VARIANTS), searchGeneBreakdown: createObjectsByIdReducer(RECEIVE_SEARCH_GENE_BREAKDOWN, 'searchGeneBreakdown'), diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 5db3c308e3..4ef20a4229 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -45,6 +45,7 @@ export const getAnvilLoadingDelayDate = state => state.meta.anvilLoadingDelayDat export const getSavedVariantsIsLoading = state => state.savedVariantsLoading.isLoading export const getSavedVariantsLoadingError = state => state.savedVariantsLoading.errorMessage export const getSearchesByHash = state => state.searchesByHash +export const getSearchFamiliesByHash = state => state.searchFamiliesByHash export const getSearchedVariants = state => state.searchedVariants export const getSearchedVariantsIsLoading = state => state.searchedVariantsLoading.isLoading export const getSearchedVariantsErrorMessage = state => state.searchedVariantsLoading.errorMessage diff --git a/ui/redux/utils/configureStore.js b/ui/redux/utils/configureStore.js index a35be8fe44..cbbaf06326 100644 --- a/ui/redux/utils/configureStore.js +++ b/ui/redux/utils/configureStore.js @@ -5,7 +5,7 @@ import { loadState, saveState } from 'shared/utils/localStorage' const 
PERSISTING_STATE = [ 'projectsTableState', 'familyTableState', 'savedVariantTableState', 'variantSearchDisplay', 'searchesByHash', - 'familyTableFilterState', + 'familyTableFilterState', 'searchFamiliesByHash', ] const persistStoreMiddleware = store => next => (action) => { From 73f70942e15e7ce02805bfa07c0c4e999ab5e394 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 26 Jul 2024 14:22:05 -0400 Subject: [PATCH 39/51] clear unused freq fields on header change --- ui/shared/components/panel/search/FrequencyFilter.jsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/search/FrequencyFilter.jsx b/ui/shared/components/panel/search/FrequencyFilter.jsx index 2deb5ddf0d..6d35360694 100644 --- a/ui/shared/components/panel/search/FrequencyFilter.jsx +++ b/ui/shared/components/panel/search/FrequencyFilter.jsx @@ -145,9 +145,9 @@ const callsetChange = (onChange, initialValues) => val => onChange( { ...initialValues, [THIS_CALLSET_FREQUENCY]: val, [SV_CALLSET_FREQUENCY]: val }, ) -const freqChange = (onChange, initialValues) => val => onChange(FREQUENCIES.filter( +const freqChange = onChange => val => onChange(FREQUENCIES.filter( ({ name }) => name !== THIS_CALLSET_FREQUENCY && name !== SV_CALLSET_FREQUENCY, -).reduce((acc, { name }) => ({ ...acc, [name]: val }), initialValues || {})) +).reduce((acc, { name }) => ({ ...acc, [name]: val }), {})) export const HeaderFrequencyFilter = ({ value, onChange, esEnabled, ...props }) => { const { callset, sv_callset: svCallset, ...freqValues } = value || {} @@ -155,7 +155,7 @@ export const HeaderFrequencyFilter = ({ value, onChange, esEnabled, ...props }) const onCallsetChange = callsetChange(onChange, freqValues) - const onFreqChange = freqChange(onChange, value) + const onFreqChange = freqChange(onChange) const callsetTitle = esEnabled ? 
'Callset' : 'seqr'

   return (

From 13ef9ef23454b1183f5526222c4aa9a5eb6b2852 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Fri, 26 Jul 2024 14:23:53 -0400
Subject: [PATCH 40/51] larger freq header inputs

---
 ui/shared/components/panel/search/VariantSearchFormPanels.jsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx
index 619676072d..f1bd1f6e83 100644
--- a/ui/shared/components/panel/search/VariantSearchFormPanels.jsx
+++ b/ui/shared/components/panel/search/VariantSearchFormPanels.jsx
@@ -195,7 +195,7 @@ export const FREQUENCY_PANEL = {
   name: 'freqs',
   headerProps: {
     title: 'Frequency',
-    inputSize: 10,
+    inputSize: 12,
     inputProps: {
       component: HeaderFrequencyFilter,
       format: val => val || {},

From 85e578f4471cb92dcfb468730eebd7aeb4ea745a Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Fri, 26 Jul 2024 14:28:32 -0400
Subject: [PATCH 41/51] don't clear callset filter on freq update

---
 ui/shared/components/panel/search/FrequencyFilter.jsx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ui/shared/components/panel/search/FrequencyFilter.jsx b/ui/shared/components/panel/search/FrequencyFilter.jsx
index 6d35360694..695aa0fcff 100644
--- a/ui/shared/components/panel/search/FrequencyFilter.jsx
+++ b/ui/shared/components/panel/search/FrequencyFilter.jsx
@@ -145,9 +145,9 @@ const callsetChange = (onChange, initialValues) => val => onChange(
   { ...initialValues, [THIS_CALLSET_FREQUENCY]: val, [SV_CALLSET_FREQUENCY]: val },
 )

-const freqChange = onChange => val => onChange(FREQUENCIES.filter(
-  ({ name }) => name !== THIS_CALLSET_FREQUENCY && name !== SV_CALLSET_FREQUENCY,
-).reduce((acc, { name }) => ({ ...acc, [name]: val }), {}))
+const freqChange = (onChange, initialValues) => val => onChange(FREQUENCIES.reduce((acc, { name }) => ({
+  ...acc, [name]: name !== THIS_CALLSET_FREQUENCY && name !== SV_CALLSET_FREQUENCY ? val : initialValues[name],
+}), initialValues || {}))

 export const HeaderFrequencyFilter = ({ value, onChange, esEnabled, ...props }) => {
   const { callset, sv_callset: svCallset, ...freqValues } = value || {}
@@ -155,7 +155,7 @@ export const HeaderFrequencyFilter = ({ value, onChange, esEnabled, ...props })

   const onCallsetChange = callsetChange(onChange, freqValues)

-  const onFreqChange = freqChange(onChange)
+  const onFreqChange = freqChange(onChange, value)

   const callsetTitle = esEnabled ? 'Callset' : 'seqr'

   return (

From d5d9baf9d7220ef7cf7a11df15f981db3f83c80c Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Fri, 26 Jul 2024 14:34:55 -0400
Subject: [PATCH 42/51] add browser support to FAQ

---
 ui/pages/Public/components/Faq.jsx | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ui/pages/Public/components/Faq.jsx b/ui/pages/Public/components/Faq.jsx
index f8b6129edc..5bed4f052c 100644
--- a/ui/pages/Public/components/Faq.jsx
+++ b/ui/pages/Public/components/Faq.jsx
@@ -236,6 +236,17 @@ const FAQS = [
         ),
       },
+  }, {
+    [ENGLISH]: {
+      header: 'Q. Which browsers are supported for seqr?',
+      content: `seqr is only supported in Google Chrome. While it may sometimes function in other browsers, to ensure
+        reliable behavior you should only use seqr in Chrome.`,
+    },
+    [SPANISH]: {
+      header: 'P: ¿Cuáles navegadores son compatibles con seqr?',
+      content: `seqr solamente es compatible con Google Chrome. 
Aunque a veces puede funcionar en otros navegadores, + para garantizar un funcionamiento fiable sólo debe usar seqr en Chrome.`, + }, }, { [ENGLISH]: { header: 'Q. How can I set up seqr locally?', From 0c96bc36400ee7fab6f15f80d4fef9bfb4734a14 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 11:52:03 -0400 Subject: [PATCH 43/51] shared saved variant model util --- .../check_for_new_samples_from_pipeline.py | 10 +++++----- seqr/views/utils/variant_utils.py | 17 ++++++++++++++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index c530eef6b6..bc68c142ba 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -15,7 +15,7 @@ from seqr.utils.search.hail_search_utils import hail_variant_multi_lookup, search_data_type from seqr.views.utils.dataset_utils import match_and_update_search_samples from seqr.views.utils.variant_utils import reset_cached_search_results, update_projects_saved_variant_json, \ - saved_variants_dataset_type_filter + get_saved_variants from settings import SEQR_SLACK_LOADING_NOTIFICATION_CHANNEL logger = logging.getLogger(__name__) @@ -153,10 +153,10 @@ def _reload_shared_variant_annotations(data_type, genome_version, updated_varian if is_sv: updated_annotation_samples = updated_annotation_samples.filter(sample_type=data_type.split('_')[1]) - variant_models = SavedVariant.objects.filter( - family_id__in=updated_annotation_samples.values_list('individual__family', flat=True).distinct(), - **saved_variants_dataset_type_filter(dataset_type), - ).filter(Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version)) + variant_models = get_saved_variants( + family_guids=updated_annotation_samples.values_list('individual__family__guid', flat=True).distinct(), + dataset_type=dataset_type, genome_version=genome_version, + ) if not variant_models: logger.info('No additional saved variants to update') diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 17d1f7e36a..5015c78f3a 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -66,13 +66,24 @@ def update_projects_saved_variant_json(projects, user_email, **kwargs): return updated_variants_by_id -def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): - saved_variants = SavedVariant.objects.filter(family__project_id=project_id).select_related('family') +def get_saved_variants(project_id=None, family_guids=None, dataset_type=None, genome_version=None): + saved_variants = SavedVariant.objects.all() + if project_id: + saved_variants = saved_variants.filter(family__project_id=project_id) if family_guids: saved_variants = saved_variants.filter(family__guid__in=family_guids) - if dataset_type: saved_variants = saved_variants.filter(**saved_variants_dataset_type_filter(dataset_type)) + if genome_version: + db_genome_version = genome_version.replace('GRCh', '') + saved_variants = saved_variants.filter( + Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version) + ) + return saved_variants + + +def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): + saved_variants = get_saved_variants(project_id, family_guids, 
dataset_type).select_related('family') if not saved_variants: return None From 3523d98e666848e0604c60f62ae8427f8d4fdf05 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 12:02:45 -0400 Subject: [PATCH 44/51] only reload variants on project genome build --- .../check_for_new_samples_from_pipeline.py | 4 ++-- .../commands/reload_saved_variant_json.py | 2 +- seqr/views/apis/saved_variant_api.py | 2 +- seqr/views/utils/variant_utils.py | 20 +++++++++---------- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/seqr/management/commands/check_for_new_samples_from_pipeline.py b/seqr/management/commands/check_for_new_samples_from_pipeline.py index bc68c142ba..17824d8114 100644 --- a/seqr/management/commands/check_for_new_samples_from_pipeline.py +++ b/seqr/management/commands/check_for_new_samples_from_pipeline.py @@ -108,7 +108,7 @@ def handle(self, *args, **options): ) project_families = project_sample_data['family_guids'] updated_families.update(project_families) - updated_project_families.append((project.id, project.name, project_families)) + updated_project_families.append((project.id, project.name, project.genome_version, project_families)) # Send failure notifications failed_family_samples = metadata.get('failed_family_samples', {}) @@ -154,8 +154,8 @@ def _reload_shared_variant_annotations(data_type, genome_version, updated_varian updated_annotation_samples = updated_annotation_samples.filter(sample_type=data_type.split('_')[1]) variant_models = get_saved_variants( + genome_version, dataset_type=dataset_type, family_guids=updated_annotation_samples.values_list('individual__family__guid', flat=True).distinct(), - dataset_type=dataset_type, genome_version=genome_version, ) if not variant_models: diff --git a/seqr/management/commands/reload_saved_variant_json.py b/seqr/management/commands/reload_saved_variant_json.py index 2e83305eaa..eea208cf32 100644 --- a/seqr/management/commands/reload_saved_variant_json.py +++ b/seqr/management/commands/reload_saved_variant_json.py @@ -27,6 +27,6 @@ def handle(self, *args, **options): logging.info("Processing all %s projects" % len(projects)) family_ids = [family_guid] if family_guid else None - project_list = [(*project, family_ids) for project in projects.values_list('id', 'name')] + project_list = [(*project, family_ids) for project in projects.values_list('id', 'name', 'genome_version')] update_projects_saved_variant_json(project_list, user_email='manage_command') logger.info("Done") diff --git a/seqr/views/apis/saved_variant_api.py b/seqr/views/apis/saved_variant_api.py index 9220d0cbeb..18740ba3b9 100644 --- a/seqr/views/apis/saved_variant_api.py +++ b/seqr/views/apis/saved_variant_api.py @@ -303,7 +303,7 @@ def update_saved_variant_json(request, project_guid): project = get_project_and_check_permissions(project_guid, request.user, can_edit=True) reset_cached_search_results(project) try: - updated_saved_variant_guids = update_project_saved_variant_json(project.id, user=request.user) + updated_saved_variant_guids = update_project_saved_variant_json(project.id, project.genome_version, user=request.user) except Exception as e: logger.error('Unable to reset saved variant json for {}: {}'.format(project_guid, e)) updated_saved_variant_guids = [] diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 5015c78f3a..859f73b9cc 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -37,10 +37,10 @@ def update_projects_saved_variant_json(projects, user_email, 
**kwargs): error = {} updated_variants_by_id = {} logger.info(f'Reloading saved variants in {len(projects)} projects') - for project_id, project_name, family_guids in tqdm(projects, unit=' project'): + for project_id, project_name, genome_version, family_guids in tqdm(projects, unit=' project'): try: updated_saved_variants = update_project_saved_variant_json( - project_id, user_email=user_email, family_guids=family_guids, **kwargs) + project_id, genome_version, user_email=user_email, family_guids=family_guids, **kwargs) if updated_saved_variants is None: skipped[project_name] = True else: @@ -66,24 +66,22 @@ def update_projects_saved_variant_json(projects, user_email, **kwargs): return updated_variants_by_id -def get_saved_variants(project_id=None, family_guids=None, dataset_type=None, genome_version=None): - saved_variants = SavedVariant.objects.all() +def get_saved_variants(genome_version, project_id=None, family_guids=None, dataset_type=None): + saved_variants = SavedVariant.objects.filter( + Q(saved_variant_json__genomeVersion__isnull=True) | + Q(saved_variant_json__genomeVersion=genome_version.replace('GRCh', '')) + ) if project_id: saved_variants = saved_variants.filter(family__project_id=project_id) if family_guids: saved_variants = saved_variants.filter(family__guid__in=family_guids) if dataset_type: saved_variants = saved_variants.filter(**saved_variants_dataset_type_filter(dataset_type)) - if genome_version: - db_genome_version = genome_version.replace('GRCh', '') - saved_variants = saved_variants.filter( - Q(saved_variant_json__genomeVersion__isnull=True) | Q(saved_variant_json__genomeVersion=db_genome_version) - ) return saved_variants -def update_project_saved_variant_json(project_id, family_guids=None, dataset_type=None, user=None, user_email=None): - saved_variants = get_saved_variants(project_id, family_guids, dataset_type).select_related('family') +def update_project_saved_variant_json(project_id, genome_version, family_guids=None, dataset_type=None, user=None, user_email=None): + saved_variants = get_saved_variants(genome_version, project_id, family_guids, dataset_type).select_related('family') if not saved_variants: return None From e5e3140954cacf9563e0f675facbf4eaa8186f60 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 31 Jul 2024 13:18:35 -0400 Subject: [PATCH 45/51] update unit tests --- .../check_for_new_samples_from_pipeline_tests.py | 7 ++++--- .../tests/reload_saved_variant_json_tests.py | 14 +++++++------- seqr/views/apis/saved_variant_api_tests.py | 6 +++--- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py index 193d7dee86..f9f5f83748 100644 --- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py +++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py @@ -193,9 +193,10 @@ def test_command(self, mock_email, mock_airtable_utils): # Update fixture data to allow testing edge cases Project.objects.filter(id__in=[1, 3]).update(genome_version=38) - sv = SavedVariant.objects.get(guid='SV0000002_1248367227_r0390_100') - sv.saved_variant_json['genomeVersion'] = '38' - sv.save() + svs = SavedVariant.objects.filter(guid__in=['SV0000002_1248367227_r0390_100', 'SV0000006_1248367227_r0003_tes']) + for sv in svs: + sv.saved_variant_json['genomeVersion'] = '38' + sv.save() with self.assertRaises(ValueError) as ce: call_command('check_for_new_samples_from_pipeline', 'GRCh38/SNV_INDEL', 
'auto__2023-08-08') diff --git a/seqr/management/tests/reload_saved_variant_json_tests.py b/seqr/management/tests/reload_saved_variant_json_tests.py index 04175a94fc..4ceb4314b6 100644 --- a/seqr/management/tests/reload_saved_variant_json_tests.py +++ b/seqr/management/tests/reload_saved_variant_json_tests.py @@ -27,12 +27,12 @@ def test_with_param_command(self, mock_get_variants, mock_logger): family_1 = Family.objects.get(id=1) mock_get_variants.assert_called_with( - [family_1], ['1-1562437-G-CA', '1-46859832-G-A','21-3343353-GAGA-G'], user=None, user_email='manage_command') + [family_1], ['1-46859832-G-A','21-3343353-GAGA-G'], user=None, user_email='manage_command') logger_info_calls = [ - mock.call('Updated 3 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), + mock.call('Updated 2 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), mock.call('Reload Summary: '), - mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 3 variants') + mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 2 variants') ] mock_logger.info.assert_has_calls(logger_info_calls) mock_get_variants.reset_mock() @@ -45,7 +45,7 @@ def test_with_param_command(self, mock_get_variants, mock_logger): family_2 = Family.objects.get(id=2) mock_get_variants.assert_has_calls([ mock.call( - [family_1, family_2], ['1-1562437-G-CA', '1-248367227-TC-T', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command', + [family_1, family_2], ['1-248367227-TC-T', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command', ), mock.call([Family.objects.get(id=12)], ['1-248367227-TC-T', 'prefix_19107_DEL'], user=None, user_email='manage_command'), mock.call([Family.objects.get(id=14)], ['1-248367227-TC-T'], user=None, user_email='manage_command') @@ -53,11 +53,11 @@ def test_with_param_command(self, mock_get_variants, mock_logger): logger_info_calls = [ mock.call('Reloading saved variants in 4 projects'), - mock.call('Updated 4 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), + mock.call('Updated 3 variants for project 1kg project n\xe5me with uni\xe7\xf8de'), mock.call('Updated 2 variants for project Test Reprocessed Project'), mock.call('Updated 1 variants for project Non-Analyst Project'), mock.call('Reload Summary: '), - mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 4 variants'), + mock.call(' 1kg project n\xe5me with uni\xe7\xf8de: Updated 3 variants'), mock.call(' Test Reprocessed Project: Updated 2 variants'), mock.call(' Non-Analyst Project: Updated 1 variants'), mock.call('Skipped the following 1 project with no saved variants: Empty Project'), @@ -72,7 +72,7 @@ def test_with_param_command(self, mock_get_variants, mock_logger): PROJECT_GUID, '--family-guid={}'.format(FAMILY_GUID)) - mock_get_variants.assert_called_with([family_1], ['1-1562437-G-CA', '1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command') + mock_get_variants.assert_called_with([family_1], ['1-46859832-G-A', '21-3343353-GAGA-G'], user=None, user_email='manage_command') logger_info_calls = [ mock.call('Reload Summary: '), diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 7d13c1afb5..a2a09a9765 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -906,7 +906,7 @@ def test_update_compound_hets_variant_functional_data(self): self.assertEqual(response.status_code, 400) self.assertDictEqual(response.json(), {'error': 'Unable to find the 
following variant(s): not_variant'}) - @mock.patch('seqr.views.utils.variant_utils.MAX_VARIANTS_FETCH', 3) + @mock.patch('seqr.views.utils.variant_utils.MAX_VARIANTS_FETCH', 2) @mock.patch('seqr.utils.search.utils.es_backend_enabled') @mock.patch('seqr.views.apis.saved_variant_api.logger') @mock.patch('seqr.views.utils.variant_utils.get_variants_for_variant_ids') @@ -925,12 +925,12 @@ def test_update_saved_variant_json(self, mock_get_variants, mock_logger, mock_es self.assertDictEqual( response.json(), {'SV0000002_1248367227_r0390_100': None, 'SV0000001_2103343353_r0390_100': None, - 'SV0059957_11562437_f019313_1': None, 'SV0059956_11560662_f019313_1': None} + 'SV0059956_11560662_f019313_1': None} ) families = [Family.objects.get(guid='F000001_1'), Family.objects.get(guid='F000002_2')] mock_get_variants.assert_has_calls([ - mock.call(families, ['1-1562437-G-CA', '1-248367227-TC-T', '1-46859832-G-A'], user=self.manager_user, user_email=None), + mock.call(families, ['1-248367227-TC-T', '1-46859832-G-A'], user=self.manager_user, user_email=None), mock.call(families, ['21-3343353-GAGA-G'], user=self.manager_user, user_email=None), ]) mock_logger.error.assert_not_called() From d86935fe99dd9a48f6b0162e3ec610c0c23eafe1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 15:10:55 -0400 Subject: [PATCH 46/51] fix error for loading context when no variants present --- seqr/views/apis/saved_variant_api_tests.py | 30 +++++++++++++++------- seqr/views/utils/variant_utils.py | 3 +++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 7d13c1afb5..3ea5f54ece 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -25,10 +25,12 @@ COMPOUND_HET_2_GUID = 'SV0059957_11562437_f019313_1' GENE_GUID_2 = 'ENSG00000197530' +VARIANT_TAG_RESPONSE_KEYS = { + 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', +} SAVED_VARIANT_RESPONSE_KEYS = { - 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', 'familiesByGuid', + *VARIANT_TAG_RESPONSE_KEYS, 'familiesByGuid', 'omimIntervals', 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'transcriptsById', 'phenotypeGeneScores', - 'omimIntervals', } COMPOUND_HET_3_JSON = { @@ -235,6 +237,10 @@ def test_saved_variant_data(self): # get variants with no tags for whole project response = self.client.get('{}?includeNoteVariants=true'.format(url)) self.assertEqual(response.status_code, 200) + no_families_response_keys = {*SAVED_VARIANT_RESPONSE_KEYS} + no_families_response_keys.remove('familiesByGuid') + no_families_response_keys.remove('transcriptsById') + self.assertSetEqual(set(response.json().keys()), no_families_response_keys) variants = response.json()['savedVariantsByGuid'] self.assertSetEqual(set(variants.keys()), {COMPOUND_HET_1_GUID, COMPOUND_HET_2_GUID}) self.assertListEqual(variants[COMPOUND_HET_1_GUID]['tagGuids'], []) @@ -266,10 +272,7 @@ def test_saved_variant_data(self): response = self.client.get(url.replace(PROJECT_GUID, 'R0003_test')) self.assertEqual(response.status_code, 200) response_json = response.json() - response_keys = {*SAVED_VARIANT_RESPONSE_KEYS} - response_keys.remove('familiesByGuid') - response_keys.remove('transcriptsById') - self.assertSetEqual(set(response_json.keys()), response_keys) + self.assertSetEqual(set(response_json.keys()), no_families_response_keys) 
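+        # no_families_response_keys (built above) is SAVED_VARIANT_RESPONSE_KEYS minus
+        # 'familiesByGuid' and 'transcriptsById': the payload shape returned whenever
+        # family context is not loaded for the requested variants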
self.assertSetEqual( set(response_json['savedVariantsByGuid'].keys()), @@ -330,6 +333,17 @@ def test_saved_variant_data(self): self.assertListEqual(variants['SV0000002_1248367227_r0390_100']['familyGuids'], ['F000002_2']) self.assertEqual(set(response_json['familiesByGuid'].keys()), {'F000001_1', 'F000002_2', 'F000012_12'}) + # Test empty project + empty_project_url = url.replace(PROJECT_GUID, 'R0002_empty') + response = self.client.get(empty_project_url) + self.assertEqual(response.status_code, 200) + empty_response = {k: {} for k in VARIANT_TAG_RESPONSE_KEYS} + self.assertDictEqual(response.json(), empty_response) + + response = self.client.get(f'{empty_project_url}?loadProjectTagTypes=true&loadFamilyContext=true') + self.assertEqual(response.status_code, 200) + self.assertDictEqual(response.json(), empty_response) + def test_create_saved_variant(self): create_saved_variant_url = reverse(create_saved_variant_handler) self.check_collaborator_login(create_saved_variant_url, request_data={'familyGuid': 'F000001_1'}) @@ -410,9 +424,7 @@ def test_create_saved_sv_variant(self): self.assertEqual(response.status_code, 200) response_json = response.json() - self.assertSetEqual(set(response_json.keys()), { - 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', 'genesById', - }) + self.assertSetEqual(set(response_json.keys()), {*VARIANT_TAG_RESPONSE_KEYS, 'genesById'}) self.assertEqual(len(response_json['savedVariantsByGuid']), 1) variant_guid = next(iter(response_json['savedVariantsByGuid'])) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 17d1f7e36a..7f98e14653 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -374,6 +374,9 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a if saved_variants is not None else {'savedVariantsByGuid': {}} variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants + if not variants: + return response + genes, transcripts, family_genes = _saved_variant_genes_transcripts(variants) projects = Project.objects.filter(family__guid__in=family_genes.keys()).distinct() From b3ebaec088dd9ddffe26023a378f2749b7cf2c82 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 15:25:09 -0400 Subject: [PATCH 47/51] fix unit tests --- seqr/views/apis/summary_data_api_tests.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 8d90812dbf..cc80bc86fb 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -26,10 +26,12 @@ u'dateGenerated': '2020-04-27' } +VARIANT_TAG_RESPONSE_KEYS = { + 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', +} SAVED_VARIANT_RESPONSE_KEYS = { - 'projectsByGuid', 'locusListsByGuid', 'savedVariantsByGuid', 'variantFunctionalDataByGuid', 'genesById', - 'variantNotesByGuid', 'individualsByGuid', 'variantTagsByGuid', 'familiesByGuid', 'familyNotesByGuid', - 'mmeSubmissionsByGuid', 'transcriptsById', + *VARIANT_TAG_RESPONSE_KEYS, 'projectsByGuid', 'locusListsByGuid', 'genesById', + 'individualsByGuid', 'familiesByGuid', 'familyNotesByGuid', 'mmeSubmissionsByGuid', 'transcriptsById', } EXPECTED_NO_AIRTABLE_SAMPLE_METADATA_ROW = { @@ -317,7 +319,7 @@ def test_saved_variants_page(self): response = 
self.client.get('{}?gene=ENSG00000135953'.format(url)) self.assertEqual(response.status_code, 200) - self.assertDictEqual(response.json(), {k: {} for k in SAVED_VARIANT_RESPONSE_KEYS if k != 'transcriptsById'}) + self.assertDictEqual(response.json(), {k: {} for k in VARIANT_TAG_RESPONSE_KEYS}) self.login_manager() response = self.client.get(url) From a18bf966e14b8352ee6ef50bb88132aae34c419b Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Thu, 1 Aug 2024 16:00:37 -0400 Subject: [PATCH 48/51] fix unit tests --- seqr/views/apis/saved_variant_api_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 3ea5f54ece..e70c410aa7 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -1011,8 +1011,10 @@ def test_saved_variant_data(self, *args): super(AnvilSavedVariantAPITest, self).test_saved_variant_data(*args) self.mock_list_workspaces.assert_called_with(self.analyst_user) self.mock_get_ws_access_level.assert_called_with( + mock.ANY, 'my-seqr-billing', 'empty') + self.mock_get_ws_access_level.assert_any_call( mock.ANY, 'my-seqr-billing', 'anvil-1kg project n\u00e5me with uni\u00e7\u00f8de') - self.assertEqual(self.mock_get_ws_access_level.call_count, 15) + self.assertEqual(self.mock_get_ws_access_level.call_count, 17) self.mock_get_groups.assert_has_calls([mock.call(self.collaborator_user), mock.call(self.analyst_user)]) self.assertEqual(self.mock_get_groups.call_count, 11) self.mock_get_ws_acl.assert_not_called() From f5c9b02fafa33e2c7fb67067540b502ef96c56cb Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Aug 2024 13:35:53 -0400 Subject: [PATCH 49/51] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0184a538f..e730f2820d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 8/2/24 * Adds index_file_path to IGV Sample model (REQUIRES DB MIGRATION) ## 7/24/24 From 0d7dba43cfb0d76e55f655d06cb0305e876b8ab5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Aug 2024 13:49:08 -0400 Subject: [PATCH 50/51] fix flapping test --- seqr/views/apis/anvil_workspace_api_tests.py | 2 +- seqr/views/utils/pedigree_info_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py index d8bdf7e9e7..2265fb7507 100644 --- a/seqr/views/apis/anvil_workspace_api_tests.py +++ b/seqr/views/apis/anvil_workspace_api_tests.py @@ -813,7 +813,7 @@ def _assert_valid_operation(self, project, test_add_data=True): self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19675_1', 'mother__individual_id': None, 'father__individual_id': 'NA19678', 'sex': 'F', 'affected': 'A', 'notes': 'A affected individual, test1-zsf', - 'features': [{'id': 'HP:0012469'}, {'id': 'HP:0011675'}], + 'features': [{'id': 'HP:0011675'}, {'id': 'HP:0012469'}], }, individual_model_data) self.assertIn({ 'family__family_id': '1', 'individual_id': 'NA19678', 'mother__individual_id': None, diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index fffe0426c7..91b74f8566 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -151,7 +151,7 @@ def parse_hpo_terms(hpo_term_string): if not hpo_term_string: return [] terms = {hpo_term.strip() for hpo_term in re.sub(r'\(.*?\)', '', 
hpo_term_string).replace(',', ';').split(';')} - return[{'id': term} for term in terms if term] + return[{'id': term} for term in sorted(terms) if term] def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None, update_features=False): From 640ee46ad86642b384ae4afeea9108f42cece5d4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 2 Aug 2024 14:07:57 -0400 Subject: [PATCH 51/51] update to latest docker compose syntax --- deploy/LOCAL_DEVELOPMENT_INSTALL.md | 2 +- deploy/LOCAL_INSTALL.md | 36 ++++++++++++++--------------- test_local_deployment.sh | 20 ++++++++-------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/deploy/LOCAL_DEVELOPMENT_INSTALL.md b/deploy/LOCAL_DEVELOPMENT_INSTALL.md index 919a0acc9b..02e34f8859 100644 --- a/deploy/LOCAL_DEVELOPMENT_INSTALL.md +++ b/deploy/LOCAL_DEVELOPMENT_INSTALL.md @@ -116,7 +116,7 @@ Before running seqr, make sure the following are currently running/ started: - If you want ES running but do not need production data/ are working with a standalone seqr instance, use docker-compose ```bash - docker-compose up elasticsearch + docker compose up elasticsearch ``` ### Run ui asset server diff --git a/deploy/LOCAL_INSTALL.md b/deploy/LOCAL_INSTALL.md index 6ed9162d40..d766931d45 100644 --- a/deploy/LOCAL_INSTALL.md +++ b/deploy/LOCAL_INSTALL.md @@ -31,10 +31,10 @@ SEQR_DIR=$(pwd) wget https://raw.githubusercontent.com/broadinstitute/seqr/master/docker-compose.yml -docker-compose up -d seqr # start up the seqr docker image in the background after also starting other components it depends on (postgres, redis, elasticsearch). This may take 10+ minutes. -docker-compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. +docker compose up -d seqr # start up the seqr docker image in the background after also starting other components it depends on (postgres, redis, elasticsearch). This may take 10+ minutes. +docker compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. -docker-compose exec seqr python manage.py createsuperuser # create a seqr Admin user +docker compose exec seqr python manage.py createsuperuser # create a seqr Admin user open http://localhost # open the seqr landing page in your browser. Log in to seqr using the email and password from the previous step ``` @@ -45,15 +45,15 @@ Updating your local installation of seqr involves pulling the latest version of ```bash # run this from the directory containing your docker-compose.yml file -docker-compose pull -docker-compose up -d seqr +docker compose pull +docker compose up -d seqr -docker-compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. +docker compose logs -f seqr # (optional) continuously print seqr logs to see when it is done starting up or if there are any errors. Type Ctrl-C to exit from the logs. ``` To update reference data in seqr, such as OMIM, HPO, etc., run the following ```bash -docker-compose exec seqr ./manage.py update_all_reference_data --use-cached-omim --skip-gencode +docker compose exec seqr ./manage.py update_all_reference_data --use-cached-omim --skip-gencode ``` ### Annotating and loading VCF callsets @@ -79,7 +79,7 @@ The steps below describe how to annotate a callset and then load it into your on 1. 
start a pipeline-runner container which has the necessary tools and environment for starting and submitting jobs to a Dataproc cluster. ```bash - docker-compose up -d pipeline-runner # start the pipeline-runner container + docker compose up -d pipeline-runner # start the pipeline-runner container ``` 1. if you haven't already, upload reference data to your own google bucket. @@ -88,7 +88,7 @@ This is expected to take a while ```bash BUILD_VERSION=38 # can be 37 or 38 - docker-compose exec pipeline-runner copy_reference_data_to_gs.sh $BUILD_VERSION $GS_BUCKET + docker compose exec pipeline-runner copy_reference_data_to_gs.sh $BUILD_VERSION $GS_BUCKET ``` Periodically, you may want to update the reference data in order to get the latest versions of these annotations. @@ -115,7 +115,7 @@ annotations, but you will need to re-load previously loaded projects to get the INPUT_FILE_PATH=/${GS_FILE_PATH}/${FILENAME} - docker-compose exec pipeline-runner load_data_dataproc.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $GS_BUCKET $INPUT_FILE_PATH + docker compose exec pipeline-runner load_data_dataproc.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $GS_BUCKET $INPUT_FILE_PATH ``` @@ -138,13 +138,13 @@ The steps below describe how to annotate a callset and then load it into your on 1. start a pipeline-runner container ```bash - docker-compose up -d pipeline-runner # start the pipeline-runner container + docker compose up -d pipeline-runner # start the pipeline-runner container ``` 1. authenticate into your google cloud account. This is required for hail to access buckets hosted on gcloud. ```bash - docker-compose exec pipeline-runner gcloud auth application-default login + docker compose exec pipeline-runner gcloud auth application-default login ``` 1. if you haven't already, download VEP and other reference data to the docker image's mounted directories. @@ -153,7 +153,7 @@ This is expected to take a while ```bash BUILD_VERSION=38 # can be 37 or 38 - docker-compose exec pipeline-runner download_reference_data.sh $BUILD_VERSION + docker compose exec pipeline-runner download_reference_data.sh $BUILD_VERSION ``` Periodically, you may want to update the reference data in order to get the latest versions of these annotations. 
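   A minimal sketch of scripting the periodic refresh shown in the hunk below, assuming the pipeline-runner service from docker-compose.yml is already up; the standalone script and the `refresh_ht` helper are illustrative, not files in the seqr repo:

   ```bash
   #!/usr/bin/env bash
   # Illustrative helper, not part of the seqr repo: drop a stale Hail table and
   # re-sync the latest copy from the public seqr-reference-data bucket.
   set -e

   BUILD_VERSION=38  # can be 37 or 38

   refresh_ht() {
     local src=$1 dest=$2
     docker compose exec pipeline-runner rm -rf "$dest"
     docker compose exec pipeline-runner gsutil rsync -r "$src" "$dest"
   }

   refresh_ht \
     "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/clinvar.GRCh${BUILD_VERSION}.ht" \
     "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht"
   ```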
@@ -163,12 +163,12 @@ annotations, but you will need to re-load previously loaded projects to get the
   BUILD_VERSION=38 # can be 37 or 38

   # Update clinvar
-   docker-compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht"
-   docker-compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/clinvar.GRCh${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht"
+   docker compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht"
+   docker compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/clinvar.GRCh${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/clinvar.GRCh${BUILD_VERSION}.ht"

   # Update all other reference data
-   docker-compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht"
-   docker-compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/combined_reference_data_grch${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht"
+   docker compose exec pipeline-runner rm -rf "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht"
+   docker compose exec pipeline-runner gsutil rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/combined_reference_data_grch${BUILD_VERSION}.ht" "/seqr-reference-data/GRCh${BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht"
   ```

1. run the loading command in the pipeline-runner container. Adjust the arguments as needed

   ```bash
   BUILD_VERSION=38 # can be 37 or 38
   SAMPLE_TYPE=WES
   INDEX_NAME=my-dataset

   INPUT_FILE_PATH=${FILE_PATH}/${FILENAME}

-   docker-compose exec pipeline-runner load_data.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $INPUT_FILE_PATH
+   docker compose exec pipeline-runner load_data.sh $BUILD_VERSION $SAMPLE_TYPE $INDEX_NAME $INPUT_FILE_PATH
   ```

diff --git a/test_local_deployment.sh b/test_local_deployment.sh
index e6d38d908f..b964a2e235 100755
--- a/test_local_deployment.sh
+++ b/test_local_deployment.sh
@@ -3,15 +3,15 @@
 set -ex

 # Due to travis filesystem issues, need to explicitly grant permissions for the volume mount from the container
-# This is not required to use docker-compose locally, only for testing
-docker-compose up -d elasticsearch
-docker-compose exec -T elasticsearch chmod 777 ./data
+# This is not required to use docker compose locally, only for testing
+docker compose up -d elasticsearch
+docker compose exec -T elasticsearch chmod 777 ./data

-docker-compose up -d seqr
-docker-compose logs postgres
-docker-compose logs elasticsearch
-docker-compose logs redis
-docker-compose exec -T seqr curl elasticsearch:9200
+docker compose up -d seqr
+docker compose logs postgres
+docker compose logs elasticsearch
+docker compose logs redis
+docker compose exec -T seqr curl elasticsearch:9200
 sleep 30
-docker-compose logs seqr
-echo -ne 'testpassword\n' docker-compose exec -T seqr python manage.py createsuperuser --username test --email test@test.com
+docker compose logs seqr
+echo -ne 'testpassword\n' | docker compose exec -T seqr python manage.py createsuperuser --username test --email test@test.com