diff --git a/seqr/urls.py b/seqr/urls.py
index 09d104074a..d7ef5b2fec 100644
--- a/seqr/urls.py
+++ b/seqr/urls.py
@@ -134,7 +134,7 @@ from seqr.views.apis.awesomebar_api import awesomebar_autocomplete_handler
 from seqr.views.apis.auth_api import login_required_error, login_view, logout_view, policies_required_error
 from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \
-    igv_genomes_proxy
+    igv_genomes_proxy, receive_bulk_igv_table_handler
 from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler
 from seqr.views.apis.project_api import create_project_handler, update_project_handler, delete_project_handler, \
     project_page_data, project_families, project_overview, project_mme_submisssions, project_individuals, \
@@ -324,6 +324,7 @@
     'data_management/validate_callset': validate_callset,
     'data_management/loaded_projects/(?P<genome_version>[^/]+)/(?P<dataset_type>[^/]+)': get_loaded_projects,
     'data_management/load_data': load_data,
+    'data_management/add_igv': receive_bulk_igv_table_handler,
 
     'summary_data/saved_variants/(?P<tag>[^/]+)': saved_variants_page,
     'summary_data/hpo/(?P<hpo_id>[^/]+)': hpo_summary_data,
diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py
index 34f48fb986..58fc92c1f0 100644
--- a/seqr/views/apis/igv_api.py
+++ b/seqr/views/apis/igv_api.py
@@ -8,12 +8,12 @@ from seqr.models import Individual, IgvSample
 from seqr.utils.file_utils import file_iter, does_file_exist, is_google_bucket_file_path, run_command, get_google_project
 from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json
-from seqr.views.utils.file_utils import save_uploaded_file
+from seqr.views.utils.file_utils import save_uploaded_file, load_uploaded_file
 from seqr.views.utils.json_to_orm_utils import get_or_create_model_from_json
 from seqr.views.utils.json_utils import create_json_response
 from seqr.views.utils.orm_to_json_utils import get_json_for_sample
 from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \
-    login_and_policies_required, pm_or_data_manager_required
+    login_and_policies_required, pm_or_data_manager_required, get_project_guids_user_can_view
 
 GS_STORAGE_ACCESS_CACHE_KEY = 'gs_storage_access_cache_entry'
 GS_STORAGE_URL = 'https://storage.googleapis.com'
@@ -22,51 +22,49 @@
     'gs': GS_STORAGE_URL,
 }
 
-@pm_or_data_manager_required
-def receive_igv_table_handler(request, project_guid):
-    project = get_project_and_check_permissions(project_guid, request.user, can_edit=True)
-    info = []
 
-    def _process_alignment_records(rows, **kwargs):
-        invalid_row = next((row for row in rows if not 2 <= len(row) <= 3), None)
-        if invalid_row:
-            raise ValueError("Must contain 2 or 3 columns: " + ', '.join(invalid_row))
-        parsed_records = defaultdict(list)
-        for row in rows:
-            parsed_records[row[0]].append({'filePath': row[1], 'sampleId': row[2] if len(row) > 2 else None})
-        return parsed_records
+def _process_alignment_records(rows, num_id_cols=1, **kwargs):
+    num_cols = num_id_cols + 1
+    invalid_row = next((row for row in rows if not num_cols <= len(row) <= num_cols+1), None)
+    if invalid_row:
+        raise ValueError(f"Must contain {num_cols} or {num_cols+1} columns: {', '.join(invalid_row)}")
+    parsed_records = defaultdict(list)
+    for row in rows:
+        row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
+        parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None})
+    return parsed_records
 
-    try:
-        uploaded_file_id, filename, individual_dataset_mapping = save_uploaded_file(request, process_records=_process_alignment_records)
 
-        matched_individuals = Individual.objects.filter(family__project=project, individual_id__in=individual_dataset_mapping.keys())
-        unmatched_individuals = set(individual_dataset_mapping.keys()) - {i.individual_id for i in matched_individuals}
-        if len(unmatched_individuals) > 0:
-            raise Exception('The following Individual IDs do not exist: {}'.format(", ".join(unmatched_individuals)))
+def _process_igv_table_handler(parse_uploaded_file, get_valid_matched_individuals):
+    info = []
 
-        info.append('Parsed {} rows in {} individuals from {}'.format(
-            sum([len(rows) for rows in individual_dataset_mapping.values()]), len(individual_dataset_mapping), filename))
+    try:
+        uploaded_file_id, filename, individual_dataset_mapping = parse_uploaded_file()
 
-        existing_sample_files = defaultdict(set)
-        for sample in IgvSample.objects.select_related('individual').filter(individual__in=matched_individuals):
-            existing_sample_files[sample.individual.individual_id].add(sample.file_path)
+        matched_individuals = get_valid_matched_individuals(individual_dataset_mapping)
 
-        unchanged_rows = set()
-        for individual_id, updates in individual_dataset_mapping.items():
-            unchanged_rows.update([
-                (individual_id, update['filePath']) for update in updates
-                if update['filePath'] in existing_sample_files[individual_id]
-            ])
+        message = f'Parsed {sum([len(rows) for rows in individual_dataset_mapping.values()])} rows in {len(matched_individuals)} individuals'
+        if filename:
+            message += f' from {filename}'
+        info.append(message)
 
-        if unchanged_rows:
-            info.append('No change detected for {} rows'.format(len(unchanged_rows)))
+        existing_sample_files = defaultdict(set)
+        for sample in IgvSample.objects.select_related('individual').filter(individual__in=matched_individuals.keys()):
+            existing_sample_files[sample.individual].add(sample.file_path)
 
+        num_unchanged_rows = 0
         all_updates = []
-        for i in matched_individuals:
-            all_updates += [
-                dict(individualGuid=i.guid, individualId=i.individual_id, **update) for update in individual_dataset_mapping[i.individual_id]
-                if (i.individual_id, update['filePath']) not in unchanged_rows
+        for individual, updates in matched_individuals.items():
+            changed_updates = [
+                dict(individualGuid=individual.guid, individualId=individual.individual_id, **update)
+                for update in updates
+                if update['filePath'] not in existing_sample_files[individual]
             ]
+            all_updates += changed_updates
+            num_unchanged_rows += len(updates) - len(changed_updates)
+
+        if num_unchanged_rows:
+            info.append('No change detected for {} rows'.format(num_unchanged_rows))
 
     except Exception as e:
         return create_json_response({'errors': [str(e)]}, status=400)
@@ -75,11 +73,58 @@ def _process_alignment_records(rows, **kwargs):
         'updates': all_updates,
         'uploadedFileId': uploaded_file_id,
         'errors': [],
+        'warnings': [],
         'info': info,
     }
     return create_json_response(response)
 
 
+@pm_or_data_manager_required
+def receive_igv_table_handler(request, project_guid):
+    project = get_project_and_check_permissions(project_guid, request.user, can_edit=True)
+
+    def _get_valid_matched_individuals(individual_dataset_mapping):
+        matched_individuals = Individual.objects.filter(
+            family__project=project, individual_id__in=individual_dataset_mapping.keys()
+        )
+        unmatched_individuals = set(individual_dataset_mapping.keys()) - {i.individual_id for i in matched_individuals}
+        if len(unmatched_individuals) > 0:
+            raise Exception('The following Individual IDs do not exist: {}'.format(", ".join(unmatched_individuals)))
+
+        return {i: individual_dataset_mapping[i.individual_id] for i in matched_individuals}
+
+    return _process_igv_table_handler(
+        lambda: save_uploaded_file(request, process_records=_process_alignment_records),
+        _get_valid_matched_individuals,
+    )
+
+
+@pm_or_data_manager_required
+def receive_bulk_igv_table_handler(request):
+    def _parse_uploaded_file():
+        uploaded_file_id = json.loads(request.body).get('mappingFile', {}).get('uploadedFileId')
+        if not uploaded_file_id:
+            raise ValueError('No file uploaded')
+        records = _process_alignment_records(load_uploaded_file(uploaded_file_id), num_id_cols=2)
+        return uploaded_file_id, None, records
+
+    def _get_valid_matched_individuals(individual_dataset_mapping):
+        individuals = Individual.objects.filter(
+            family__project__guid__in=get_project_guids_user_can_view(request.user, limit_data_manager=False),
+            family__project__name__in={k[0] for k in individual_dataset_mapping.keys()},
+            individual_id__in={k[1] for k in individual_dataset_mapping.keys()},
+        ).select_related('family__project')
+        individuals_by_project_id = {(i.family.project.name, i.individual_id): i for i in individuals}
+        unmatched = set(individual_dataset_mapping.keys()) - set(individuals_by_project_id.keys())
+        if len(unmatched) > 0:
+            raise Exception(
+                f'The following Individuals do not exist: {", ".join([f"{i} ({p})" for p, i in sorted(unmatched)])}')
+
+        return {v: individual_dataset_mapping[k] for k, v in individuals_by_project_id.items() if individual_dataset_mapping[k]}
+
+    return _process_igv_table_handler(_parse_uploaded_file, _get_valid_matched_individuals)
+
+
 SAMPLE_TYPE_MAP = [
     ('bam', IgvSample.SAMPLE_TYPE_ALIGNMENT),
     ('cram', IgvSample.SAMPLE_TYPE_ALIGNMENT),
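As a sanity check on the refactored parser, here is a minimal standalone sketch of its bulk behavior. The function body is copied verbatim from the hunk above; the project name and gs:// paths are invented for illustration:

from collections import defaultdict

def _process_alignment_records(rows, num_id_cols=1, **kwargs):
    # Each row needs num_id_cols ID columns plus a file path,
    # with an optional trailing sample ID.
    num_cols = num_id_cols + 1
    invalid_row = next((row for row in rows if not num_cols <= len(row) <= num_cols+1), None)
    if invalid_row:
        raise ValueError(f"Must contain {num_cols} or {num_cols+1} columns: {', '.join(invalid_row)}")
    parsed_records = defaultdict(list)
    for row in rows:
        row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
        parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None})
    return parsed_records

# Bulk upload (num_id_cols=2): records are keyed by (project name, individual ID)
rows = [
    ['My Project', 'NA12878', 'gs://my-bucket/NA12878.cram'],
    ['My Project', 'NA12878', 'gs://my-bucket/batch_1.dcr.bed.gz', 'NA12878'],
]
assert _process_alignment_records(rows, num_id_cols=2) == {
    ('My Project', 'NA12878'): [
        {'filePath': 'gs://my-bucket/NA12878.cram', 'sampleId': None},
        {'filePath': 'gs://my-bucket/batch_1.dcr.bed.gz', 'sampleId': 'NA12878'},
    ],
}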
diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py
index 575ebb4999..b24f09a8e0 100644
--- a/seqr/views/apis/igv_api_tests.py
+++ b/seqr/views/apis/igv_api_tests.py
@@ -6,7 +6,7 @@ from django.core.files.uploadedfile import SimpleUploadedFile
 from django.urls.base import reverse
 from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \
-    igv_genomes_proxy
+    igv_genomes_proxy, receive_bulk_igv_table_handler
 from seqr.views.apis.igv_api import GS_STORAGE_ACCESS_CACHE_KEY
 from seqr.views.utils.test_utils import AuthenticationTestCase
 
@@ -118,8 +118,9 @@ def test_receive_alignment_table_handler(self):
         self.assertEqual(response.status_code, 200)
 
         response_json = response.json()
-        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'info', 'updates'})
+        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'warnings', 'info', 'updates'})
         self.assertListEqual(response_json['errors'], [])
+        self.assertListEqual(response_json['warnings'], [])
         self.assertListEqual(
             response_json['info'], ['Parsed 3 rows in 2 individuals from samples.csv', 'No change detected for 1 rows'])
         self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), [
@@ -132,6 +133,67 @@ def test_receive_alignment_table_handler(self):
         response = self.client.post(url, data={'f': f})
         self.assertEqual(response.status_code, 200)
 
+    @mock.patch('seqr.views.apis.igv_api.load_uploaded_file')
+    def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file):
+        url = reverse(receive_bulk_igv_table_handler)
+        self.check_pm_login(url)
+
+        # Send invalid requests
+        response = self.client.post(url, content_type='application/json', data=json.dumps({}))
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': ['No file uploaded']})
+
+        uploaded_file_id = 'test_file_id'
+        request_data = json.dumps({'mappingFile': {'uploadedFileId': uploaded_file_id}})
+        pm_projects_rows = [
+            ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/batch_10.dcr.bed.gz', 'NA19675'],
+            ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/NA19675_1.bam'],
+            ['1kg project nåme with uniçøde', 'NA20870', 'gs://readviz/NA20870.cram'],
+            ['Test Reprocessed Project', 'NA20885', 'gs://readviz/NA20885.cram'],
+        ]
+        rows = pm_projects_rows + [['Non-Analyst Project', 'NA21234', 'gs://readviz/NA21234.cram']]
+        mock_load_uploaded_file.return_value = [['NA19675']] + rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': ['Must contain 3 or 4 columns: NA19675']})
+
+        mock_load_uploaded_file.return_value = rows + [
+            ['Non-project', 'NA19675_1', 'gs://readviz/NA19679.bam'],
+            ['1kg project nåme with uniçøde', 'NA19675', 'gs://readviz/batch_10.dcr.bed.gz'],
+        ]
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': [
+            'The following Individuals do not exist: NA19675 (1kg project nåme with uniçøde), NA21234 (Non-Analyst Project), NA19675_1 (Non-project)']})
+
+        # Send valid request
+        mock_load_uploaded_file.return_value = pm_projects_rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 200)
+
+        response_json = response.json()
+        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'warnings', 'info', 'updates'})
+        self.assertListEqual(response_json['errors'], [])
+        self.assertListEqual(response_json['warnings'], [])
+        self.assertListEqual(response_json['info'], ['Parsed 4 rows in 3 individuals', 'No change detected for 1 rows'])
+        updates = [
+            {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675'},
+            {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/NA19675_1.bam', 'sampleId': None},
+            {'individualGuid': 'I000015_na20885', 'individualId': 'NA20885', 'filePath': 'gs://readviz/NA20885.cram', 'sampleId': None},
+        ]
+        self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates)
+
+        # test data manager access
+        self.login_data_manager_user()
+        mock_load_uploaded_file.return_value = rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 200)
+        response_json = response.json()
+        self.assertListEqual(response_json['info'], ['Parsed 5 rows in 4 individuals', 'No change detected for 1 rows'])
+        self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates + [
+            {'individualGuid': 'I000018_na21234', 'individualId': 'NA21234', 'filePath': 'gs://readviz/NA21234.cram', 'sampleId': None}
+        ])
+
     @mock.patch('seqr.utils.file_utils.subprocess.Popen')
     @mock.patch('seqr.utils.file_utils.os.path.isfile')
     def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess):
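The tests above pin down the request/response contract of the new endpoint. A rough client-side sketch of that contract, assuming a hypothetical host and an already-uploaded mapping file (the seqr UI issues these calls as authenticated XHR requests, so `requests` here is purely illustrative):

import requests  # illustrative only; auth/session handling omitted

# The mapping table is uploaded separately; the bulk endpoint receives only its ID
resp = requests.post(
    'https://seqr.example.com/api/data_management/add_igv',  # hypothetical host
    json={'mappingFile': {'uploadedFileId': 'test_file_id'}},
)
# 400 -> {'errors': [...]}; 200 -> per-row updates for the client to apply:
# {
#     'uploadedFileId': 'test_file_id', 'errors': [], 'warnings': [],
#     'info': ['Parsed 4 rows in 3 individuals', ...],
#     'updates': [{'individualGuid': ..., 'individualId': ..., 'filePath': ..., 'sampleId': ...}, ...],
# }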
diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx
index 6cb2401ed5..3bfe0378fc 100644
--- a/ui/pages/DataManagement/DataManagement.jsx
+++ b/ui/pages/DataManagement/DataManagement.jsx
@@ -7,6 +7,7 @@ import { getUser, getElasticsearchEnabled } from 'redux/selectors'
 import { Error404, Error401 } from 'shared/components/page/Errors'
 import { SimplePageHeader } from 'shared/components/page/PageHeaderLayout'
 
+import AddIGV from './components/AddIGV'
 import ElasticsearchStatus from './components/ElasticsearchStatus'
 import LoadData from './components/LoadData'
 import RnaSeq from './components/RnaSeq'
@@ -19,6 +20,7 @@ const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' }
 
 const PM_DATA_MANAGEMENT_PAGES = [
   { path: 'load_data', component: LoadData },
+  { path: 'add_igv', component: AddIGV },
 ]
 
 const DATA_MANAGEMENT_PAGES = [
diff --git a/ui/pages/DataManagement/components/AddIGV.jsx b/ui/pages/DataManagement/components/AddIGV.jsx
new file mode 100644
index 0000000000..03b4f755a0
--- /dev/null
+++ b/ui/pages/DataManagement/components/AddIGV.jsx
@@ -0,0 +1,44 @@
+import React from 'react'
+import { connect } from 'react-redux'
+import { List, Segment } from 'semantic-ui-react'
+
+import FileUploadField, { validateUploadedFile } from 'shared/components/form/XHRUploaderField'
+import UploadFormPage from 'shared/components/page/UploadFormPage'
+
+import { getIgvUploadStats } from '../selectors'
+import { addIgv } from '../reducers'
+
+const mapStateToProps = state => ({
+  fields: [
+    {
+      name: 'mappingFile',
+      validate: validateUploadedFile,
+      component: FileUploadField,
+      dropzoneLabel: (
+        <Segment basic textAlign="left">
+          Upload a file with desired IGV tracks. Include one row per track.
+          For merged RNA tracks, include one row for coverage and one for junctions.
+          <br />
+          Columns are as follows:
+          <br />
+          <List ordered>
+            <List.Item>Project</List.Item>
+            <List.Item>Individual ID</List.Item>
+            <List.Item>IGV Track File Path</List.Item>
+            <List.Item>
+              Optional: Sample ID if different from Individual ID.
+              Used primarily for gCNV files to identify the sample in the batch path
+            </List.Item>
+          </List>
+        </Segment>
+      ),
+    },
+  ],
+  uploadStats: getIgvUploadStats(state),
+})
+
+const mapDispatchToProps = {
+  onSubmit: addIgv,
+}
+
+export default connect(mapStateToProps, mapDispatchToProps)(UploadFormPage)
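For reference, the table the dropzone above asks for looks roughly like this. All values are invented; the real table is uploaded as a delimited file and parsed server-side by _process_alignment_records with num_id_cols=2:

# Columns: Project, Individual ID, IGV Track File Path, optional Sample ID
BULK_IGV_TABLE = [
    ['My Project', 'NA12878', 'gs://my-bucket/NA12878.cram'],
    # gCNV bed files typically need an explicit sample ID within the batch file
    ['My Project', 'NA12878', 'gs://my-bucket/batch_1.dcr.bed.gz', 'NA12878'],
    # merged RNA tracks: one row for coverage, one for junctions
    ['My Project', 'NA12878', 'gs://my-bucket/NA12878.bigWig'],
    ['My Project', 'NA12878', 'gs://my-bucket/NA12878.junctions.bed.gz'],
]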
diff --git a/ui/pages/DataManagement/reducers.js b/ui/pages/DataManagement/reducers.js
index 7cc15fd611..ba672bfef0 100644
--- a/ui/pages/DataManagement/reducers.js
+++ b/ui/pages/DataManagement/reducers.js
@@ -9,6 +9,7 @@ const RECEIVE_ELASTICSEARCH_STATUS = 'RECEIVE_ELASTICSEARCH_STATUS'
 const RECEIVE_PIPELINE_UPLOAD_STATS = 'RECEIVE_PIPELINE_UPLOAD_STATS'
 const RECEIVE_RNA_SEQ_UPLOAD_STATS = 'RECEIVE_RNA_SEQ_UPLOAD_STATS'
 const RECEIVE_PHE_PRI_UPLOAD_STATS = 'RECEIVE_PHE_PRI_UPLOAD_STATS'
+const RECEIVE_IGV_UPLOAD_STATS = 'RECEIVE_IGV_UPLOAD_STATS'
 const REQUEST_ALL_USERS = 'REQUEST_ALL_USERS'
 const RECEIVE_ALL_USERS = 'RECEIVE_ALL_USERS'
 
@@ -53,29 +54,49 @@ export const uploadQcPipelineOutput = values => submitRequest(
 
 export const deleteEsIndex = index => submitRequest('delete_index', RECEIVE_ELASTICSEARCH_STATUS, { index })
 
-export const uploadRnaSeq = values => (dispatch) => {
+const loadMultipleData = (path, getUpdateData, dispatchType, formatSuccessMessage) => values => (dispatch) => {
   let successResponseJson = null
   return new HttpRequestHelper(
-    '/api/data_management/update_rna_seq',
+    `/api/data_management/${path}`,
    (responseJson) => {
      successResponseJson = responseJson
    },
  ).post(values).then(() => {
-    const { info, warnings, sampleGuids, fileName } = successResponseJson
+    const { info, warnings } = successResponseJson
    let numLoaded = 0
-    return Promise.all(sampleGuids.map(sampleGuid => new HttpRequestHelper(
-      `/api/data_management/load_rna_seq_sample/${sampleGuid}`,
-      () => {
-        numLoaded += 1
-      },
-      e => warnings.push(`Error loading ${sampleGuid}: ${e.body && e.body.error ? e.body.error : e.message}`),
-    ).post({ fileName, dataType: values.dataType }))).then(() => {
-      info.push(`Successfully loaded data for ${numLoaded} RNA-seq samples`)
-      dispatch({ type: RECEIVE_RNA_SEQ_UPLOAD_STATS, newValue: { info, warnings } })
+    return Promise.all(getUpdateData(successResponseJson, values).map(
+      ([entityUrl, entityId, body]) => new HttpRequestHelper(
+        entityUrl,
+        () => {
+          numLoaded += 1
+        },
+        e => warnings.push(`Error loading ${entityId}: ${e.body && e.body.error ? e.body.error : e.message}`),
+      ).post(body),
+    )).then(() => {
+      info.push(formatSuccessMessage(numLoaded))
+      dispatch({ type: dispatchType, newValue: { info, warnings } })
     })
   })
 }
 
+export const uploadRnaSeq = loadMultipleData(
+  'update_rna_seq',
+  ({ sampleGuids, fileName }, { dataType }) => sampleGuids.map(sampleGuid => ([
+    `/api/data_management/load_rna_seq_sample/${sampleGuid}`, sampleGuid, { fileName, dataType },
+  ])),
+  RECEIVE_RNA_SEQ_UPLOAD_STATS,
+  numLoaded => `Successfully loaded data for ${numLoaded} RNA-seq samples`,
+)
+
+export const addIgv = loadMultipleData(
+  'add_igv',
+  ({ updates }) => updates.map(({ individualGuid, individualId, ...update }) => ([
+    `/api/individual/${individualGuid}/update_igv_sample`, individualId, update,
+  ])),
+  RECEIVE_IGV_UPLOAD_STATS,
+  numLoaded => `Successfully added IGV tracks for ${numLoaded} samples`,
+)
+
 export const uploadPhenotypePrioritization = values => submitRequest(
   'load_phenotype_prioritization_data', RECEIVE_PHE_PRI_UPLOAD_STATS, values,
 )
@@ -86,6 +107,7 @@ export const reducers = {
   qcUploadStats: createSingleValueReducer(RECEIVE_PIPELINE_UPLOAD_STATS, {}),
   rnaSeqUploadStats: createSingleValueReducer(RECEIVE_RNA_SEQ_UPLOAD_STATS, {}),
   phePriUploadStats: createSingleValueReducer(RECEIVE_PHE_PRI_UPLOAD_STATS, {}),
+  igvUploadStats: createSingleValueReducer(RECEIVE_IGV_UPLOAD_STATS, {}),
   allUsers: createSingleValueReducer(RECEIVE_ALL_USERS, [], 'users'),
   allUsersLoading: loadingReducer(REQUEST_ALL_USERS, RECEIVE_ALL_USERS),
 }
diff --git a/ui/pages/DataManagement/selectors.js b/ui/pages/DataManagement/selectors.js
index 2944714d68..1ebcbfe56e 100644
--- a/ui/pages/DataManagement/selectors.js
+++ b/ui/pages/DataManagement/selectors.js
@@ -2,6 +2,7 @@ export const getElasticsearchStatusLoading = state => state.elasticsearchStatusL
 export const getElasticsearchStatusData = state => state.elasticsearchStatus
 export const getQcUploadStats = state => state.qcUploadStats
 export const getRnaSeqUploadStats = state => state.rnaSeqUploadStats
+export const getIgvUploadStats = state => state.igvUploadStats
 export const getAllUsersLoading = state => state.allUsersLoading.isLoading
 export const getAllUsers = state => state.allUsers
 export const getPhePriUploadStats = state => state.phePriUploadStats
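Taken together, the addIgv action above implements a two-phase flow: one POST to data_management/add_igv to validate the table and compute the updates, then one POST per update to the existing per-individual update_igv_sample endpoint. A minimal Python sketch of that flow, assuming a hypothetical authenticated session and base_url:

def add_igv(session, base_url, uploaded_file_id):
    # Phase 1: validate the uploaded table and get the per-individual updates
    resp = session.post(f'{base_url}/api/data_management/add_igv',
                        json={'mappingFile': {'uploadedFileId': uploaded_file_id}})
    resp.raise_for_status()
    body = resp.json()
    num_loaded, warnings = 0, list(body['warnings'])
    # Phase 2: apply each update via the existing per-individual endpoint,
    # collecting failures as warnings instead of aborting (as the reducer does)
    for update in body['updates']:
        individual_guid = update.pop('individualGuid')
        individual_id = update.pop('individualId')
        r = session.post(f'{base_url}/api/individual/{individual_guid}/update_igv_sample', json=update)
        if r.ok:
            num_loaded += 1
        else:
            warnings.append(f'Error loading {individual_id}: {r.text}')
    return num_loaded, warnings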