Merge pull request #3463 from broadinstitute/bulk-load-igv

Bulk load igv

hanars authored Jul 11, 2023
2 parents fe6e943 + d1441ab commit 55da63e
Showing 7 changed files with 229 additions and 52 deletions.
3 changes: 2 additions & 1 deletion seqr/urls.py
@@ -134,7 +134,7 @@
 from seqr.views.apis.awesomebar_api import awesomebar_autocomplete_handler
 from seqr.views.apis.auth_api import login_required_error, login_view, logout_view, policies_required_error
 from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \
-    igv_genomes_proxy
+    igv_genomes_proxy, receive_bulk_igv_table_handler
 from seqr.views.apis.analysis_group_api import update_analysis_group_handler, delete_analysis_group_handler
 from seqr.views.apis.project_api import create_project_handler, update_project_handler, delete_project_handler, \
     project_page_data, project_families, project_overview, project_mme_submisssions, project_individuals, \
@@ -324,6 +324,7 @@
     'data_management/validate_callset': validate_callset,
     'data_management/loaded_projects/(?P<sample_type>[^/]+)/(?P<dataset_type>[^/]+)': get_loaded_projects,
     'data_management/load_data': load_data,
+    'data_management/add_igv': receive_bulk_igv_table_handler,
 
     'summary_data/saved_variants/(?P<tag>[^/]+)': saved_variants_page,
     'summary_data/hpo/(?P<hpo_id>[^/]+)': hpo_summary_data,
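Note: the new data_management/add_igv endpoint reads a JSON body rather than a multipart form — the handler looks up mappingFile.uploadedFileId in request.body (see igv_api.py below). A minimal client-side sketch, assuming seqr's usual /api URL prefix and an already-authenticated requests session (both assumptions, not shown in this diff):

    import json
    import requests

    def add_igv_bulk(session, base_url, uploaded_file_id):
        # POSTs the id of a previously uploaded mapping file to the bulk endpoint.
        # base_url and the '/api' prefix are assumptions; the session must already
        # be authenticated as a PM or data manager user.
        response = session.post(
            f'{base_url}/api/data_management/add_igv',
            data=json.dumps({'mappingFile': {'uploadedFileId': uploaded_file_id}}),
            headers={'Content-Type': 'application/json'},
        )
        response.raise_for_status()
        # A successful response contains 'updates', 'uploadedFileId', 'errors',
        # 'warnings', and 'info' (see the response dict in igv_api.py below)
        return response.json()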
119 changes: 82 additions & 37 deletions seqr/views/apis/igv_api.py
@@ -8,12 +8,12 @@
 from seqr.models import Individual, IgvSample
 from seqr.utils.file_utils import file_iter, does_file_exist, is_google_bucket_file_path, run_command, get_google_project
 from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json
-from seqr.views.utils.file_utils import save_uploaded_file
+from seqr.views.utils.file_utils import save_uploaded_file, load_uploaded_file
 from seqr.views.utils.json_to_orm_utils import get_or_create_model_from_json
 from seqr.views.utils.json_utils import create_json_response
 from seqr.views.utils.orm_to_json_utils import get_json_for_sample
 from seqr.views.utils.permissions_utils import get_project_and_check_permissions, check_project_permissions, \
-    login_and_policies_required, pm_or_data_manager_required
+    login_and_policies_required, pm_or_data_manager_required, get_project_guids_user_can_view
 
 GS_STORAGE_ACCESS_CACHE_KEY = 'gs_storage_access_cache_entry'
 GS_STORAGE_URL = 'https://storage.googleapis.com'
@@ -22,51 +22,49 @@
     'gs': GS_STORAGE_URL,
 }
 
-@pm_or_data_manager_required
-def receive_igv_table_handler(request, project_guid):
-    project = get_project_and_check_permissions(project_guid, request.user, can_edit=True)
-    info = []
-
-    def _process_alignment_records(rows, **kwargs):
-        invalid_row = next((row for row in rows if not 2 <= len(row) <= 3), None)
-        if invalid_row:
-            raise ValueError("Must contain 2 or 3 columns: " + ', '.join(invalid_row))
-        parsed_records = defaultdict(list)
-        for row in rows:
-            parsed_records[row[0]].append({'filePath': row[1], 'sampleId': row[2] if len(row) > 2 else None})
-        return parsed_records
+def _process_alignment_records(rows, num_id_cols=1, **kwargs):
+    num_cols = num_id_cols + 1
+    invalid_row = next((row for row in rows if not num_cols <= len(row) <= num_cols+1), None)
+    if invalid_row:
+        raise ValueError(f"Must contain {num_cols} or {num_cols+1} columns: {', '.join(invalid_row)}")
+    parsed_records = defaultdict(list)
+    for row in rows:
+        row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
+        parsed_records[row_id].append({'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None})
+    return parsed_records
 
-    try:
-        uploaded_file_id, filename, individual_dataset_mapping = save_uploaded_file(request, process_records=_process_alignment_records)
-
-        matched_individuals = Individual.objects.filter(family__project=project, individual_id__in=individual_dataset_mapping.keys())
-        unmatched_individuals = set(individual_dataset_mapping.keys()) - {i.individual_id for i in matched_individuals}
-        if len(unmatched_individuals) > 0:
-            raise Exception('The following Individual IDs do not exist: {}'.format(", ".join(unmatched_individuals)))
+def _process_igv_table_handler(parse_uploaded_file, get_valid_matched_individuals):
+    info = []
 
-        info.append('Parsed {} rows in {} individuals from {}'.format(
-            sum([len(rows) for rows in individual_dataset_mapping.values()]), len(individual_dataset_mapping), filename))
+    try:
+        uploaded_file_id, filename, individual_dataset_mapping = parse_uploaded_file()
 
-        existing_sample_files = defaultdict(set)
-        for sample in IgvSample.objects.select_related('individual').filter(individual__in=matched_individuals):
-            existing_sample_files[sample.individual.individual_id].add(sample.file_path)
+        matched_individuals = get_valid_matched_individuals(individual_dataset_mapping)
 
-        unchanged_rows = set()
-        for individual_id, updates in individual_dataset_mapping.items():
-            unchanged_rows.update([
-                (individual_id, update['filePath']) for update in updates
-                if update['filePath'] in existing_sample_files[individual_id]
-            ])
+        message = f'Parsed {sum([len(rows) for rows in individual_dataset_mapping.values()])} rows in {len(matched_individuals)} individuals'
+        if filename:
+            message += f' from {filename}'
+        info.append(message)
 
-        if unchanged_rows:
-            info.append('No change detected for {} rows'.format(len(unchanged_rows)))
+        existing_sample_files = defaultdict(set)
+        for sample in IgvSample.objects.select_related('individual').filter(individual__in=matched_individuals.keys()):
+            existing_sample_files[sample.individual].add(sample.file_path)
 
+        num_unchanged_rows = 0
         all_updates = []
-        for i in matched_individuals:
-            all_updates += [
-                dict(individualGuid=i.guid, individualId=i.individual_id, **update) for update in individual_dataset_mapping[i.individual_id]
-                if (i.individual_id, update['filePath']) not in unchanged_rows
-            ]
+        for individual, updates in matched_individuals.items():
+            changed_updates = [
+                dict(individualGuid=individual.guid, individualId=individual.individual_id, **update)
+                for update in updates
+                if update['filePath'] not in existing_sample_files[individual]
+            ]
+            all_updates += changed_updates
+            num_unchanged_rows += len(updates) - len(changed_updates)
+
+        if num_unchanged_rows:
+            info.append('No change detected for {} rows'.format(num_unchanged_rows))
 
     except Exception as e:
         return create_json_response({'errors': [str(e)]}, status=400)
@@ -75,11 +73,58 @@ def _process_alignment_records(rows, **kwargs):
         'updates': all_updates,
         'uploadedFileId': uploaded_file_id,
         'errors': [],
+        'warnings': [],
         'info': info,
     }
     return create_json_response(response)
 
 
+@pm_or_data_manager_required
+def receive_igv_table_handler(request, project_guid):
+    project = get_project_and_check_permissions(project_guid, request.user, can_edit=True)
+
+    def _get_valid_matched_individuals(individual_dataset_mapping):
+        matched_individuals = Individual.objects.filter(
+            family__project=project, individual_id__in=individual_dataset_mapping.keys()
+        )
+        unmatched_individuals = set(individual_dataset_mapping.keys()) - {i.individual_id for i in matched_individuals}
+        if len(unmatched_individuals) > 0:
+            raise Exception('The following Individual IDs do not exist: {}'.format(", ".join(unmatched_individuals)))
+
+        return {i: individual_dataset_mapping[i.individual_id] for i in matched_individuals}
+
+    return _process_igv_table_handler(
+        lambda: save_uploaded_file(request, process_records=_process_alignment_records),
+        _get_valid_matched_individuals,
+    )
+
+
+@pm_or_data_manager_required
+def receive_bulk_igv_table_handler(request):
+    def _parse_uploaded_file():
+        uploaded_file_id = json.loads(request.body).get('mappingFile', {}).get('uploadedFileId')
+        if not uploaded_file_id:
+            raise ValueError('No file uploaded')
+        records = _process_alignment_records(load_uploaded_file(uploaded_file_id), num_id_cols=2)
+        return uploaded_file_id, None, records
+
+    def _get_valid_matched_individuals(individual_dataset_mapping):
+        individuals = Individual.objects.filter(
+            family__project__guid__in=get_project_guids_user_can_view(request.user, limit_data_manager=False),
+            family__project__name__in={k[0] for k in individual_dataset_mapping.keys()},
+            individual_id__in={k[1] for k in individual_dataset_mapping.keys()},
+        ).select_related('family__project')
+        individuals_by_project_id = {(i.family.project.name, i.individual_id): i for i in individuals}
+        unmatched = set(individual_dataset_mapping.keys()) - set(individuals_by_project_id.keys())
+        if len(unmatched) > 0:
+            raise Exception(
+                f'The following Individuals do not exist: {", ".join([f"{i} ({p})" for p, i in sorted(unmatched)])}')
+
+        return {v: individual_dataset_mapping[k] for k, v in individuals_by_project_id.items() if individual_dataset_mapping[k]}
+
+    return _process_igv_table_handler(_parse_uploaded_file, _get_valid_matched_individuals)
 
 
 SAMPLE_TYPE_MAP = [
     ('bam', IgvSample.SAMPLE_TYPE_ALIGNMENT),
     ('cram', IgvSample.SAMPLE_TYPE_ALIGNMENT),
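The generalized _process_alignment_records is the crux of the change: with num_id_cols=2, records are keyed by a (project name, individual ID) tuple instead of a bare individual ID, which is what lets the bulk handler span projects. A standalone sketch of that parsing behavior, runnable without Django (rows, project names, and paths are hypothetical):

    from collections import defaultdict

    def process_alignment_records(rows, num_id_cols=1):
        # Mirrors the diff's _process_alignment_records: one required file-path
        # column after the id column(s), plus an optional trailing sample id.
        num_cols = num_id_cols + 1
        invalid_row = next((row for row in rows if not num_cols <= len(row) <= num_cols + 1), None)
        if invalid_row:
            raise ValueError(f"Must contain {num_cols} or {num_cols + 1} columns: {', '.join(invalid_row)}")
        parsed_records = defaultdict(list)
        for row in rows:
            # With num_id_cols=2 the key is a (project name, individual ID) tuple
            row_id = row[0] if num_id_cols == 1 else tuple(row[:num_id_cols])
            parsed_records[row_id].append(
                {'filePath': row[num_id_cols], 'sampleId': row[num_cols] if len(row) > num_cols else None})
        return parsed_records

    rows = [
        ['Project A', 'IND_1', 'gs://bucket/IND_1.cram'],
        ['Project A', 'IND_1', 'gs://bucket/batch_1.dcr.bed.gz', 'SAMPLE_1'],  # optional 4th column
        ['Project B', 'IND_2', 'gs://bucket/IND_2.bam'],
    ]
    records = process_alignment_records(rows, num_id_cols=2)
    assert records[('Project A', 'IND_1')][1]['sampleId'] == 'SAMPLE_1'
    assert records[('Project B', 'IND_2')] == [{'filePath': 'gs://bucket/IND_2.bam', 'sampleId': None}]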
66 changes: 64 additions & 2 deletions seqr/views/apis/igv_api_tests.py
@@ -6,7 +6,7 @@
 from django.core.files.uploadedfile import SimpleUploadedFile
 from django.urls.base import reverse
 from seqr.views.apis.igv_api import fetch_igv_track, receive_igv_table_handler, update_individual_igv_sample, \
-    igv_genomes_proxy
+    igv_genomes_proxy, receive_bulk_igv_table_handler
 from seqr.views.apis.igv_api import GS_STORAGE_ACCESS_CACHE_KEY
 from seqr.views.utils.test_utils import AuthenticationTestCase
 
@@ -118,8 +118,9 @@ def test_receive_alignment_table_handler(self):
         self.assertEqual(response.status_code, 200)
 
         response_json = response.json()
-        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'info', 'updates'})
+        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'warnings', 'info', 'updates'})
         self.assertListEqual(response_json['errors'], [])
+        self.assertListEqual(response_json['warnings'], [])
         self.assertListEqual(
             response_json['info'], ['Parsed 3 rows in 2 individuals from samples.csv', 'No change detected for 1 rows'])
         self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), [
@@ -132,6 +133,67 @@ def test_receive_alignment_table_handler(self):
         response = self.client.post(url, data={'f': f})
         self.assertEqual(response.status_code, 200)
 
+    @mock.patch('seqr.views.apis.igv_api.load_uploaded_file')
+    def test_receive_bulk_alignment_table_handler(self, mock_load_uploaded_file):
+        url = reverse(receive_bulk_igv_table_handler)
+        self.check_pm_login(url)
+
+        # Send invalid requests
+        response = self.client.post(url, content_type='application/json', data=json.dumps({}))
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': ['No file uploaded']})
+
+        uploaded_file_id = 'test_file_id'
+        request_data = json.dumps({'mappingFile': {'uploadedFileId': uploaded_file_id}})
+        pm_projects_rows = [
+            ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/batch_10.dcr.bed.gz', 'NA19675'],
+            ['1kg project nåme with uniçøde', 'NA19675_1', 'gs://readviz/NA19675_1.bam'],
+            ['1kg project nåme with uniçøde', 'NA20870', 'gs://readviz/NA20870.cram'],
+            ['Test Reprocessed Project', 'NA20885', 'gs://readviz/NA20885.cram'],
+        ]
+        rows = pm_projects_rows + [['Non-Analyst Project', 'NA21234', 'gs://readviz/NA21234.cram']]
+        mock_load_uploaded_file.return_value = [['NA19675']] + rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': ['Must contain 3 or 4 columns: NA19675']})
+
+        mock_load_uploaded_file.return_value = rows + [
+            ['Non-project', 'NA19675_1', 'gs://readviz/NA19679.bam'],
+            ['1kg project nåme with uniçøde', 'NA19675', 'gs://readviz/batch_10.dcr.bed.gz'],
+        ]
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(response.json(), {'errors': [
+            'The following Individuals do not exist: NA19675 (1kg project nåme with uniçøde), NA21234 (Non-Analyst Project), NA19675_1 (Non-project)']})
+
+        # Send valid request
+        mock_load_uploaded_file.return_value = pm_projects_rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 200)
+
+        response_json = response.json()
+        self.assertSetEqual(set(response_json.keys()), {'uploadedFileId', 'errors', 'warnings', 'info', 'updates'})
+        self.assertListEqual(response_json['errors'], [])
+        self.assertListEqual(response_json['warnings'], [])
+        self.assertListEqual(response_json['info'], ['Parsed 4 rows in 3 individuals', 'No change detected for 1 rows'])
+        updates = [
+            {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/batch_10.dcr.bed.gz', 'sampleId': 'NA19675'},
+            {'individualGuid': 'I000001_na19675', 'individualId': 'NA19675_1', 'filePath': 'gs://readviz/NA19675_1.bam', 'sampleId': None},
+            {'individualGuid': 'I000015_na20885', 'individualId': 'NA20885', 'filePath': 'gs://readviz/NA20885.cram', 'sampleId': None},
+        ]
+        self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates)
+
+        # test data manager access
+        self.login_data_manager_user()
+        mock_load_uploaded_file.return_value = rows
+        response = self.client.post(url, content_type='application/json', data=request_data)
+        self.assertEqual(response.status_code, 200)
+        response_json = response.json()
+        self.assertListEqual(response_json['info'], ['Parsed 5 rows in 4 individuals', 'No change detected for 1 rows'])
+        self.assertListEqual(sorted(response_json['updates'], key=lambda o: o['individualGuid']), updates + [
+            {'individualGuid': 'I000018_na21234', 'individualId': 'NA21234', 'filePath': 'gs://readviz/NA21234.cram', 'sampleId': None}
+        ])
+
     @mock.patch('seqr.utils.file_utils.subprocess.Popen')
     @mock.patch('seqr.utils.file_utils.os.path.isfile')
     def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess):
2 changes: 2 additions & 0 deletions ui/pages/DataManagement/DataManagement.jsx
@@ -7,6 +7,7 @@ import { getUser, getElasticsearchEnabled } from 'redux/selectors'
 import { Error404, Error401 } from 'shared/components/page/Errors'
 import { SimplePageHeader } from 'shared/components/page/PageHeaderLayout'
 
+import AddIGV from './components/AddIGV'
 import ElasticsearchStatus from './components/ElasticsearchStatus'
 import LoadData from './components/LoadData'
 import RnaSeq from './components/RnaSeq'
@@ -19,6 +20,7 @@ const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' }
 
 const PM_DATA_MANAGEMENT_PAGES = [
   { path: 'load_data', component: LoadData },
+  { path: 'add_igv', component: AddIGV },
 ]
 
 const DATA_MANAGEMENT_PAGES = [
44 changes: 44 additions & 0 deletions ui/pages/DataManagement/components/AddIGV.jsx
@@ -0,0 +1,44 @@
+import React from 'react'
+import { connect } from 'react-redux'
+import { List, Segment } from 'semantic-ui-react'
+
+import FileUploadField, { validateUploadedFile } from 'shared/components/form/XHRUploaderField'
+import UploadFormPage from 'shared/components/page/UploadFormPage'
+
+import { getIgvUploadStats } from '../selectors'
+import { addIgv } from '../reducers'
+
+const mapStateToProps = state => ({
+  fields: [
+    {
+      name: 'mappingFile',
+      validate: validateUploadedFile,
+      component: FileUploadField,
+      dropzoneLabel: (
+        <Segment basic textAlign="left">
+          Upload a file with desired IGV tracks. Include one row per track.
+          For merged RNA tracks, include one row for coverage and one for junctions.
+          <br />
+          Columns are as follows:
+          <br />
+          <List ordered>
+            <List.Item>Project</List.Item>
+            <List.Item>Individual ID</List.Item>
+            <List.Item>IGV Track File Path</List.Item>
+            <List.Item>
+              Optional: Sample ID if different from Individual ID.
+              Used primarily for gCNV files to identify the sample in the batch path
+            </List.Item>
+          </List>
+        </Segment>
+      ),
+    },
+  ],
+  uploadStats: getIgvUploadStats(state),
+})
+
+const mapDispatchToProps = {
+  onSubmit: addIgv,
+}
+
+export default connect(mapStateToProps, mapDispatchToProps)(UploadFormPage)
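Per the dropzone label above, each row needs a project, an individual ID, and a track file path, with an optional fourth sample-ID column and one row per track (two rows for merged RNA). A hypothetical example of assembling such a file — all project, individual, and file names below are invented for illustration:

    # Sketch only: writes a tab-separated mapping file in the 3-or-4-column
    # layout the dropzone describes; every name and path here is made up.
    example_rows = [
        ['My Project', 'IND_1', 'gs://my-bucket/IND_1.cram'],
        ['My Project', 'IND_1', 'gs://my-bucket/gcnv_batch_1.bed.gz', 'SAMPLE_1'],  # gCNV batch path
        ['Other Project', 'IND_2', 'gs://my-bucket/IND_2.coverage.bigWig'],   # merged RNA: coverage row
        ['Other Project', 'IND_2', 'gs://my-bucket/IND_2.junctions.bed.gz'],  # merged RNA: junctions row
    ]
    with open('igv_tracks.tsv', 'w') as f:
        f.write('\n'.join('\t'.join(row) for row in example_rows))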