Skip to content

Commit

Permalink
Merge pull request #3984 from broadinstitute/dont-skip-gtex
Browse files Browse the repository at this point in the history
do not allow unnecessary GTEX data in rna upload
  • Loading branch information
hanars authored Mar 19, 2024
2 parents e083b1c + 5d4bf80 commit 499d67d
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 8 deletions.
2 changes: 0 additions & 2 deletions seqr/management/tests/load_rna_seq_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def test_tpm(self, mock_utils_logger):
'NA19675_D2\t1kg project nåme with uniçøde\t\tENSG00000240361\t12.6\t\n',
'NA19675_D2\t1kg project nåme with uniçøde\t\tENSG00000233750\t1.26\t\n',
'NA19678_D1\t1kg project nåme with uniçøde\t\tENSG00000233750\t 6.04\twhole_blood\n',
'GTEX-001\t1kg project nåme with uniçøde\t\tENSG00000240361\t3.1\tinvalid\n',
'NA19677\t1kg project nåme with uniçøde\t\tENSG00000233750\t5.31\tmuscle\n',
'GTEX-001\t1kg project nåme with uniçøde\t\tENSG00000233750\t7.8\tmuscle\n',
'NA19678\tTest Reprocessed Project\t\tENSG00000240361\t0.2\twhole_blood\n',
],
unmatched_samples='NA19677, NA19678, NA19678_D1',
Expand Down
2 changes: 0 additions & 2 deletions seqr/views/apis/data_manager_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,8 +708,6 @@ def test_kibana_proxy(self):
['NA19675_D2', '1kg project nåme with uniçøde', 'ENSG00000233750', 'NA19675_D2', 'muscle', 0.0],
# no matched individual NA19675_D3
['NA19675_D3', '1kg project nåme with uniçøde', 'ENSG00000233750', 'NA19675_D3', 'fibroblasts', 0.064],
# skip GTEX samples
['GTEX_001', '1kg project nåme with uniçøde', 'ENSG00000233750', 'NA19675_D3', 'whole_blood', 1.95],
# a different project sample NA20888
['NA20888', 'Test Reprocessed Project', 'ENSG00000240361', 'NA20888', 'muscle', 0.112],
# a project mismatched sample NA20878
Expand Down
6 changes: 2 additions & 4 deletions seqr/views/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def _add_splice_rank(sample_data_rows):
'tpm': {
'model_class': RnaSeqTpm,
'columns': TPM_HEADER_COLS,
'additional_kwargs': {'should_skip': lambda row: row[SAMPLE_ID_COL].startswith('GTEX')},
'additional_kwargs': {},
},
'splice_outlier': {
'model_class': RnaSeqSpliceOutlier,
Expand Down Expand Up @@ -331,7 +331,7 @@ def _validate_rna_header(header, column_map):
def _load_rna_seq_file(
file_path, user, potential_loaded_samples, update_sample_models, save_sample_data, get_matched_sample,
column_map, mapping_file=None, allow_missing_gene=False, ignore_extra_samples=False,
should_skip=None, format_fields=None,
format_fields=None,
):

sample_id_to_individual_id_mapping = {}
Expand All @@ -349,8 +349,6 @@ def _load_rna_seq_file(
gene_ids = set()
for line in tqdm(parsed_f, unit=' rows'):
row = dict(zip(header, line))
if should_skip and should_skip(row):
continue

row_dict = {mapped_key: row[col] for mapped_key, col in column_map.items()}
for mapped_key, format_func in (format_fields or {}).items():
Expand Down

0 comments on commit 499d67d

Please sign in to comment.