diff --git a/seqr/management/commands/load_rna_seq.py b/seqr/management/commands/load_rna_seq.py index fb3d7ab205..8aff327956 100644 --- a/seqr/management/commands/load_rna_seq.py +++ b/seqr/management/commands/load_rna_seq.py @@ -38,4 +38,4 @@ def _save_sample_data(self, sample_guid, data_by_gene): sample = Sample.objects.get(guid=sample_guid) models = self.model_cls.objects.bulk_create( [self.model_cls(sample=sample, **data) for data in data_by_gene.values()], batch_size=1000) - logger.info(f'create {len(models)} {self.model_cls} for {sample.sample_id}') + logger.info(f'create {len(models)} {self.model_cls.__name__} for {sample.sample_id}') diff --git a/seqr/management/commands/load_rna_seq_outlier.py b/seqr/management/commands/load_rna_seq_outlier.py deleted file mode 100644 index 49d2d7091b..0000000000 --- a/seqr/management/commands/load_rna_seq_outlier.py +++ /dev/null @@ -1,38 +0,0 @@ -from django.core.management.base import BaseCommand -import logging - -from seqr.models import RnaSeqOutlier, Sample -from seqr.views.utils.dataset_utils import load_rna_seq_outlier -from seqr.views.utils.file_utils import parse_file - -logger = logging.getLogger(__name__) - -class Command(BaseCommand): - help = 'Load RNA-Seq Outlier data' - - def add_arguments(self, parser): - parser.add_argument('input_file') - parser.add_argument('--mapping-file') - parser.add_argument('--ignore-extra-samples', action='store_true') - - def handle(self, *args, **options): - mapping_file = None - if options['mapping_file']: - with open(options['mapping_file']) as f: - mapping_file = parse_file(options['mapping_file'], f) - - sample_guids, _, _ = load_rna_seq_outlier( - options['input_file'], self._save_sample_data, lambda *args: {}, create_models_before_save=True, - mapping_file=mapping_file, ignore_extra_samples=options['ignore_extra_samples']) - - Sample.bulk_update(user=None, update_json={'is_active': True}, guid__in=sample_guids) - - @staticmethod - def _save_sample_data(sample_guid, data_by_gene): - sample = Sample.objects.get(guid=sample_guid) - models = RnaSeqOutlier.objects.bulk_create( - [RnaSeqOutlier(sample=sample, **data) for data in data_by_gene.values()]) - logger.info(f'create {len(models)} RnaSeqOutliers for {sample.sample_id}') - - - diff --git a/seqr/management/tests/load_rna_seq_outlier_tests.py b/seqr/management/tests/load_rna_seq_outlier_tests.py deleted file mode 100644 index 703b1c029f..0000000000 --- a/seqr/management/tests/load_rna_seq_outlier_tests.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- -import mock - -from django.core.management import call_command -from django.test import TestCase - -from seqr.models import Sample, RnaSeqOutlier -from seqr.utils.middleware import ErrorsWarningsException - -RNA_FILE_ID = 'tmp_-_2021-03-01T00:00:00_-_test_data_manager_-_new_muscle_samples.tsv.gz' -EXISTING_SAMPLE_GUID = 'S000152_na19675_d2' - -class LoadRnaSeqTest(TestCase): - databases = '__all__' - fixtures = ['users', '1kg_project', 'reference_data'] - - @mock.patch('seqr.management.commands.load_rna_seq_outlier.logger.info') - @mock.patch('seqr.management.commands.load_rna_seq_outlier.open') - @mock.patch('seqr.utils.file_utils.gzip.open') - def test_command(self, mock_gzip_open, mock_open, mock_logger): - mock_gzip_file = mock_gzip_open.return_value.__enter__.return_value - mock_gzip_file.__iter__.return_value = ['invalid\theader'] - - with self.assertRaises(ValueError) as e: - call_command('load_rna_seq_outlier', RNA_FILE_ID) - self.assertEqual(str(e.exception), 'Invalid file: missing column(s): geneID, pValue, padjust, project, sampleID, tissue, zScore') - - mock_gzip_file.__iter__.return_value = [ - 'sampleID\tproject\tgeneID\tdetail\tpValue\tpadjust\tzScore\ttissue\n', - 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000240361\tdetail1\t0.01\t0.13\t-3.1\tmuscle\n', - 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000240361\tdetail2\t0.01\t0.13\t-3.1\tmuscle\n', - 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', - 'NA19675_D3\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', - 'NA19675_D4\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', - ] - mock_open.return_value.__enter__.return_value.__iter__.return_value = ['NA19675_D4\tNA19678'] - - with self.assertRaises(ErrorsWarningsException) as e: - call_command('load_rna_seq_outlier', RNA_FILE_ID) - self.assertEqual(e.exception.errors, ['Unable to find matches for the following samples: NA19675_D3, NA19675_D4']) - - with self.assertRaises(ErrorsWarningsException) as e: - call_command('load_rna_seq_outlier', RNA_FILE_ID, '--mapping-file', 'map.tsv') - self.assertEqual(e.exception.errors, ['Unable to find matches for the following samples: NA19675_D3']) - - call_command('load_rna_seq_outlier', RNA_FILE_ID, '--ignore-extra-samples') - - rna_samples = Sample.objects.filter(individual_id=1, sample_id='NA19675_D2', sample_type='RNA') - self.assertEqual(len(rna_samples), 1) - sample = rna_samples.first() - self.assertEqual(sample.guid, EXISTING_SAMPLE_GUID) - self.assertTrue(sample.is_active) - self.assertIsNone(sample.elasticsearch_index) - self.assertEqual(sample.data_source, 'new_muscle_samples.tsv.gz') - self.assertEqual(sample.tissue_type, 'M') - - models = RnaSeqOutlier.objects.all() - self.assertEqual(models.count(), 2) - self.assertSetEqual({model.sample for model in models}, {sample}) - self.assertListEqual(list(models.values_list('gene_id', 'p_adjust', 'p_value', 'z_score')), [ - ('ENSG00000240361', 0.13, 0.01, -3.1), ('ENSG00000233750', 0.0000057, 0.064, 7.8), - ]) - mock_logger.assert_called_with('create 2 RnaSeqOutliers for NA19675_D2') diff --git a/seqr/management/tests/load_rna_seq_tests.py b/seqr/management/tests/load_rna_seq_tests.py index ce533d2901..f7f618c7df 100644 --- a/seqr/management/tests/load_rna_seq_tests.py +++ b/seqr/management/tests/load_rna_seq_tests.py @@ -96,4 +96,58 @@ def test_tpm(self, mock_open, mock_logger, mock_utils_logger, mock_gzip_open): self.assertEqual(models.values('sample').distinct().count(), 2) mock_logger.info.assert_has_calls([ mock.call('create 1 RnaSeqTpm for NA19678_D1'), + mock.call('DONE'), + ]) + + @mock.patch('seqr.management.commands.load_rna_seq.logger.info') + @mock.patch('seqr.management.commands.load_rna_seq.open') + @mock.patch('seqr.utils.file_utils.gzip.open') + def test_outlier(self, mock_gzip_open, mock_open, mock_logger): + mock_gzip_file = mock_gzip_open.return_value.__enter__.return_value + mock_gzip_file.__iter__.return_value = ['invalid\theader'] + + with self.assertRaises(ValueError) as e: + call_command('load_rna_seq', 'outlier', RNA_FILE_ID) + self.assertEqual(str(e.exception), + 'Invalid file: missing column(s): geneID, pValue, padjust, project, sampleID, tissue, zScore') + + mock_gzip_file.__iter__.return_value = [ + 'sampleID\tproject\tgeneID\tdetail\tpValue\tpadjust\tzScore\ttissue\n', + 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000240361\tdetail1\t0.01\t0.13\t-3.1\tmuscle\n', + 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000240361\tdetail2\t0.01\t0.13\t-3.1\tmuscle\n', + 'NA19675_D2\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', + 'NA19675_D3\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', + 'NA19675_D4\t1kg project nåme with uniçøde\tENSG00000233750\tdetail1\t0.064\t0.0000057\t7.8\tmuscle\n', + ] + mock_open.return_value.__enter__.return_value.__iter__.return_value = ['NA19675_D4\tNA19678'] + + with self.assertRaises(ErrorsWarningsException) as e: + call_command('load_rna_seq', 'outlier', RNA_FILE_ID) + self.assertEqual(e.exception.errors, + ['Unable to find matches for the following samples: NA19675_D3, NA19675_D4']) + + with self.assertRaises(ErrorsWarningsException) as e: + call_command('load_rna_seq', 'outlier', RNA_FILE_ID, '--mapping-file', 'map.tsv') + self.assertEqual(e.exception.errors, ['Unable to find matches for the following samples: NA19675_D3']) + + call_command('load_rna_seq', 'outlier', RNA_FILE_ID, '--ignore-extra-samples') + + rna_samples = Sample.objects.filter(individual_id=1, sample_id='NA19675_D2', sample_type='RNA') + self.assertEqual(len(rna_samples), 1) + sample = rna_samples.first() + self.assertEqual(sample.guid, EXISTING_SAMPLE_GUID) + self.assertTrue(sample.is_active) + self.assertIsNone(sample.elasticsearch_index) + #self.assertEqual(sample.data_source, 'new_muscle_samples.tsv.gz') TODO? + self.assertEqual(sample.tissue_type, 'M') + + models = RnaSeqOutlier.objects.all() + self.assertEqual(models.count(), 2) + self.assertSetEqual({model.sample for model in models}, {sample}) + self.assertListEqual(list(models.values_list('gene_id', 'p_adjust', 'p_value', 'z_score')), [ + ('ENSG00000240361', 0.13, 0.01, -3.1), ('ENSG00000233750', 0.0000057, 0.064, 7.8), + ]) + mock_logger.assert_has_calls([ + mock.call('create 2 RnaSeqOutlier for NA19675_D2'), + mock.call('DONE'), ])