diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index 9fb4c5ca8c..c5eeb13378 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -250,10 +250,12 @@ def _load_filtered_table(self, sample_data, intervals=None, **kwargs):
     def _get_table_path(cls, path, use_ssd_dir=False):
         return f'{SSD_DATASETS_DIR if use_ssd_dir else DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}'

-    def _read_table(self, path, drop_globals=None, use_ssd_dir=False):
+    def _read_table(self, path, drop_globals=None, use_ssd_dir=False, skip_missing_field=None):
         table_path = self._get_table_path(path, use_ssd_dir=use_ssd_dir)
         if 'variant_ht' in self._load_table_kwargs:
             ht = self._query_table_annotations(self._load_table_kwargs['variant_ht'], table_path)
+            if skip_missing_field and not ht.any(hl.is_defined(ht[skip_missing_field])):
+                return None
             ht_globals = hl.read_table(table_path).globals
             if drop_globals:
                 ht_globals = ht_globals.drop(*drop_globals)
@@ -275,11 +277,17 @@ def _parse_sample_data(self, sample_data):
         logger.info(f'Loading {self.DATA_TYPE} data for {len(family_samples)} families in {len(project_samples)} projects')
         return project_samples, family_samples

-    def _load_filtered_project_hts(self, project_samples, **kwargs):
+    def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, **kwargs):
         filtered_project_hts = []
         exception_messages = set()
-        for project_guid, project_sample_data in project_samples.items():
-            project_ht = self._read_table(f'projects/{project_guid}.ht', use_ssd_dir=True)
+        for i, (project_guid, project_sample_data) in enumerate(project_samples.items()):
+            project_ht = self._read_table(
+                f'projects/{project_guid}.ht',
+                use_ssd_dir=True,
+                skip_missing_field='entries' if skip_all_missing or i > 0 else None,
+            )
+            if project_ht is None:
+                continue
             try:
                 filtered_project_hts.append(self._filter_entries_table(project_ht, project_sample_data, **kwargs))
             except HTTPBadRequest as e:
@@ -960,10 +968,9 @@ def lookup_variant(self, variant_id, sample_data=None):

         if sample_data:
             project_samples, _ = self._parse_sample_data(sample_data)
-            for pht, _ in self._load_filtered_project_hts(project_samples):
+            for pht, _ in self._load_filtered_project_hts(project_samples, skip_all_missing=True):
                 project_entries = pht.aggregate(hl.agg.take(hl.struct(**{k: v(pht) for k, v in entry_annotations.items()}), 1))
-                if project_entries:
-                    variant[FAMILY_GUID_FIELD] += project_entries[0][FAMILY_GUID_FIELD]
-                    variant[GENOTYPES_FIELD].update(project_entries[0][GENOTYPES_FIELD])
+                variant[FAMILY_GUID_FIELD] += project_entries[0][FAMILY_GUID_FIELD]
+                variant[GENOTYPES_FIELD].update(project_entries[0][GENOTYPES_FIELD])

         return variant
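The `skip_missing_field` / `skip_all_missing` arguments let a multi-project lookup drop project tables that have no defined entries for the queried variant before any filtering or aggregation happens, which is also why `lookup_variant` no longer needs the `if project_entries:` guard. A minimal pure-Python sketch of that control flow; the dict-based tables and the `read_project_table` / `load_filtered_project_tables` helpers are illustrative stand-ins for the Hail tables and `_read_table`, not the real implementation:

```python
# Pure-Python sketch of the skip behavior added to _load_filtered_project_hts.
def read_project_table(tables, project_guid, skip_missing_field=None):
    table = tables[project_guid]
    # Mirrors _read_table returning None when no row has the requested field defined
    if skip_missing_field and not any(row.get(skip_missing_field) is not None for row in table):
        return None
    return table


def load_filtered_project_tables(tables, project_samples, skip_all_missing=False):
    filtered = []
    for i, (project_guid, sample_data) in enumerate(project_samples.items()):
        # During a search only the first project must be read in full; for a variant
        # lookup (skip_all_missing=True) any project without entries can be skipped.
        project_table = read_project_table(
            tables, project_guid,
            skip_missing_field='entries' if skip_all_missing or i > 0 else None,
        )
        if project_table is None:
            continue
        filtered.append((project_guid, sample_data, project_table))
    return filtered


tables = {
    'P1': [{'entries': None}],                 # no defined entries for the variant
    'P2': [{'entries': ['sample genotype']}],  # has entries
}
print(load_filtered_project_tables(tables, {'P1': ['S1'], 'P2': ['S2']}, skip_all_missing=True))
```

Running the sketch returns only the `P2` tuple: `P1` is skipped before any per-project filtering, just as empty projects are skipped before aggregation in `lookup_variant`.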
diff --git a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py
index 0fd14e370d..25c46f8f53 100644
--- a/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py
+++ b/seqr/management/tests/check_for_new_samples_from_pipeline_tests.py
@@ -116,8 +116,8 @@ def test_command(self, mock_logger, mock_send_email, mock_send_slack, mock_subpr
         ])
         mock_logger.warining.assert_not_called()

-        mock_redis.return_value.delete.assert_called_with('search_results__*')
-        mock_utils_logger.info.assert_called_with('Reset 1 cached results')
+        mock_redis.return_value.delete.assert_called_with('search_results__*', 'variant_lookup_results__*')
+        mock_utils_logger.info.assert_called_with('Reset 2 cached results')

        # Tests Sample models created/updated
        updated_sample_models = Sample.objects.filter(guid__in={
diff --git a/seqr/management/tests/reset_cached_search_results_tests.py b/seqr/management/tests/reset_cached_search_results_tests.py
index 7e218f2a98..509d77be23 100644
--- a/seqr/management/tests/reset_cached_search_results_tests.py
+++ b/seqr/management/tests/reset_cached_search_results_tests.py
@@ -23,7 +23,7 @@ def setUpTestData(cls):
     @mock.patch('seqr.views.utils.variant_utils.logger')
     @mock.patch('seqr.management.commands.reset_cached_search_results.logger')
     def test_command(self, mock_command_logger, mock_utils_logger, mock_redis):
-        mock_redis.return_value.keys.side_effect = lambda pattern: [pattern]
+        mock_redis.return_value.keys.side_effect = lambda pattern: [pattern] if pattern != 'variant_lookup_results__*' else []

         # Test command with a --project argument
         call_command('reset_cached_search_results', '--project={}'.format(PROJECT_NAME))
@@ -47,9 +47,10 @@ def test_command(self, mock_command_logger, mock_utils_logger, mock_redis):

         # Test command for reset metadata
         mock_redis.reset_mock()
+        mock_redis.return_value.keys.side_effect = lambda pattern: [pattern]
         call_command('reset_cached_search_results', '--reset-index-metadata')
-        mock_redis.return_value.delete.assert_called_with('search_results__*', 'index_metadata__*')
-        mock_utils_logger.info.assert_called_with('Reset 2 cached results')
+        mock_redis.return_value.delete.assert_called_with('search_results__*', 'variant_lookup_results__*', 'index_metadata__*')
+        mock_utils_logger.info.assert_called_with('Reset 3 cached results')
         mock_command_logger.info.assert_called_with('Reset cached search results for all projects')

         # Test with connection error
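These assertions cover the invalidation side of the new lookup cache: clearing cached search results now also clears `variant_lookup_results__*` keys. A condensed sketch of that pattern-based deletion, assuming a standard redis-py client; the key patterns match the `reset_cached_search_results` change further down in this diff:

```python
import redis

# Sketch of the pattern-based cache invalidation these tests exercise.
def reset_cached_results(redis_client, reset_index_metadata=False):
    keys_to_delete = redis_client.keys(pattern='search_results__*')
    keys_to_delete += redis_client.keys(pattern='variant_lookup_results__*')
    if reset_index_metadata:
        keys_to_delete += redis_client.keys(pattern='index_metadata__*')
    if keys_to_delete:
        redis_client.delete(*keys_to_delete)
    return len(keys_to_delete)


# Example usage (assumes a Redis server on localhost:6379):
# print(f'Reset {reset_cached_results(redis.StrictRedis(host="localhost"))} cached results')
```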
diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py
index 2de11cb137..c4b700e2ed 100644
--- a/seqr/utils/search/hail_search_utils.py
+++ b/seqr/utils/search/hail_search_utils.py
@@ -2,7 +2,7 @@
 from django.db.models import F, Min, Count
 import requests

-from reference_data.models import Omim, GeneConstraint, GENOME_VERSION_LOOKUP, GENOME_VERSION_GRCh38
+from reference_data.models import Omim, GeneConstraint, GENOME_VERSION_LOOKUP
 from seqr.models import Sample, PhenotypePrioritization
 from seqr.utils.search.constants import PRIORITIZED_GENE_SORT, X_LINKED_RECESSIVE
 from seqr.utils.xpos_utils import MIN_POS, MAX_POS
@@ -74,9 +74,8 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id
     return response_json['results']


-def hail_variant_lookup(user, variant_id, genome_version=None, samples=None, **kwargs):
+def hail_variant_lookup(user, variant_id, samples=None, **kwargs):
     body = {
-        'genome_version': GENOME_VERSION_LOOKUP[genome_version or GENOME_VERSION_GRCh38],
         'variant_id': variant_id,
         **kwargs,
     }
diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py
index 1dd1f1c847..ab649e0834 100644
--- a/seqr/utils/search/search_utils_tests.py
+++ b/seqr/utils/search/search_utils_tests.py
@@ -29,8 +29,8 @@ def set_up(self):
     def set_cache(self, cached):
         self.mock_redis.get.return_value = json.dumps(cached)

-    def assert_cached_results(self, expected_results, sort='xpos'):
-        cache_key = f'search_results__{self.results_model.guid}__{sort}'
+    def assert_cached_results(self, expected_results, sort='xpos', cache_key=None):
+        cache_key = cache_key or f'search_results__{self.results_model.guid}__{sort}'
         self.mock_redis.set.assert_called_with(cache_key, mock.ANY)
         self.assertEqual(json.loads(self.mock_redis.set.call_args.args[1]), expected_results)
         self.mock_redis.expire.assert_called_with(cache_key, timedelta(weeks=2))
@@ -55,11 +55,13 @@ def test_variant_lookup(self, mock_variant_lookup):
         mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT
         variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38')
         self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT)
-        mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='38')
+        mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38')
+        cache_key = 'variant_lookup_results__1-10439-AC-A__38__test_user'
+        self.assert_cached_results(variant, cache_key=cache_key)

         variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families)
         self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT)
-        mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='37', samples=mock.ANY)
+        mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh37', samples=mock.ANY)
         expected_samples = {s for s in self.search_samples if s.guid not in NON_SNP_INDEL_SAMPLES}
         self.assertSetEqual(set(mock_variant_lookup.call_args.kwargs['samples']), expected_samples)

@@ -67,6 +69,13 @@ def test_variant_lookup(self, mock_variant_lookup):
             variant_lookup(self.user, '100-10439-AC-A')
         self.assertEqual(str(cm.exception), 'Invalid variant 100-10439-AC-A')

+        mock_variant_lookup.reset_mock()
+        self.set_cache(variant)
+        cached_variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38')
+        self.assertDictEqual(variant, cached_variant)
+        mock_variant_lookup.assert_not_called()
+        self.mock_redis.get.assert_called_with(cache_key)
+
     def test_get_single_variant(self, mock_get_variants_for_ids):
         mock_get_variants_for_ids.return_value = [PARSED_VARIANTS[0]]
         variant = get_single_variant(self.families, '2-103343353-GAGA-G', user=self.user)
diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py
index bfa72a590b..b69a760b40 100644
--- a/seqr/utils/search/utils.py
+++ b/seqr/utils/search/utils.py
@@ -2,6 +2,7 @@
 from copy import deepcopy
 from datetime import timedelta

+from reference_data.models import GENOME_VERSION_LOOKUP, GENOME_VERSION_GRCh38
 from seqr.models import Sample, Individual, Project
 from seqr.utils.redis_utils import safe_redis_get_json, safe_redis_set_json
 from seqr.utils.search.constants import XPOS_SORT_KEY, PRIORITIZED_GENE_SORT, RECESSIVE, COMPOUND_HET, \
@@ -152,7 +153,13 @@ def _get_variants_for_variant_ids(families, variant_ids, user, dataset_type=None
     )


-def variant_lookup(user, variant_id, families=None, **kwargs):
+def variant_lookup(user, variant_id, families=None, genome_version=None, **kwargs):
+    genome_version = genome_version or GENOME_VERSION_GRCh38
+    cache_key = f'variant_lookup_results__{variant_id}__{genome_version}__{user}'
+    variant = safe_redis_get_json(cache_key)
+    if variant:
+        return variant
+
     parsed_variant_id = _parse_variant_id(variant_id)
     if not parsed_variant_id:
         raise InvalidSearchException(f'Invalid variant {variant_id}')
@@ -162,7 +169,9 @@ def variant_lookup(user, variant_id, families=None, **kwargs):
         kwargs['samples'] = samples

     lookup_func = backend_specific_call(_raise_search_error('Hail backend is disabled'), hail_variant_lookup)
-    return lookup_func(user, parsed_variant_id, **kwargs)
+    variant = lookup_func(user, parsed_variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], **kwargs)
+    safe_redis_set_json(cache_key, variant, expire=timedelta(weeks=2))
+    return variant


 def _get_search_cache_key(search_model, sort=None):
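`variant_lookup` now follows a cache-aside pattern: return a cached result when one exists, otherwise call the backend and cache the response for two weeks under a key scoped to variant id, genome version, and user. A sketch of that flow using redis-py directly in place of seqr's `safe_redis_get_json` / `safe_redis_set_json` helpers; the function name, arguments, and the inline `GENOME_VERSION_LOOKUP` dict are illustrative assumptions, while the key format and expiry match the diff:

```python
import json
from datetime import timedelta

import redis

GENOME_VERSION_LOOKUP = {'37': 'GRCh37', '38': 'GRCh38'}  # assumed mapping, mirrors reference_data
VARIANT_LOOKUP_EXPIRY = timedelta(weeks=2)


def cached_variant_lookup(redis_client, lookup_func, user, variant_id, genome_version='38'):
    cache_key = f'variant_lookup_results__{variant_id}__{genome_version}__{user}'
    cached = redis_client.get(cache_key)
    if cached:
        return json.loads(cached)  # cache hit: skip the backend call entirely
    # Cache miss: the backend receives the full genome build name, the cache key keeps the short form
    variant = lookup_func(user, variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version])
    redis_client.set(cache_key, json.dumps(variant), ex=VARIANT_LOOKUP_EXPIRY)
    return variant
```

Scoping the key by user matters because the cached payload includes per-family genotypes that not every user is allowed to see.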
diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
index 738bf33426..188b7c7a06 100644
--- a/seqr/views/apis/data_manager_api_tests.py
+++ b/seqr/views/apis/data_manager_api_tests.py
@@ -859,6 +859,13 @@ def _set_file_iter_stdout(rows):
             mock_file_iter.stdout = [('\t'.join([str(col) for col in row]) + '\n').encode() for row in rows]
             mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter]

+        _set_file_iter_stdout([])
+        invalid_body = {**body, 'file': body['file'].replace('tsv', 'xlsx')}
+        response = self.client.post(url, content_type='application/json', data=json.dumps(invalid_body))
+        self.assertEqual(response.status_code, 400)
+        self.assertDictEqual(
+            response.json(), {'error': 'Unexpected iterated file type: gs://rna_data/muscle_samples.xlsx'})
+
         _set_file_iter_stdout([['']])
         response = self.client.post(url, content_type='application/json', data=json.dumps(body))
         self.assertEqual(response.status_code, 400)
diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py
index 8497c1df25..03e0ec36d5 100644
--- a/seqr/views/utils/dataset_utils.py
+++ b/seqr/views/utils/dataset_utils.py
@@ -20,6 +20,9 @@
 logger = SeqrLogger(__name__)


+MAX_UNSAVED_DATA_PER_SAMPLE = 5000
+
+
 def load_mapping_file(mapping_file_path, user):
     file_content = parse_file(mapping_file_path, file_iter(mapping_file_path, user=user))
     return load_mapping_file_content(file_content)
@@ -389,7 +392,8 @@ def _load_rna_seq_file(
                 continue

             if current_sample != sample_guid:
-                if len(samples_by_guid[current_sample]) > 5000:
+                # If a large amount of data has been parsed for the previous sample, save and do not keep in memory
+                if len(samples_by_guid[current_sample]) > MAX_UNSAVED_DATA_PER_SAMPLE:
                     save_sample_data(current_sample, samples_by_guid[current_sample])
                     del samples_by_guid[current_sample]
                 current_sample = sample_guid
diff --git a/seqr/views/utils/file_utils.py b/seqr/views/utils/file_utils.py
index ffe76d2df9..3b816ac4c8 100644
--- a/seqr/views/utils/file_utils.py
+++ b/seqr/views/utils/file_utils.py
@@ -68,7 +68,7 @@ def parse_file(filename, stream, iter_file=False):
     elif filename.endswith('.json') and not iter_file:
         return json.loads(stream.read())

-    raise ValueError("Unexpected file type: {}".format(filename))
+    raise ValueError(f"Unexpected{' iterated' if iter_file else ''} file type: {filename}")


 def _parse_excel_string_cell(cell):
diff --git a/seqr/views/utils/orm_to_json_utils_tests.py b/seqr/views/utils/orm_to_json_utils_tests.py
index 718c558d13..52af8585f5 100644
--- a/seqr/views/utils/orm_to_json_utils_tests.py
+++ b/seqr/views/utils/orm_to_json_utils_tests.py
@@ -155,7 +155,7 @@ def test_json_for_saved_variants_with_tags(self):
         self.assertSetEqual(set(var_2['noteGuids']), set(v2_note_guids))

         self.assertSetEqual(set(json['variantTagsByGuid'].keys()), v1_tag_guids | v2_tag_guids)
-        self.assertSetEqual(set(next(iter(json['variantTagsByGuid'].values())).keys()), TAG_FIELDS)
+        self.assertSetEqual(set(json['variantTagsByGuid']['VT1726961_2103343353_r0390_100'].keys()), TAG_FIELDS)
         for tag_guid in v1_tag_guids:
             self.assertListEqual(json['variantTagsByGuid'][tag_guid]['variantGuids'], [variant_guid_1])
         for tag_guid in v2_tag_guids:
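The `MAX_UNSAVED_DATA_PER_SAMPLE` constant in the `dataset_utils.py` change above bounds how much parsed RNA-seq data is held in memory per sample: once a sample accumulates more than the threshold, its rows are saved and dropped before the next sample is parsed. A pure-Python sketch of that loop, with a print-based `save_sample_data` standing in for the real persistence call and `load_rows` as a hypothetical driver:

```python
from collections import defaultdict

MAX_UNSAVED_DATA_PER_SAMPLE = 5000


def save_sample_data(sample_guid, rows):
    # Stand-in for the real save; the production code persists rows to the database.
    print(f'Saving {len(rows)} rows for {sample_guid}')


def load_rows(parsed_rows):
    samples_by_guid = defaultdict(list)
    current_sample = None
    for sample_guid, row in parsed_rows:
        if current_sample != sample_guid:
            # If a large amount of data was parsed for the previous sample, save it
            # now and drop it from memory instead of holding it until the file ends.
            if current_sample and len(samples_by_guid[current_sample]) > MAX_UNSAVED_DATA_PER_SAMPLE:
                save_sample_data(current_sample, samples_by_guid[current_sample])
                del samples_by_guid[current_sample]
            current_sample = sample_guid
        samples_by_guid[sample_guid].append(row)
    return samples_by_guid


# Example: S1 exceeds the threshold and is flushed mid-parse, S2 stays in memory
# load_rows([('S1', i) for i in range(6000)] + [('S2', 0)])
```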
diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py
index a6ac84dd9e..d6bb0ae7b9 100644
--- a/seqr/views/utils/variant_utils.py
+++ b/seqr/views/utils/variant_utils.py
@@ -88,6 +88,7 @@ def reset_cached_search_results(project, reset_index_metadata=False):
                 keys_to_delete += redis_client.keys(pattern='search_results__{}*'.format(guid))
         else:
             keys_to_delete = redis_client.keys(pattern='search_results__*')
+        keys_to_delete += redis_client.keys(pattern='variant_lookup_results__*')
         if reset_index_metadata:
             keys_to_delete += redis_client.keys(pattern='index_metadata__*')
         if keys_to_delete:
diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx
index a32b23aa99..d18b23f3b9 100644
--- a/ui/shared/components/panel/variants/Annotations.jsx
+++ b/ui/shared/components/panel/variants/Annotations.jsx
@@ -7,6 +7,7 @@ import { Popup, Label, Icon } from 'semantic-ui-react'

 import {
   getGenesById,
+  getTranscriptsById,
   getLocusListIntervalsByChromProject,
   getOmimIntervalsByChrom,
   getFamiliesByGuid,
@@ -218,6 +219,21 @@ const getLitSearch = (genes, variations) => {
   return search
 }

+const shouldShowNonDefaultTranscriptInfoIcon = (variant, transcript, transcriptsById) => {
+  const allVariantTranscripts = Object.values(variant.transcripts)?.flat() || []
+  const canonical = allVariantTranscripts.find(t => t.canonical) || null
+  const mane = allVariantTranscripts.find(
+    t => transcriptsById[t.transcriptId]?.isManeSelect || false,
+  ) || null
+
+  const result = canonical !== null &&
+    mane !== null &&
+    transcript.transcriptId !== canonical.transcriptId &&
+    transcript.transcriptId !== mane.transcriptId
+
+  return result
+}
+
 const VARIANT_LINKS = [
   {
     name: 'gnomAD',
@@ -417,7 +433,7 @@ const svSizeDisplay = (size) => {
   return `${(size / 1000000).toFixed(2) / 1}Mb`
 }

-const Annotations = React.memo(({ variant, mainGeneId, showMainGene }) => {
+const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => {
   const {
     rsid, svType, numExon, pos, end, svTypeDetail, svSourceDetail, cpxIntervals, algorithms, bothsidesSupport, endChrom,
@@ -457,32 +473,51 @@ const Annotations = React.memo(({ variant, mainGeneId, showMainGene, transcriptsById }) => {
   return (
{(mainTranscript.majorConsequence || svType) && ( - - {svType ? (SVTYPE_LOOKUP[svType] || svType) : mainTranscript.majorConsequence.replace(/_/g, ' ')} - {svType && (svTypeDetail || svSourceDetail) && ( - } - content={ -
- {(SVTYPE_DETAILS[svType] || {})[svTypeDetail] || svTypeDetail || ''} - {svTypeDetail &&
} - {svSourceDetail && `Inserted from chr${svSourceDetail.chrom}`} -
- } - position="top center" - /> - )} - - } - popup={transcriptPopupProps} - > - -
+
+ + {svType ? (SVTYPE_LOOKUP[svType] || svType) : mainTranscript.majorConsequence.replace(/_/g, ' ')} + {svType && (svTypeDetail || svSourceDetail) && ( + } + content={ +
+ {(SVTYPE_DETAILS[svType] || {})[svTypeDetail] || svTypeDetail || ''} + {svTypeDetail &&
} + {svSourceDetail && `Inserted from chr${svSourceDetail.chrom}`} +
+ } + position="top center" + /> + )} + + } + popup={transcriptPopupProps} + > + +
+ + {shouldShowNonDefaultTranscriptInfoIcon(variant, mainTranscript, transcriptsById) && ( + + } + content={ +
+ This transcript is neither the Gencode Canonical transcript nor the MANE transcript. + It has been selected by seqr as it has the most severe consequence for the variant + given your search parameters. + Click on the consequence to see alternate transcripts which may have other consequences. +
+ } + position="top left" + /> +
+ )} +
      )}
      {svType && end && !endChrom && end !== pos && (
@@ -602,6 +637,11 @@ Annotations.propTypes = {
   variant: PropTypes.object,
   mainGeneId: PropTypes.string,
   showMainGene: PropTypes.bool,
+  transcriptsById: PropTypes.object.isRequired,
 }

-export default Annotations
+const mapAnnotationsStateToProps = state => ({
+  transcriptsById: getTranscriptsById(state),
+})
+
+export default connect(mapAnnotationsStateToProps)(Annotations)
diff --git a/ui/shared/components/panel/variants/Annotations.test.js b/ui/shared/components/panel/variants/Annotations.test.js
index b15da8a70e..b11315cdde 100644
--- a/ui/shared/components/panel/variants/Annotations.test.js
+++ b/ui/shared/components/panel/variants/Annotations.test.js
@@ -1,13 +1,15 @@
 import React from 'react'
 import { shallow, configure } from 'enzyme'
 import Adapter from '@wojtekmaj/enzyme-adapter-react-17'
-import { getUser } from 'redux/selectors'
+import configureStore from 'redux-mock-store'
 import Annotations from './Annotations'
-import { VARIANT, SV_VARIANT } from '../fixtures'
+import { STATE1, VARIANT, SV_VARIANT } from '../fixtures'

 configure({ adapter: new Adapter() })

 test('shallow-render without crashing', () => {
-  shallow()
-  shallow()
+  const store = configureStore()(STATE1)
+
+  shallow()
+  shallow()
 })