From e93cb3d01b51b628e3750ad25724a1e2ac40d3e9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 11 Aug 2023 14:50:02 -0400 Subject: [PATCH 01/16] add gene counts endpoint --- hail_search/hail_search_query.py | 17 +++++++++++++++++ hail_search/search.py | 7 +++++-- hail_search/web_app.py | 4 ++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 3c4fc4a51d..3466f5aa58 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -685,6 +685,23 @@ def _sort_order(self, ht): def _get_sort_expressions(self, ht, sort): return self.SORTS[sort](ht) + def gene_counts(self): + if self._comp_het_ht: + ht = self._comp_het_ht.explode(self._comp_het_ht[GROUPED_VARIANTS_FIELD]) + ht = ht.transmute(**ht[GROUPED_VARIANTS_FIELD]) + if self._ht: + ht = ht.join(self._ht, 'outer') + else: + ht = self._ht + + ht = ht.select( + gene_ids=hl.set(ht.sortedTranscriptConsequences.map(lambda t: t.gene_id)), + families=self.BASE_ANNOTATION_FIELDS['familyGuids'](ht), + ).explode('gene_ids').explode('families') + return ht.aggregate(hl.agg.group_by( + ht.gene_ids, hl.struct(total=hl.agg.count(), families=hl.agg.counter(ht.families)) + )) + class VariantHailTableQuery(BaseHailTableQuery): diff --git a/hail_search/search.py b/hail_search/search.py index 716aae5e7b..07db813797 100644 --- a/hail_search/search.py +++ b/hail_search/search.py @@ -1,7 +1,7 @@ from hail_search.hail_search_query import QUERY_CLASS_MAP -def search_hail_backend(request): +def search_hail_backend(request, gene_counts=False): sample_data = request.pop('sample_data', {}) data_types = list(sample_data.keys()) @@ -12,4 +12,7 @@ def search_hail_backend(request): query_cls = QUERY_CLASS_MAP[single_data_type] query = query_cls(data_type, sample_data=sample_data, **request) - return query.search() + if gene_counts: + return query.gene_counts() + else: + return query.search() diff --git a/hail_search/web_app.py b/hail_search/web_app.py index cf538cf751..d56bc331ec 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -14,6 +14,10 @@ def hl_json_dumps(obj): return json.dumps(obj, default=_hl_json_default) +async def gene_counts(request: web.Request) -> web.Response: + return web.json_response(search_hail_backend(await request.json(), gene_counts=True)) + + async def search(request: web.Request) -> web.Response: hail_results, total_results = search_hail_backend(await request.json()) return web.json_response({'results': hail_results, 'total': total_results}, dumps=hl_json_dumps) From 79d4589379795005f88c4d3fd647b3b965aa9d13 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 11 Aug 2023 14:51:49 -0400 Subject: [PATCH 02/16] use geen_ids helper --- hail_search/hail_search_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 43db291bc5..85781b77b5 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -728,7 +728,7 @@ def gene_counts(self): ht = self._ht ht = ht.select( - gene_ids=hl.set(ht.sortedTranscriptConsequences.map(lambda t: t.gene_id)), + gene_ids=self._gene_ids_expr(ht), families=self.BASE_ANNOTATION_FIELDS['familyGuids'](ht), ).explode('gene_ids').explode('families') return ht.aggregate(hl.agg.group_by( From 6b6d844f7ed038f252f1df659965c2017398a4a7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 11 Aug 2023 15:40:28 -0400 Subject: [PATCH 03/16] tes gene counts --- hail_search/hail_search_query.py | 24 ++++++++------ hail_search/test_search.py | 34 ++++++++++++++------ hail_search/test_utils.py | 5 +++ hail_search/web_app.py | 3 +- seqr/utils/search/hail_search_utils_tests.py | 8 ++--- seqr/utils/search/search_utils_tests.py | 13 +++----- 6 files changed, 55 insertions(+), 32 deletions(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 85781b77b5..9a2b7214ef 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -719,18 +719,24 @@ def _gene_rank_sort(cls, r, gene_ranks): return [] def gene_counts(self): + selects = { + 'gene_ids': self._gene_ids_expr, + 'families': self.BASE_ANNOTATION_FIELDS['familyGuids'], + } + ch_ht = None if self._comp_het_ht: - ht = self._comp_het_ht.explode(self._comp_het_ht[GROUPED_VARIANTS_FIELD]) - ht = ht.transmute(**ht[GROUPED_VARIANTS_FIELD]) - if self._ht: - ht = ht.join(self._ht, 'outer') + ch_ht = self._comp_het_ht.explode(self._comp_het_ht[GROUPED_VARIANTS_FIELD]) + ch_ht = ch_ht.select(**{k: v(ch_ht[GROUPED_VARIANTS_FIELD]) for k, v in selects.items()}) + + if self._ht: + ht = self._ht.select(**{k: v(self._ht) for k, v in selects.items()}) + if ch_ht: + ht = ht.join(ch_ht, 'outer') + ht = ht.transmute(**{k: hl.or_else(ht[k], ht[f'{k}_1']) for k in selects}) else: - ht = self._ht + ht = ch_ht - ht = ht.select( - gene_ids=self._gene_ids_expr(ht), - families=self.BASE_ANNOTATION_FIELDS['familyGuids'](ht), - ).explode('gene_ids').explode('families') + ht = ht.explode('gene_ids').explode('families') return ht.aggregate(hl.agg.group_by( ht.gene_ids, hl.struct(total=hl.agg.count(), families=hl.agg.counter(ht.families)) )) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index b260b8d6c7..39d344909e 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -3,7 +3,7 @@ from hail_search.test_utils import get_hail_search_body, FAMILY_2_VARIANT_SAMPLE_DATA, FAMILY_2_MISSING_SAMPLE_DATA, \ VARIANT1, VARIANT2, VARIANT3, VARIANT4, MULTI_PROJECT_SAMPLE_DATA, MULTI_PROJECT_MISSING_SAMPLE_DATA, \ - LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH + LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS from hail_search.web_app import init_web_app PROJECT_2_VARIANT = { @@ -113,7 +113,7 @@ async def test_status(self): resp_json = await resp.json() self.assertDictEqual(resp_json, {'success': True}) - async def _assert_expected_search(self, results, **search_kwargs): + async def _assert_expected_search(self, results, gene_counts=None, **search_kwargs): search_body = get_hail_search_body(**search_kwargs) async with self.client.request('POST', '/search', json=search_body) as resp: self.assertEqual(resp.status, 200) @@ -123,20 +123,32 @@ async def _assert_expected_search(self, results, **search_kwargs): for i, result in enumerate(resp_json['results']): self.assertEqual(result, results[i]) + if gene_counts: + async with self.client.request('POST', '/gene_counts', json=search_body) as resp: + self.assertEqual(resp.status, 200) + gene_counts_json = await resp.json() + self.assertDictEqual(gene_counts_json, gene_counts) + async def test_single_family_search(self): await self._assert_expected_search( - [VARIANT1, VARIANT2, VARIANT3, VARIANT4], sample_data=FAMILY_2_VARIANT_SAMPLE_DATA, + [VARIANT1, VARIANT2, VARIANT3, VARIANT4], sample_data=FAMILY_2_VARIANT_SAMPLE_DATA, gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}}, + } ) async def test_single_project_search(self): await self._assert_expected_search( - [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={ + 'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}}, + 'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}}, + } ) async def test_multi_project_search(self): await self._assert_expected_search( [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4], - sample_data=MULTI_PROJECT_SAMPLE_DATA, + gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA, ) async def test_inheritance_filter(self): @@ -163,13 +175,17 @@ async def test_inheritance_filter(self): [VARIANT2, VARIANT3], inheritance_filter=gt_inheritance_filter, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA) await self._assert_expected_search( - [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', sample_data=MULTI_PROJECT_SAMPLE_DATA, - **COMP_HET_ALL_PASS_FILTERS, + [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}}, + }, **COMP_HET_ALL_PASS_FILTERS, ) await self._assert_expected_search( - [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', - sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS, + [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}}, + }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS, ) async def test_quality_filter(self): diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 27ef225dc8..6ac29fe9ab 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -362,6 +362,11 @@ VARIANT_ID_SEARCH = {'variant_ids': [['1', 10439, 'AC', 'A'], ['1', 91511686, 'TCA', 'G']], 'rs_ids': []} RSID_SEARCH = {'variant_ids': [], 'rs_ids': ['rs1801131']} +GENE_COUNTS = { + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000011_11': 1}}, +} + def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_sample_type=None, **search_body): sample_data = sample_data or EXPECTED_SAMPLE_DATA diff --git a/hail_search/web_app.py b/hail_search/web_app.py index d56bc331ec..8a288e64db 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -15,7 +15,7 @@ def hl_json_dumps(obj): async def gene_counts(request: web.Request) -> web.Response: - return web.json_response(search_hail_backend(await request.json(), gene_counts=True)) + return web.json_response(search_hail_backend(await request.json(), gene_counts=True), dumps=hl_json_dumps) async def search(request: web.Request) -> web.Response: @@ -32,5 +32,6 @@ def init_web_app(): app.add_routes([ web.get('/status', status), web.post('/search', search), + web.post('/gene_counts', gene_counts), ]) return app diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index b4dc36b882..9090249b2a 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -8,10 +8,10 @@ from seqr.models import Family from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants, get_single_variant, \ get_variants_for_variant_ids, InvalidSearchException -from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS +from seqr.utils.search.search_utils_tests import SearchTestHelper from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, \ FAMILY_2_ALL_SAMPLE_DATA, ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ - LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH + LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS MOCK_HOST = 'http://test-hail-host' @@ -155,10 +155,10 @@ def test_query_variants(self): @responses.activate def test_get_variant_query_gene_counts(self): - responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=MOCK_COUNTS, status=200) + responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=GENE_COUNTS, status=200) gene_counts = get_variant_query_gene_counts(self.results_model, self.user) - self.assertDictEqual(gene_counts, MOCK_COUNTS) + self.assertDictEqual(gene_counts, GENE_COUNTS) self.assert_cached_results({'gene_aggs': gene_counts}) self._test_expected_search_call(sort=None) diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index afb0816f98..ccfec426da 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -4,17 +4,12 @@ import json import mock +from hail_search.test_utils import GENE_COUNTS from seqr.models import Family, Sample, VariantSearch, VariantSearchResults from seqr.utils.search.utils import get_single_variant, get_variants_for_variant_ids, get_variant_query_gene_counts, \ query_variants, InvalidSearchException from seqr.views.utils.test_utils import PARSED_VARIANTS, PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT, GENE_FIELDS -MOCK_COUNTS = { - 'ENSG00000135953': {'total': 3, 'families': {'F000003_3': 2, 'F000002_2': 1, 'F000005_5': 1}}, - 'ENSG00000228198': {'total': 5, 'families': {'F000003_3': 4, 'F000002_2': 1, 'F000005_5': 1}}, - 'ENSG00000240361': {'total': 2, 'families': {'F000003_3': 2}}, -} - class SearchTestHelper(object): @@ -354,12 +349,12 @@ def test_invalid_search_get_variant_query_gene_counts(self): def test_get_variant_query_gene_counts(self, mock_get_variants): def _mock_get_variants(families, search, user, previous_search_results, genome_version, **kwargs): - previous_search_results['gene_aggs'] = MOCK_COUNTS - return MOCK_COUNTS + previous_search_results['gene_aggs'] = GENE_COUNTS + return GENE_COUNTS mock_get_variants.side_effect = _mock_get_variants gene_counts = get_variant_query_gene_counts(self.results_model, self.user) - self.assertDictEqual(gene_counts, MOCK_COUNTS) + self.assertDictEqual(gene_counts, GENE_COUNTS) results_cache = {'gene_aggs': gene_counts} self.assert_cached_results(results_cache) self._test_expected_search_call( From 0cd2e89f17132628d05a523a4d28211e43e4a439 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 16 Aug 2023 14:52:40 -0400 Subject: [PATCH 04/16] First draft for the new in-silico thresholds. --- .../components/panel/variants/Predictions.jsx | 43 ++++++------ ui/shared/utils/constants.js | 68 ++++++++++++++----- 2 files changed, 74 insertions(+), 37 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index c4dcd7e5d9..300a1671d9 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -5,7 +5,7 @@ import { connect } from 'react-redux' import { Icon, Transition, Popup } from 'semantic-ui-react' import { getGenesById } from 'redux/selectors' -import { PREDICTOR_FIELDS, getVariantMainGeneId } from 'shared/utils/constants' +import { PREDICTOR_FIELDS, getPredictColor, getVariantMainGeneId } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' import { ButtonLink } from '../../StyledComponents' @@ -20,7 +20,7 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately const predictionFieldValue = ( - predictions, { field, dangerThreshold, warningThreshold, indicatorMap, infoField, infoTitle }, + predictions, { field, pathHigher, thresholds, indicatorMap, infoField, infoTitle }, ) => { let value = predictions[field] if (value === null || value === undefined) { @@ -29,22 +29,19 @@ const predictionFieldValue = ( const infoValue = predictions[infoField] - if (dangerThreshold) { - value = parseFloat(value).toPrecision(2) - let color = 'green' - if (value >= dangerThreshold) { - color = 'red' - } else if (value >= warningThreshold) { - color = 'yellow' - } - return { value, color, infoValue, infoTitle, dangerThreshold, warningThreshold } + if (thresholds) { + value = parseFloat(value).toPrecision(3) + const color = getPredictColor(value, pathHigher, thresholds) + return { value, color, infoValue, infoTitle, pathHigher, thresholds } } return indicatorMap[value[0]] || indicatorMap[value] } +const PATHOGENIC_COLORS = ['green', 'light green', 'grey', 'yellow', 'red', 'dark red'] + const Prediction = ( - { field, fieldTitle, value, color, infoValue, infoTitle, warningThreshold, dangerThreshold, href }, + { field, fieldTitle, value, color, infoValue, infoTitle, pathHigher, thresholds, href }, ) => { const indicator = infoValue ? ( ) : const fieldName = fieldTitle || snakecaseToTitlecase(field) - const fieldDisplay = dangerThreshold ? ( + const fieldDisplay = thresholds ? ( -
{`Red > ${dangerThreshold}`}
- {warningThreshold < dangerThreshold &&
{`Yellow > ${warningThreshold}`}
} + {thresholds.map((th, i) => { + if (!th) { + return null + } + const t = pathHigher ? th : -1 * th + if (i < 3) { + return
{`${PATHOGENIC_COLORS[i]} ${pathHigher ? '<' : '>'}= ${t}`}
+ } + return
{`${PATHOGENIC_COLORS[i]} ${pathHigher ? '>' : '<'}= ${t}`}
+ }).filter(e => !!e)} } trigger={{fieldName}} @@ -85,8 +90,8 @@ Prediction.propTypes = { infoTitle: PropTypes.string, fieldTitle: PropTypes.string, color: PropTypes.string, - warningThreshold: PropTypes.number, - dangerThreshold: PropTypes.number, + pathHigher: PropTypes.bool, + thresholds: PropTypes.arrayOf(PropTypes.number), href: PropTypes.string, } @@ -116,8 +121,8 @@ class Predictions extends React.PureComponent { if (gene && gene.primateAi) { genePredictors.primate_ai = { field: 'primate_ai', - warningThreshold: gene.primateAi.percentile25, - dangerThreshold: gene.primateAi.percentile75, + pathHigher: gene.primateAi.percentile75 >= gene.primateAi.percentile25, + thresholds: [null, null, gene.primateAi.percentile25, gene.primateAi.percentile75, null], } } diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 709b72b471..644941f16a 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1316,16 +1316,37 @@ export const SV_IN_SILICO_GROUP = 'Structural' export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILICO_GROUP] export const SPLICE_AI_FIELD = 'splice_ai' +const PRED_COLOR_MAP = ['green', '#90ee90', 'grey', 'yellow', 'red', '#8b0000'] + +export const getPredictColor = (value, pathHigher, thresholds) => { + let colorIndex = 0 + const v = pathHigher ? value : -1 * value + if (v <= thresholds[1]) { + if (v > thresholds[0]) { + colorIndex = 1 + } + } else { + colorIndex = 5 + if (v < thresholds[2]) { + colorIndex = 2 + } else if (v < thresholds[3]) { + colorIndex = 3 + } else if (v < thresholds[4]) { + colorIndex = 4 + } + } + return PRED_COLOR_MAP[colorIndex] +} + export const PREDICTOR_FIELDS = [ - { field: 'cadd', group: CODING_IN_SILICO_GROUP, warningThreshold: 10, dangerThreshold: 20, min: 1, max: 99 }, - { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.75 }, - { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.7 }, - { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 1, dangerThreshold: 2, max: 5 }, + { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.15, 22.7, 25.3, 28.1, null], min: 1, max: 99 }, + { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.016, 0.29, 0.644, 0.773, 0.932] }, + { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.483, 0.79, 0.867, null] }, + { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, null, 1.36, 1.828, null], max: 5 }, { field: SPLICE_AI_FIELD, group: SPLICING_IN_SILICO_GROUP, - warningThreshold: 0.5, - dangerThreshold: 0.8, + thresholds: [null, null, 0.5, 0.8, null], infoField: 'splice_ai_consequence', infoTitle: 'Predicted Consequence', fieldTitle: 'SpliceAI', @@ -1333,21 +1354,32 @@ export const PREDICTOR_FIELDS = [ `https://spliceailookup.broadinstitute.org/#variant=${chrom}-${pos}-${ref}-${alt}&hg=${genomeVersion}&distance=1000&mask=1` ), }, - { field: 'eigen', group: CODING_IN_SILICO_GROUP, warningThreshold: 1, dangerThreshold: 2, max: 99 }, - { field: 'dann', displayOnly: true, warningThreshold: 0.93, dangerThreshold: 0.96 }, - { field: 'strvctvre', group: SV_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.75 }, - { field: 'polyphen', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: POLYPHEN_MAP }, - { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP }, + { field: 'eigen', group: CODING_IN_SILICO_GROUP, thresholds: [null, null, 1, 2, null], max: 99 }, + { field: 'dann', displayOnly: true, thresholds: [null, null, 0.93, 0.96, null] }, + { field: 'strvctvre', group: SV_IN_SILICO_GROUP, thresholds: [null, null, 0.5, 0.75, null] }, + { field: 'polyphen', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.113, 0.978, 0.999, null], indicatorMap: POLYPHEN_MAP }, + { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.08, 0.001, 0, null], indicatorMap: INDICATOR_MAP }, { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP }, - { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: FATHMM_MAP }, - { field: 'vest', warningThreshold: 0.5, dangerThreshold: 0.764 }, - { field: 'mut_pred', warningThreshold: 0.392, dangerThreshold: 0.737 }, - { field: 'apogee', warningThreshold: 0.5, dangerThreshold: 0.5 }, - { field: 'gnomad_noncoding', fieldTitle: 'gnomAD Constraint', displayOnly: true, warningThreshold: 2.18, dangerThreshold: 4 }, + { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 3.32, -4.14, -5.04, null], indicatorMap: FATHMM_MAP }, + { field: 'vest', thresholds: [null, 0.449, 0.764, 0.861, 0.965] }, + { field: 'mut_pred', thresholds: [0.01, 0.391, 0.737, 0.829, 0.932] }, + { field: 'apogee', thresholds: [null, null, 0.5, 0.5, null] }, + { field: 'gnomad_noncoding', fieldTitle: 'gnomAD Constraint', displayOnly: true, thresholds: [null, null, 2.18, 4, null], warningThreshold: 2.18, dangerThreshold: 4 }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP }, - { field: 'hmtvar', warningThreshold: 0.35, dangerThreshold: 0.35 }, -] + { field: 'hmtvar', thresholds: [null, null, 0.35, 0.35, null] }, +].map(({ thresholds, ...pred }) => { + if (!thresholds) { + return pred + } + const noneNullThresholds = thresholds.filter(t => t) + const pathHigher = noneNullThresholds[1] >= noneNullThresholds[0] + return { + ...pred, + pathHigher, + thresholds: pathHigher ? thresholds : thresholds.map(t => (t === null ? null : -1 * t)), + } +}) export const getVariantMainGeneId = ({ transcripts = {}, mainTranscriptId, selectedMainTranscriptId }) => { if (selectedMainTranscriptId || mainTranscriptId) { From 7357125bbc080ddecea1005bc41b8692cb3277fe Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Aug 2023 15:09:16 -0400 Subject: [PATCH 05/16] better validation --- seqr/views/apis/report_api.py | 57 ++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 3e1e67995c..3ff0f8e08f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -1033,6 +1033,21 @@ def _get_experiment_lookup_row(is_rna, row_data): } +DATA_TYPE_VALIDATORS = { + 'string': ( + lambda val, validator: (not validator.get('is_bucket_path')) or val.startswith('gs://'), + lambda validator: ' are a google bucket path starting with gs://' + ), + 'enumeration': ( + lambda val, validator: val in validator['enumerations'], + lambda validator: f': {", ".join(column_validator["enumerations"])}', + ), + 'integer': (lambda val, validator: val.replace(',', '').isnumeric(), None), + 'float': (lambda val, validator: val.replace(',', '').replace('.', '').isnumeric(), None), + 'date': (lambda val, validator: bool(re.match(r'^\d{4}-\d{2}-\d{2}$', val)), None), +} + + def _validate_gregor_files(file_data): errors = [] warnings = [] @@ -1070,6 +1085,26 @@ def _validate_gregor_files(file_data): warnings.append( f'The following columns are included in the "{file_name}" data model but are missing in the report: {col_summary}' ) + invalid_data_type_columns = { + col: validator['data_type'] for col, validator in table_validator.items() + if validator.get('data_type') and validator['data_type'] not in DATA_TYPE_VALIDATORS + } + if invalid_data_type_columns: + col_summary = ', '.join(sorted([f'{col} ({data_type})' for col, data_type in invalid_data_type_columns.items()])) + warnings.append( + f'The following columns are included in the "{file_name}" data model but have an unsupported data type: {col_summary}' + ) + invalid_enum_columns = [ + col for col, validator in table_validator.items() + if validator.get('data_type') == 'enumeration' and not validator.get('enumerations') + ] + if invalid_enum_columns: + for col in invalid_enum_columns: + table_validator[col]['data_type'] = None + col_summary = ', '.join(sorted(invalid_enum_columns)) + warnings.append( + f'The following columns are specified as "enumeration" in the "{file_name}" data model but are missing the allowed values definition: {col_summary}' + ) for column in columns: _validate_column_data( @@ -1112,15 +1147,18 @@ def _has_required_table(table, validator, tables): def _validate_column_data(column, file_name, data, column_validator, warnings, errors): - enum = column_validator.get('enumerations') + data_type = column_validator.get('data_type') + data_type_validator, allowed_formatter = DATA_TYPE_VALIDATORS.get(data_type) + unique = column_validator.get('is_unique') required = column_validator.get('required') recommended = column in WARN_MISSING_TABLE_COLUMNS.get(file_name, []) - if not (required or enum or recommended): + if not (required or unique or recommended or data_type_validator): return missing = [] warn_missing = [] invalid = [] + grouped_values = defaultdict(set) for row in data: value = row.get(column) if not value: @@ -1130,9 +1168,13 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e check_recommend_condition = WARN_MISSING_CONDITIONAL_COLUMNS.get(column) if not check_recommend_condition or check_recommend_condition(row): warn_missing.append(_get_row_id(row)) - elif enum and value not in enum: + elif data_type_validator and not data_type_validator(value, column_validator): invalid.append(f'{_get_row_id(row)} ({value})') - if missing or warn_missing or invalid: + elif unique: + grouped_values[value].add(_get_row_id(row)) + + duplicates = [f'{k} ({", ".join(v)})' for k, v in grouped_values.items() if len(v) > 1] + if missing or warn_missing or invalid or duplicates: airtable_summary = ' (from Airtable)' if column in ALL_AIRTABLE_COLUMNS else '' error_template = f'The following entries {{issue}} "{column}"{airtable_summary} in the "{file_name}" table' if missing: @@ -1141,8 +1183,13 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e ) if invalid: invalid_values = f'Invalid values: {", ".join(sorted(invalid))}' + allowed = allowed_formatter(column_validator) if allowed_formatter else f' have data type {data_type}' + errors.append( + f'{error_template.format(issue="have invalid values for")}. Allowed values{allowed}. {invalid_values}' + ) + if duplicates: errors.append( - f'{error_template.format(issue="have invalid values for")}. Allowed values: {", ".join(enum)}. {invalid_values}' + f'{error_template.format(issue="have non-unique values for")}: {", ".join(sorted(duplicates))}' ) if warn_missing: warnings.append( From c3586be3fc204a8846d9b4ff3df3534e258402a5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Aug 2023 15:53:08 -0400 Subject: [PATCH 06/16] add tests --- seqr/views/apis/report_api.py | 28 ++++++++-------- seqr/views/apis/report_api_tests.py | 51 ++++++++++++++++------------- 2 files changed, 41 insertions(+), 38 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 3ff0f8e08f..4a9fef873f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -1034,19 +1034,16 @@ def _get_experiment_lookup_row(is_rna, row_data): DATA_TYPE_VALIDATORS = { - 'string': ( - lambda val, validator: (not validator.get('is_bucket_path')) or val.startswith('gs://'), - lambda validator: ' are a google bucket path starting with gs://' - ), - 'enumeration': ( - lambda val, validator: val in validator['enumerations'], - lambda validator: f': {", ".join(column_validator["enumerations"])}', - ), - 'integer': (lambda val, validator: val.replace(',', '').isnumeric(), None), - 'float': (lambda val, validator: val.replace(',', '').replace('.', '').isnumeric(), None), - 'date': (lambda val, validator: bool(re.match(r'^\d{4}-\d{2}-\d{2}$', val)), None), + 'string': lambda val, validator: (not validator.get('is_bucket_path')) or val.startswith('gs://'), + 'enumeration': lambda val, validator: val in validator['enumerations'], + 'integer': lambda val, validator: val.replace(',', '').isnumeric(), + 'float': lambda val, validator: val.replace(',', '').replace('.', '').isnumeric(), + 'date': lambda val, validator: bool(re.match(r'^\d{4}-\d{2}-\d{2}$', val)), +} +DATA_TYPE_ERROR_FORMATTERS = { + 'string': lambda validator: ' are a google bucket path starting with gs://', + 'enumeration': lambda validator: f': {", ".join(validator["enumerations"])}', } - def _validate_gregor_files(file_data): errors = [] @@ -1148,7 +1145,7 @@ def _has_required_table(table, validator, tables): def _validate_column_data(column, file_name, data, column_validator, warnings, errors): data_type = column_validator.get('data_type') - data_type_validator, allowed_formatter = DATA_TYPE_VALIDATORS.get(data_type) + data_type_validator = DATA_TYPE_VALIDATORS.get(data_type) unique = column_validator.get('is_unique') required = column_validator.get('required') recommended = column in WARN_MISSING_TABLE_COLUMNS.get(file_name, []) @@ -1173,7 +1170,7 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e elif unique: grouped_values[value].add(_get_row_id(row)) - duplicates = [f'{k} ({", ".join(v)})' for k, v in grouped_values.items() if len(v) > 1] + duplicates = [f'{k} ({", ".join(sorted(v))})' for k, v in grouped_values.items() if len(v) > 1] if missing or warn_missing or invalid or duplicates: airtable_summary = ' (from Airtable)' if column in ALL_AIRTABLE_COLUMNS else '' error_template = f'The following entries {{issue}} "{column}"{airtable_summary} in the "{file_name}" table' @@ -1183,7 +1180,8 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e ) if invalid: invalid_values = f'Invalid values: {", ".join(sorted(invalid))}' - allowed = allowed_formatter(column_validator) if allowed_formatter else f' have data type {data_type}' + allowed = DATA_TYPE_ERROR_FORMATTERS[data_type](column_validator) \ + if data_type in DATA_TYPE_ERROR_FORMATTERS else f' have data type {data_type}' errors.append( f'{error_template.format(issue="have invalid values for")}. Allowed values{allowed}. {invalid_values}' ) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b03234cebe..d01af29f22 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -209,7 +209,7 @@ 'target_insert_size_wes': '385', 'sequencing_platform_wes': 'NovaSeq', 'aligned_dna_short_read_file_wes': 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.cram', - 'aligned_dna_short_read_index_file_wes': 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.crai', + 'aligned_dna_short_read_index_file_wes': 'NA', 'md5sum_wes': '129c28163df082', 'reference_assembly': 'GRCh38', 'alignment_software_dna': 'BWA-MEM-2.3', @@ -295,7 +295,7 @@ 'md5sum_wes': 'a6f6308866765ce8', 'md5sum_wgs': '2aa33e8c32020b1c', 'reference_assembly': 'GRCh38', - 'alignment_software_dna': 'BWA 0.7.15.r1140', + 'alignment_software_dna': 'BWA-MEM-2.3', 'mean_coverage_wes': '42.8', 'mean_coverage_wgs': '36.1', 'analysis_details': '', @@ -388,23 +388,23 @@ 'table': 'participant', 'required': True, 'columns': [ - {'column': 'participant_id', 'required': True}, - {'column': 'internal_project_id'}, - {'column': 'gregor_center', 'required': True, 'enumerations': ['BCM', 'BROAD', 'UW']}, - {'column': 'consent_code', 'required': True, 'enumerations': ['GRU', 'HMB']}, - {'column': 'recontactable', 'enumerations': ['Yes', 'No']}, - {'column': 'prior_testing'}, + {'column': 'participant_id', 'required': True, 'data_type': 'string'}, + {'column': 'internal_project_id', 'data_type': 'reference'}, + {'column': 'gregor_center', 'required': True, 'data_type': 'enumeration', 'enumerations': ['BCM', 'BROAD', 'UW']}, + {'column': 'consent_code', 'required': True, 'data_type': 'enumeration', 'enumerations': ['GRU', 'HMB']}, + {'column': 'recontactable', 'data_type': 'enumeration', 'enumerations': ['Yes', 'No']}, + {'column': 'prior_testing', 'data_type': 'enumeration'}, {'column': 'family_id', 'required': True}, {'column': 'paternal_id'}, {'column': 'maternal_id'}, {'column': 'proband_relationship', 'required': True}, - {'column': 'sex', 'required': True, 'enumerations': ['Male', 'Female', 'Unknown']}, - {'column': 'reported_race', 'enumerations': ['Asian', 'White', 'Black']}, - {'column': 'reported_ethnicity', 'enumerations': ['Hispanic or Latino', 'Not Hispanic or Latino']}, + {'column': 'sex', 'required': True, 'data_type': 'enumeration', 'enumerations': ['Male', 'Female', 'Unknown']}, + {'column': 'reported_race', 'data_type': 'enumeration', 'enumerations': ['Asian', 'White', 'Black']}, + {'column': 'reported_ethnicity', 'data_type': 'enumeration', 'enumerations': ['Hispanic or Latino', 'Not Hispanic or Latino']}, {'column': 'ancestry_metadata'}, - {'column': 'affected_status', 'required': True, 'enumerations': ['Affected', 'Unaffected', 'Unknown']}, + {'column': 'affected_status', 'required': True, 'data_type': 'enumeration', 'enumerations': ['Affected', 'Unaffected', 'Unknown']}, {'column': 'phenotype_description'}, - {'column': 'age_at_enrollment'}, + {'column': 'age_at_enrollment', 'data_type': 'date'}, ], }, { @@ -413,13 +413,13 @@ 'columns': [ {'column': 'aligned_dna_short_read_id', 'required': True}, {'column': 'experiment_dna_short_read_id', 'required': True}, - {'column': 'aligned_dna_short_read_file'}, - {'column': 'aligned_dna_short_read_index_file'}, - {'column': 'alignment_software'}, + {'column': 'aligned_dna_short_read_file', 'is_unique': True, 'data_type': 'string', 'is_bucket_path': True}, + {'column': 'aligned_dna_short_read_index_file', 'data_type': 'string', 'is_bucket_path': True}, + {'column': 'alignment_software', 'is_unique': True}, {'column': 'analysis_details'}, - {'column': 'md5sum'}, - {'column': 'mean_coverage', 'required': True}, - {'column': 'reference_assembly'}, + {'column': 'md5sum', 'is_unique': True}, + {'column': 'mean_coverage', 'required': True, 'data_type': 'float'}, + {'column': 'reference_assembly', 'data_type': 'integer'}, {'column': 'reference_assembly_details'}, {'column': 'reference_assembly_uri'}, {'column': 'quality_issues'}, @@ -816,6 +816,8 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following tables are required in the data model but absent from the reports: subject, dna_read_data_set', 'The following columns are included in the "participant" table but are missing from the data model: age_at_last_observation, ancestry_detail, pmid_id, proband_relationship_detail, sex_detail, twin_id', 'The following columns are included in the "participant" data model but are missing in the report: ancestry_metadata', + 'The following columns are included in the "participant" data model but have an unsupported data type: internal_project_id (reference)', + 'The following columns are specified as "enumeration" in the "participant" data model but are missing the allowed values definition: prior_testing', 'The following entries are missing recommended "recontactable" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881', 'The following entries are missing recommended "reported_race" in the "participant" table: Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries are missing recommended "phenotype_description" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', @@ -824,7 +826,11 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat self.assertListEqual(response.json()['errors'], [ 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', + 'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)', + 'The following entries have invalid values for "aligned_dna_short_read_index_file" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: VCGS_FAM203_621_D2 (NA)', + 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)', 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2', + 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)', ]) responses.add(responses.GET, MOCK_DATA_MODEL_URL, status=404) @@ -976,20 +982,19 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): self.assertIn([ 'Broad_exome_VCGS_FAM203_621_D2_1', 'Broad_exome_VCGS_FAM203_621_D2', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.cram', - 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.crai', '129c28163df082', 'GRCh38', - '', '', '', 'BWA-MEM-2.3', 'DOI:10.5281/zenodo.4469317', '', + 'NA', '129c28163df082', 'GRCh38', '', '', '', 'BWA-MEM-2.3', 'DOI:10.5281/zenodo.4469317', '', ], read_file) self.assertIn([ 'Broad_exome_NA20888_1', 'Broad_exome_NA20888', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888.cram', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888.crai', 'a6f6308866765ce8', 'GRCh38', '', '', - '42.8', 'BWA 0.7.15.r1140', '', '', + '42.8', 'BWA-MEM-2.3', '', '', ], read_file) self.assertEqual([ 'Broad_genome_NA20888_1_1', 'Broad_genome_NA20888_1', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888_1.cram', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888_1.crai', '2aa33e8c32020b1c', 'GRCh38', '', '', - '36.1', 'BWA 0.7.15.r1140', '', '', + '36.1', 'BWA-MEM-2.3', '', '', ] in read_file, has_second_project) self.assertEqual(len(read_set_file), num_airtable_rows) From 78fc0b721843909f6776d5c1b56b558d123c3578 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 17 Aug 2023 16:32:18 -0400 Subject: [PATCH 07/16] Update per review. --- .../components/panel/variants/Predictions.jsx | 69 +++++++++++++------ ui/shared/utils/constants.js | 68 ++++++------------ 2 files changed, 69 insertions(+), 68 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index 300a1671d9..fd60ceefc7 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -5,10 +5,10 @@ import { connect } from 'react-redux' import { Icon, Transition, Popup } from 'semantic-ui-react' import { getGenesById } from 'redux/selectors' -import { PREDICTOR_FIELDS, getPredictColor, getVariantMainGeneId } from 'shared/utils/constants' +import { PREDICTOR_FIELDS, PRED_COLOR_MAP, getVariantMainGeneId } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' -import { ButtonLink } from '../../StyledComponents' +import { ButtonLink, ColoredIcon } from '../../StyledComponents' const PredictionValue = styled.span` margin-left: 5px; @@ -19,8 +19,27 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately +const comparePathScores = (value, i, thresholds) => { + if (i < 2) { // Benign thresholds + if (i === 0) { + return value <= thresholds[0] + } + return (thresholds[0] === undefined || value > thresholds[0]) && value <= thresholds[1] + } + + if (i === 2) { // Grey area + return (thresholds[1] === undefined || value > thresholds[1]) && value < thresholds[2] + } + + // Pathogenic thresholds + if (i === 5) { + return true + } + return value >= thresholds[i - 1] && (thresholds[i] === undefined || value < thresholds[i]) +} + const predictionFieldValue = ( - predictions, { field, pathHigher, thresholds, indicatorMap, infoField, infoTitle }, + predictions, { field, thresholds, indicatorMap, infoField, infoTitle }, ) => { let value = predictions[field] if (value === null || value === undefined) { @@ -31,17 +50,17 @@ const predictionFieldValue = ( if (thresholds) { value = parseFloat(value).toPrecision(3) - const color = getPredictColor(value, pathHigher, thresholds) - return { value, color, infoValue, infoTitle, pathHigher, thresholds } + const color = PRED_COLOR_MAP.find((clr, i) => comparePathScores(value, i, thresholds)) + return { value, color, infoValue, infoTitle, thresholds } } return indicatorMap[value[0]] || indicatorMap[value] } -const PATHOGENIC_COLORS = ['green', 'light green', 'grey', 'yellow', 'red', 'dark red'] +const coloredIcon = color => const Prediction = ( - { field, fieldTitle, value, color, infoValue, infoTitle, pathHigher, thresholds, href }, + { field, fieldTitle, value, color, infoValue, infoTitle, thresholds, href }, ) => { const indicator = infoValue ? ( } /> - ) : + ) : coloredIcon(color) const fieldName = fieldTitle || snakecaseToTitlecase(field) const fieldDisplay = thresholds ? ( - {thresholds.map((th, i) => { - if (!th) { - return null - } - const t = pathHigher ? th : -1 * th - if (i < 3) { - return
{`${PATHOGENIC_COLORS[i]} ${pathHigher ? '<' : '>'}= ${t}`}
- } - return
{`${PATHOGENIC_COLORS[i]} ${pathHigher ? '>' : '<'}= ${t}`}
- }).filter(e => !!e)} + {[0, 1].map(i => thresholds[i] !== undefined && ( +
+ {coloredIcon(PRED_COLOR_MAP[i])} + {i > 0 && thresholds[i - 1] !== undefined && ` > ${thresholds[i - 1]} and`} + {` <= ${thresholds[i]}`} +
+ ))} +
+ {coloredIcon(PRED_COLOR_MAP[2])} + {thresholds[1] === undefined ? '' : ` > ${thresholds[1]} and`} + {` < ${thresholds[2]}`} +
+ {[2, 3, 4].map(i => thresholds[i] !== undefined && ( +
+ {coloredIcon(PRED_COLOR_MAP[i + 1])} + {` >= ${thresholds[i]}`} + {i < 4 && thresholds[i + 1] !== undefined && ` and < ${thresholds[i + 1]}`} +
+ ))} } trigger={{fieldName}} @@ -90,7 +118,6 @@ Prediction.propTypes = { infoTitle: PropTypes.string, fieldTitle: PropTypes.string, color: PropTypes.string, - pathHigher: PropTypes.bool, thresholds: PropTypes.arrayOf(PropTypes.number), href: PropTypes.string, } @@ -121,8 +148,8 @@ class Predictions extends React.PureComponent { if (gene && gene.primateAi) { genePredictors.primate_ai = { field: 'primate_ai', - pathHigher: gene.primateAi.percentile75 >= gene.primateAi.percentile25, - thresholds: [null, null, gene.primateAi.percentile25, gene.primateAi.percentile75, null], + thresholds: [undefined, undefined, gene.primateAi.percentile25.toPrecision(3), + gene.primateAi.percentile75.toPrecision(3), undefined], } } diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 644941f16a..7b87421bfe 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1316,37 +1316,17 @@ export const SV_IN_SILICO_GROUP = 'Structural' export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILICO_GROUP] export const SPLICE_AI_FIELD = 'splice_ai' -const PRED_COLOR_MAP = ['green', '#90ee90', 'grey', 'yellow', 'red', '#8b0000'] - -export const getPredictColor = (value, pathHigher, thresholds) => { - let colorIndex = 0 - const v = pathHigher ? value : -1 * value - if (v <= thresholds[1]) { - if (v > thresholds[0]) { - colorIndex = 1 - } - } else { - colorIndex = 5 - if (v < thresholds[2]) { - colorIndex = 2 - } else if (v < thresholds[3]) { - colorIndex = 3 - } else if (v < thresholds[4]) { - colorIndex = 4 - } - } - return PRED_COLOR_MAP[colorIndex] -} +export const PRED_COLOR_MAP = ['green', '#90ee90', 'grey', 'yellow', 'red', '#8b0000'] export const PREDICTOR_FIELDS = [ - { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.15, 22.7, 25.3, 28.1, null], min: 1, max: 99 }, + { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.15, 22.7, 25.3, 28.1, undefined], min: 1, max: 99 }, { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.016, 0.29, 0.644, 0.773, 0.932] }, - { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.483, 0.79, 0.867, null] }, - { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, null, 1.36, 1.828, null], max: 5 }, + { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, 0.483, 0.79, 0.867, undefined] }, + { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1.36, 1.828, undefined], max: 5 }, { field: SPLICE_AI_FIELD, group: SPLICING_IN_SILICO_GROUP, - thresholds: [null, null, 0.5, 0.8, null], + thresholds: [undefined, undefined, 0.5, 0.8, undefined], infoField: 'splice_ai_consequence', infoTitle: 'Predicted Consequence', fieldTitle: 'SpliceAI', @@ -1354,32 +1334,26 @@ export const PREDICTOR_FIELDS = [ `https://spliceailookup.broadinstitute.org/#variant=${chrom}-${pos}-${ref}-${alt}&hg=${genomeVersion}&distance=1000&mask=1` ), }, - { field: 'eigen', group: CODING_IN_SILICO_GROUP, thresholds: [null, null, 1, 2, null], max: 99 }, - { field: 'dann', displayOnly: true, thresholds: [null, null, 0.93, 0.96, null] }, - { field: 'strvctvre', group: SV_IN_SILICO_GROUP, thresholds: [null, null, 0.5, 0.75, null] }, - { field: 'polyphen', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.113, 0.978, 0.999, null], indicatorMap: POLYPHEN_MAP }, - { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 0.08, 0.001, 0, null], indicatorMap: INDICATOR_MAP }, + { field: 'eigen', group: CODING_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1, 2, undefined], max: 99 }, + { field: 'dann', displayOnly: true, thresholds: [undefined, undefined, 0.93, 0.96, undefined] }, + { field: 'strvctvre', group: SV_IN_SILICO_GROUP, thresholds: [undefined, undefined, 0.5, 0.75, undefined] }, + { field: 'polyphen', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: POLYPHEN_MAP }, + { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP }, { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP }, - { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, thresholds: [null, 3.32, -4.14, -5.04, null], indicatorMap: FATHMM_MAP }, - { field: 'vest', thresholds: [null, 0.449, 0.764, 0.861, 0.965] }, + { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: FATHMM_MAP }, + { field: 'vest', thresholds: [undefined, 0.449, 0.764, 0.861, 0.965] }, { field: 'mut_pred', thresholds: [0.01, 0.391, 0.737, 0.829, 0.932] }, - { field: 'apogee', thresholds: [null, null, 0.5, 0.5, null] }, - { field: 'gnomad_noncoding', fieldTitle: 'gnomAD Constraint', displayOnly: true, thresholds: [null, null, 2.18, 4, null], warningThreshold: 2.18, dangerThreshold: 4 }, + { field: 'apogee', thresholds: [undefined, undefined, 0.5, 0.5, undefined] }, + { + field: 'gnomad_noncoding', + fieldTitle: 'gnomAD Constraint', + displayOnly: true, + thresholds: [undefined, undefined, 2.18, 4, undefined], + }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP }, - { field: 'hmtvar', thresholds: [null, null, 0.35, 0.35, null] }, -].map(({ thresholds, ...pred }) => { - if (!thresholds) { - return pred - } - const noneNullThresholds = thresholds.filter(t => t) - const pathHigher = noneNullThresholds[1] >= noneNullThresholds[0] - return { - ...pred, - pathHigher, - thresholds: pathHigher ? thresholds : thresholds.map(t => (t === null ? null : -1 * t)), - } -}) + { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined] }, +] export const getVariantMainGeneId = ({ transcripts = {}, mainTranscriptId, selectedMainTranscriptId }) => { if (selectedMainTranscriptId || mainTranscriptId) { From 43ae552a71258ffe2c735df06e503d60524d9c4d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 18 Aug 2023 10:10:22 -0400 Subject: [PATCH 08/16] update called variant file filter missing --- seqr/views/apis/report_api.py | 7 +++++-- seqr/views/apis/report_api_tests.py | 9 +++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 4a9fef873f..1d422526b3 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -713,8 +713,9 @@ def _get_sample_airtable_metadata(sample_ids, user, include_collaborator=False): READ_RNA_TABLE_COLUMNS.insert(10, 'gene_annotation_details') READ_RNA_TABLE_COLUMNS.insert(13, 'alignment_postprocessing') READ_SET_TABLE_COLUMNS = ['aligned_dna_short_read_set_id', 'aligned_dna_short_read_id'] +CALLED_VARIANT_FILE_COLUMN = 'called_variants_dna_file' CALLED_TABLE_COLUMNS = [ - 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', 'called_variants_dna_file', 'md5sum', + 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', ] @@ -912,7 +913,9 @@ def gregor_export(request): ('experiment_dna_short_read', EXPERIMENT_TABLE_COLUMNS, airtable_rows), ('aligned_dna_short_read', READ_TABLE_COLUMNS, airtable_rows), ('aligned_dna_short_read_set', READ_SET_TABLE_COLUMNS, airtable_rows), - ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, airtable_rows), + ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, [ + row for row in airtable_rows if row.get(CALLED_VARIANT_FILE_COLUMN) + ]), ('experiment_rna_short_read', EXPERIMENT_RNA_TABLE_COLUMNS, airtable_rna_rows), ('aligned_rna_short_read', READ_RNA_TABLE_COLUMNS, airtable_rna_rows), ('experiment', EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index d01af29f22..ff6f324ad5 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -299,9 +299,9 @@ 'mean_coverage_wes': '42.8', 'mean_coverage_wgs': '36.1', 'analysis_details': '', - 'called_variants_dna_short_read_id': 'NA', + 'called_variants_dna_short_read_id': '', 'aligned_dna_short_read_set_id': 'Broad_NA20888_D1', - 'called_variants_dna_file': 'NA', + 'called_variants_dna_file': '', 'caller_software': 'NA', 'variant_types': 'SNV', }, @@ -1003,7 +1003,7 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): self.assertIn(['Broad_NA20888_D1', 'Broad_exome_NA20888_1'], read_set_file) self.assertEqual(['Broad_NA20888_D1', 'Broad_genome_NA20888_1_1'] in read_set_file, has_second_project) - self.assertEqual(len(called_file), num_airtable_rows) + self.assertEqual(len(called_file), 2) self.assertEqual(called_file[0], [ 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', 'called_variants_dna_file', 'md5sum', 'caller_software', 'variant_types', 'analysis_details', @@ -1012,9 +1012,6 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): 'SX2-3', 'BCM_H7YG5DSX2', 'gs://fc-fed09429-e563-44a7-aaeb-776c8336ba02/COL_FAM1_1_D1.SV.vcf', '129c28163df082', 'gatk4.1.2', 'SNV', 'DOI:10.5281/zenodo.4469317', ], called_file) - self.assertIn(['NA', 'Broad_NA20888_D1', 'NA', 'a6f6308866765ce8', 'NA', 'SNV', ''], called_file) - self.assertEqual( - ['NA', 'Broad_NA20888_D1', 'NA', '2aa33e8c32020b1c', 'NA', 'SNV', ''] in called_file, has_second_project) self.assertEqual(len(experiment_rna_file), 2) self.assertEqual(experiment_rna_file[0], [ From 0a858611be6974df64913fd01f435e517b56f51c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 18 Aug 2023 10:38:18 -0400 Subject: [PATCH 09/16] better flot and int validation and testing --- seqr/views/apis/report_api.py | 5 +++-- seqr/views/apis/report_api_tests.py | 30 ++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index 4a9fef873f..b52ac8cc6f 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -1033,11 +1033,12 @@ def _get_experiment_lookup_row(is_rna, row_data): } +is_integer = lambda val, *args: val.isnumeric() or re.match(r'^[\d{3},]*\d{3}$', val) DATA_TYPE_VALIDATORS = { 'string': lambda val, validator: (not validator.get('is_bucket_path')) or val.startswith('gs://'), 'enumeration': lambda val, validator: val in validator['enumerations'], - 'integer': lambda val, validator: val.replace(',', '').isnumeric(), - 'float': lambda val, validator: val.replace(',', '').replace('.', '').isnumeric(), + 'integer': is_integer, + 'float': lambda val, validator: is_integer(val) or re.match(r'^\d+.\d+$', val), 'date': lambda val, validator: bool(re.match(r'^\d{4}-\d{2}-\d{2}$', val)), } DATA_TYPE_ERROR_FORMATTERS = { diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index d01af29f22..e3a46a930d 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -441,6 +441,33 @@ 'required': 'CONDITIONAL (aligned_dna_short_read_set, dna_read_data)', 'columns': [{'column': 'analyte_id', 'required': True}], }, + { + 'table': 'experiment_rna_short_read', + 'columns': [ + {'column': 'experiment_rna_short_read_id', 'required': True}, + {'column': 'analyte_id', 'required': True}, + {'column': 'experiment_sample_id'}, + {'column': 'seq_library_prep_kit_method'}, + {'column': 'library_prep_type'}, + {'column': 'experiment_type'}, + {'column': 'read_length', 'data_type': 'integer'}, + {'column': 'single_or_paired_ends'}, + {'column': 'date_data_generation', 'data_type': 'float'}, + {'column': 'sequencing_platform'}, + {'column': 'within_site_batch_name'}, + {'column': 'RIN', 'data_type': 'float'}, + {'column': 'estimated_library_size'}, + {'column': 'total_reads', 'data_type': 'integer'}, + {'column': 'percent_rRNA', 'data_type': 'float'}, + {'column': 'percent_mRNA', 'data_type': 'float'}, + {'column': 'percent_mtRNA', 'data_type': 'float'}, + {'column': 'percent_Globin', 'data_type': 'float'}, + {'column': 'percent_UMI', 'data_type': 'float'}, + {'column': '5prime3prime_bias', 'data_type': 'float'}, + {'column': 'percent_GC', 'data_type': 'float'}, + {'column': 'percent_chrX_Y', 'data_type': 'float'}, + ], + }, ] } @@ -822,7 +849,7 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following entries are missing recommended "reported_race" in the "participant" table: Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries are missing recommended "phenotype_description" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries are missing recommended "age_at_enrollment" in the "participant" table: Broad_HG00731, Broad_NA20870, Broad_NA20872, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', - ] + skipped_file_validation_warnings[1:5] + skipped_file_validation_warnings[7:]) + ] + skipped_file_validation_warnings[1:5] + skipped_file_validation_warnings[7:8] + skipped_file_validation_warnings[9:]) self.assertListEqual(response.json()['errors'], [ 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', @@ -831,6 +858,7 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)', 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2', 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)', + 'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. Invalid values: NA19679 (2023-02-11)', ]) responses.add(responses.GET, MOCK_DATA_MODEL_URL, status=404) From 04676d87f2f72b90345bfcfc3701159163bb59fd Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Fri, 18 Aug 2023 13:26:48 -0400 Subject: [PATCH 10/16] Add "wait_for_routes" helper script for CronJobs. (#3565) * add wait_for_routes script * add wait * wait for routes * Add wait_for_routes helper script * Flush out description * Update wait_for_routes --- deploy/docker/seqr/Dockerfile | 1 + deploy/docker/seqr/wait_for_routes | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100755 deploy/docker/seqr/wait_for_routes diff --git a/deploy/docker/seqr/Dockerfile b/deploy/docker/seqr/Dockerfile index 2093aef700..22a7b63a27 100644 --- a/deploy/docker/seqr/Dockerfile +++ b/deploy/docker/seqr/Dockerfile @@ -93,6 +93,7 @@ EXPOSE 8000 ENV TERM=xterm COPY deploy/docker/seqr/readiness_probe / +COPY deploy/docker/seqr/wait_for_routes / COPY deploy/docker/seqr/bin/*.sh /usr/local/bin/ COPY deploy/docker/seqr/config/*.py ./ COPY deploy/docker/seqr/bashrc /root/.bashrc diff --git a/deploy/docker/seqr/wait_for_routes b/deploy/docker/seqr/wait_for_routes new file mode 100755 index 0000000000..f8263fe1ac --- /dev/null +++ b/deploy/docker/seqr/wait_for_routes @@ -0,0 +1,26 @@ +#!/bin/bash + +### +# Waits for network endpoints. Intended usage is within Kubernetes CronJobs to wait for sidecar availability. +# Usage: ./wait_for_routes https://www.google.com/ https://www.broadinstitute.org https://www.broadins.org +### + +RETRY_COUNT=10 +SLEEP_S=2 + +for route in "$@" +do + retries=0 + until [ "$retries" -ge 10 ] + do + curl -s $route -o /dev/null && echo "Successful ping of $route" && break + retries=$((retries+1)) + if [ "$retries" -eq 10 ]; then + echo "Route ${route} wasn't available after ${RETRY_COUNT} connection attempts" + exit 1 + else + echo "Unable to connect to ${route}, retrying. Attempt ${retries}/${RETRY_COUNT}" + sleep $SLEEP_S + fi + done +done From 0cbc4af350974ba2415636c553d7e3ff8f2f27f3 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 18 Aug 2023 14:06:24 -0400 Subject: [PATCH 11/16] Update the threshold search and popup info. --- .../components/panel/variants/Predictions.jsx | 58 ++++++------------- ui/shared/utils/constants.js | 12 ++-- 2 files changed, 23 insertions(+), 47 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index fd60ceefc7..b569b56c0a 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -19,25 +19,6 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately -const comparePathScores = (value, i, thresholds) => { - if (i < 2) { // Benign thresholds - if (i === 0) { - return value <= thresholds[0] - } - return (thresholds[0] === undefined || value > thresholds[0]) && value <= thresholds[1] - } - - if (i === 2) { // Grey area - return (thresholds[1] === undefined || value > thresholds[1]) && value < thresholds[2] - } - - // Pathogenic thresholds - if (i === 5) { - return true - } - return value >= thresholds[i - 1] && (thresholds[i] === undefined || value < thresholds[i]) -} - const predictionFieldValue = ( predictions, { field, thresholds, indicatorMap, infoField, infoTitle }, ) => { @@ -50,14 +31,19 @@ const predictionFieldValue = ( if (thresholds) { value = parseFloat(value).toPrecision(3) - const color = PRED_COLOR_MAP.find((clr, i) => comparePathScores(value, i, thresholds)) + const color = PRED_COLOR_MAP.find( + (clr, i) => (thresholds[i - 1] || thresholds[i - 1]) && + (thresholds[i - 1] === undefined || value >= thresholds[i - 1]) && + (thresholds[i] === undefined || value < thresholds[i]), + ) return { value, color, infoValue, infoTitle, thresholds } } return indicatorMap[value[0]] || indicatorMap[value] } -const coloredIcon = color => +const coloredIcon = color => (color === 'darkred' ? : +) const Prediction = ( { field, fieldTitle, value, color, infoValue, infoTitle, thresholds, href }, @@ -74,27 +60,17 @@ const Prediction = ( - {[0, 1].map(i => thresholds[i] !== undefined && ( -
- {coloredIcon(PRED_COLOR_MAP[i])} - {i > 0 && thresholds[i - 1] !== undefined && ` > ${thresholds[i - 1]} and`} - {` <= ${thresholds[i]}`} -
- ))} -
- {coloredIcon(PRED_COLOR_MAP[2])} - {thresholds[1] === undefined ? '' : ` > ${thresholds[1]} and`} - {` < ${thresholds[2]}`} -
- {[2, 3, 4].map(i => thresholds[i] !== undefined && ( -
- {coloredIcon(PRED_COLOR_MAP[i + 1])} - {` >= ${thresholds[i]}`} - {i < 4 && thresholds[i + 1] !== undefined && ` and < ${thresholds[i + 1]}`} + PRED_COLOR_MAP.map((c, i) => { + if (thresholds[i] === undefined && thresholds[i - 1] === undefined) { + return null + } + return ( +
+ {coloredIcon(c)} + {thresholds[i] === undefined ? ` >= ${thresholds[i - 1]}` : ` < ${thresholds[i]}`}
- ))} -
+ ) + }) } trigger={{fieldName}} /> diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 7b87421bfe..675ea87708 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1316,12 +1316,12 @@ export const SV_IN_SILICO_GROUP = 'Structural' export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILICO_GROUP] export const SPLICE_AI_FIELD = 'splice_ai' -export const PRED_COLOR_MAP = ['green', '#90ee90', 'grey', 'yellow', 'red', '#8b0000'] +export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', 'darkred'] export const PREDICTOR_FIELDS = [ - { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.15, 22.7, 25.3, 28.1, undefined], min: 1, max: 99 }, - { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.016, 0.29, 0.644, 0.773, 0.932] }, - { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, 0.483, 0.79, 0.867, undefined] }, + { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.151, 22.8, 25.3, 28.1, undefined], min: 1, max: 99 }, + { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.0161, 0.291, 0.644, 0.773, 0.932] }, + { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, 0.484, 0.79, 0.867, undefined] }, { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1.36, 1.828, undefined], max: 5 }, { field: SPLICE_AI_FIELD, @@ -1341,8 +1341,8 @@ export const PREDICTOR_FIELDS = [ { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP }, { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP }, { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: FATHMM_MAP }, - { field: 'vest', thresholds: [undefined, 0.449, 0.764, 0.861, 0.965] }, - { field: 'mut_pred', thresholds: [0.01, 0.391, 0.737, 0.829, 0.932] }, + { field: 'vest', thresholds: [undefined, 0.450, 0.764, 0.861, 0.965] }, + { field: 'mut_pred', thresholds: [0.0101, 0.392, 0.737, 0.829, 0.932] }, { field: 'apogee', thresholds: [undefined, undefined, 0.5, 0.5, undefined] }, { field: 'gnomad_noncoding', From 9d755dbb6ef868d8615136402d567cea4b85276b Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 21 Aug 2023 15:35:26 -0400 Subject: [PATCH 12/16] Update per review. --- ui/shared/components/panel/variants/Predictions.jsx | 5 ++--- ui/shared/utils/constants.js | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index b569b56c0a..3b80cb6842 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -32,7 +32,7 @@ const predictionFieldValue = ( if (thresholds) { value = parseFloat(value).toPrecision(3) const color = PRED_COLOR_MAP.find( - (clr, i) => (thresholds[i - 1] || thresholds[i - 1]) && + (clr, i) => (thresholds[i - 1] || thresholds[i]) && (thresholds[i - 1] === undefined || value >= thresholds[i - 1]) && (thresholds[i] === undefined || value < thresholds[i]), ) @@ -42,8 +42,7 @@ const predictionFieldValue = ( return indicatorMap[value[0]] || indicatorMap[value] } -const coloredIcon = color => (color === 'darkred' ? : -) +const coloredIcon = color => React.createElement(color.startsWith('#') ? ColoredIcon : Icon, { name: 'circle', size: 'small', color }) const Prediction = ( { field, fieldTitle, value, color, infoValue, infoTitle, thresholds, href }, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 675ea87708..1323503fd2 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1316,7 +1316,7 @@ export const SV_IN_SILICO_GROUP = 'Structural' export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILICO_GROUP] export const SPLICE_AI_FIELD = 'splice_ai' -export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', 'darkred'] +export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', '#8b0000'] export const PREDICTOR_FIELDS = [ { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.151, 22.8, 25.3, 28.1, undefined], min: 1, max: 99 }, From 09f7036b7f032492aa8e726bbfc552086040a97e Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 22 Aug 2023 09:27:50 -0400 Subject: [PATCH 13/16] Move color map definition. --- ui/shared/components/panel/variants/Predictions.jsx | 4 +++- ui/shared/utils/constants.js | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index 3b80cb6842..395bb5b0e5 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -5,7 +5,7 @@ import { connect } from 'react-redux' import { Icon, Transition, Popup } from 'semantic-ui-react' import { getGenesById } from 'redux/selectors' -import { PREDICTOR_FIELDS, PRED_COLOR_MAP, getVariantMainGeneId } from 'shared/utils/constants' +import { PREDICTOR_FIELDS, getVariantMainGeneId } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' import { ButtonLink, ColoredIcon } from '../../StyledComponents' @@ -19,6 +19,8 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately +export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', '#8b0000'] + const predictionFieldValue = ( predictions, { field, thresholds, indicatorMap, infoField, infoTitle }, ) => { diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 1323503fd2..caf2f5919f 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1316,8 +1316,6 @@ export const SV_IN_SILICO_GROUP = 'Structural' export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILICO_GROUP] export const SPLICE_AI_FIELD = 'splice_ai' -export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', '#8b0000'] - export const PREDICTOR_FIELDS = [ { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.151, 22.8, 25.3, 28.1, undefined], min: 1, max: 99 }, { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.0161, 0.291, 0.644, 0.773, 0.932] }, From c6e60a241f6e4442d0fb050e803d9b205bd16eb5 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 22 Aug 2023 09:47:43 -0400 Subject: [PATCH 14/16] Fix codacy. --- ui/shared/utils/constants.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index caf2f5919f..9334446cab 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1339,7 +1339,7 @@ export const PREDICTOR_FIELDS = [ { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP }, { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP }, { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: FATHMM_MAP }, - { field: 'vest', thresholds: [undefined, 0.450, 0.764, 0.861, 0.965] }, + { field: 'vest', thresholds: [undefined, 0.45, 0.764, 0.861, 0.965] }, { field: 'mut_pred', thresholds: [0.0101, 0.392, 0.737, 0.829, 0.932] }, { field: 'apogee', thresholds: [undefined, undefined, 0.5, 0.5, undefined] }, { From 60cc895dbf4285c8f977b7620c9a26c26549fb14 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 22 Aug 2023 10:13:35 -0400 Subject: [PATCH 15/16] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9808f0fea5..73d72d8dda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 8/22/23 * Add db indices to optimize RNA data queries (REQUIRES DB MIGRATION) ## 7/11/23 From dea5ab2da1fff967e089dde0e33827802101c5a3 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 22 Aug 2023 10:28:08 -0400 Subject: [PATCH 16/16] Remove unnecessary export. --- ui/shared/components/panel/variants/Predictions.jsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index 395bb5b0e5..7899621545 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -19,7 +19,7 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately -export const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', '#8b0000'] +const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 'red', '#8b0000'] const predictionFieldValue = ( predictions, { field, thresholds, indicatorMap, infoField, infoTitle },