From 386ceb76a26c4ecd6900a9f8223e1493b736a25e Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 27 Sep 2022 11:55:16 -0400 Subject: [PATCH 01/96] First commit for LIRICAL data loading. --- .../0048_phenotypeprioritization.py | 31 ++++++ seqr/models.py | 38 ++++++++ seqr/urls.py | 3 +- seqr/views/apis/data_manager_api.py | 96 ++++++++++++++++++- ui/pages/DataManagement/DataManagement.jsx | 2 + .../components/PhenotypePri.jsx | 31 ++++++ ui/pages/DataManagement/reducers.js | 17 ++++ ui/pages/DataManagement/selectors.js | 1 + 8 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 seqr/migrations/0048_phenotypeprioritization.py create mode 100644 ui/pages/DataManagement/components/PhenotypePri.jsx diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py new file mode 100644 index 0000000000..bb24d75db8 --- /dev/null +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -0,0 +1,31 @@ +# Generated by Django 3.2.15 on 2022-09-27 15:01 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0047_auto_20220908_1851'), + ] + + operations = [ + migrations.CreateModel( + name='PhenotypePrioritization', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('gene_id', models.CharField(max_length=20)), + ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), + ('rank', models.IntegerField()), + ('disease_id', models.CharField(max_length=32)), + ('score1', models.FloatField(null=True)), + ('score2', models.FloatField(null=True)), + ('score3', models.FloatField(null=True)), + ('sample', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.sample')), + ], + options={ + 'unique_together': {('sample', 'gene_id', 'disease_id')}, + }, + ), + ] diff --git a/seqr/models.py b/seqr/models.py index d71dd36e0b..a7ce22508e 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1053,3 +1053,41 @@ class Meta: unique_together = ('sample', 'gene_id') json_fields = ['gene_id', 'tpm'] + + +class PhenotypePrioritization(DeletableSampleMetadataModel): + EXOMISER = 'exomiser' + LIRICAL = 'lirical' + EXOMISER_CHOICE = 'E' + LIRICAL_CHOICE = 'L' + SCORE_NAME1 = 'scoreName1' + SCORE_NAME2 = 'scoreName2' + SCORE_NAME3 = 'scoreName3' + TOOL_CHOICES = ( + (EXOMISER_CHOICE, EXOMISER), + (LIRICAL_CHOICE, LIRICAL) + ) + SCORE_NAMES = { + EXOMISER_CHOICE: { + SCORE_NAME1: 'exomiser_score', + SCORE_NAME2: 'phenotype_score', + SCORE_NAME3: 'variant_score', + }, + LIRICAL_CHOICE: { + SCORE_NAME1: 'post_test_probability', + SCORE_NAME2: 'compositeLR', + SCORE_NAME3: None, + } + } + + tool = models.CharField(max_length=1, choices=TOOL_CHOICES) + rank = models.IntegerField() + disease_id = models.CharField(max_length=32) + score1 = models.FloatField(null=True) + score2 = models.FloatField(null=True) + score3 = models.FloatField(null=True) + + class Meta: + unique_together = ('sample', 'gene_id', 'disease_id') + + json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'score1', 'score2', 'score3'] diff --git a/seqr/urls.py b/seqr/urls.py index 30042b4257..3882d9229e 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -114,7 +114,7 @@ forgot_password from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana + update_rna_seq, load_rna_seq_sample_data, 
load_phenotype_pri_data, proxy_to_kibana from seqr.views.apis.report_api import \ anvil_export, \ discovery_sheet, \ @@ -307,6 +307,7 @@ 'data_management/get_all_users': get_all_users, 'data_management/update_rna_seq': update_rna_seq, 'data_management/load_rna_seq_sample/(?P[^/]+)': load_rna_seq_sample_data, + 'data_management/load_phenotype_pri_data': load_phenotype_pri_data, 'summary_data/saved_variants/(?P[^/]+)': saved_variants_page, 'summary_data/success_story/(?P[^/]+)': success_story, diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index d0f7301d7d..7fe2ff938a 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -23,7 +23,9 @@ from seqr.views.utils.json_utils import create_json_response, _to_camel_case from seqr.views.utils.permissions_utils import data_manager_required -from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm +from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization, Project + +from reference_data.models import Omim from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD @@ -395,6 +397,98 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'success': True}) +@data_manager_required +def load_phenotype_pri_data(request): + request_json = json.loads(request.body) + + file_name = request_json['file'] + ignore_extra_samples = request_json['ignoreExtraSamples'] + + logger.info(f'Loading phenotype prioritization data from {file_name}', request.user) + records = _load_phenotype_pri_file(file_name, ignore_extra_samples) + models = PhenotypePrioritization.objects.bulk_create([PhenotypePrioritization(**data) for data in records]) + sample_guids = [data['sample'].guid for data in records] + logger.info(f'create {len(models)} PhenotypePrioritization', request.user, db_update={ + 'dbEntity': PhenotypePrioritization, 'numEntities': len(models), 'parentEntityIds': sample_guids, + 'updateType': 'bulk_create', + }) + + return create_json_response({ + 'info': ['Phenotype prioritization data loaded'], + 'warnings': [], + 'fileName': file_name, + }) + + + +EXPECTED_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', + PhenotypePrioritization.SCORE_NAME1, 'score1', + PhenotypePrioritization.SCORE_NAME2, 'score2', + PhenotypePrioritization.SCORE_NAME3, 'score3'] + + +def _get_phenotype_pri(record, i, ignore_extra_samples): + tool = next((k for k, v in PhenotypePrioritization.TOOL_CHOICES if v == record['tool']), None) + if not tool: + raise ValueError('Expecting {} for the "tool" column but found {} (record {})'.format( + ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), record['tool'], i)) + + project_name = record['project'] + projects = Project.objects.filter(name=project_name) + if len(projects) < 1: + raise ValueError(f'Project {project_name} is not found (record {i})') + project = projects[0] + + sample_id = record['sampleId'] + samples = Sample.objects.filter(sample_id=sample_id, individual__family__project=project, is_active=True, + dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) + if len(samples) != 1: + if ignore_extra_samples: + return + raise ValueError(f'Sample with ID {sample_id} is not found (record {i})') + + disease_id = record['diseaseId'] + if disease_id.startswith('OMIM:'): + omim_recs = Omim.objects.filter(phenotype_mim_number=int(disease_id.replace('OMIM:', ''))) + if len(omim_recs) < 1: + raise ValueError(f'Disease ID {disease_id} can\'t be found 
in Omim (record {i})') + else: + raise ValueError(f'Unknown disease ID {disease_id} (record {i})') + + for score_name, value in PhenotypePrioritization.SCORE_NAMES[tool].items(): + if record.get(score_name) != value: + raise ValueError(f'Expecting {value} for {score_name} but {record[score_name]} found (record {i})') + + return { + 'sample': samples[0], + 'gene_id': record['geneId'], + 'tool': tool, + 'rank': int(record['rank']), + 'disease_id': disease_id, + 'score1': float(record['score1']), + 'score2': float(record['score2']) if PhenotypePrioritization.SCORE_NAMES[tool][PhenotypePrioritization.SCORE_NAME2] else None, + 'score3': float(record['score3']) if PhenotypePrioritization.SCORE_NAMES[tool][PhenotypePrioritization.SCORE_NAME3] else None, + } + + +def _load_phenotype_pri_file(file_name, ignore_extra_samples): + lines = file_iter(file_name) + + header = next(lines).rstrip().split('\t') + missing_header = [h for h in EXPECTED_HEADER if h not in header] + if len(missing_header): + raise ValueError('The following required columns are missing: {}'.format(', '.join(missing_header))) + + records = [] + for i, line in enumerate(lines): + row = line.rstrip().split('\t') + record = {header[cnt]: col for cnt, col in enumerate(row)} + record = _get_phenotype_pri(record, i, ignore_extra_samples) + if record: + records.append(record) + return records + + # Hop-by-hop HTTP response headers shouldn't be forwarded. # More info at: http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.5.1 EXCLUDE_HTTP_RESPONSE_HEADERS = { diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx index 7729149c68..ab25d5e7a3 100644 --- a/ui/pages/DataManagement/DataManagement.jsx +++ b/ui/pages/DataManagement/DataManagement.jsx @@ -10,6 +10,7 @@ import ElasticsearchStatus from './components/ElasticsearchStatus' import RnaSeq from './components/RnaSeq' import SampleQc from './components/SampleQc' import Users from './components/Users' +import PhenotypePri from './components/PhenotypePri' const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' } @@ -22,6 +23,7 @@ export const DATA_MANAGEMENT_PAGES = [ { path: 'sample_qc', component: SampleQc }, { path: 'rna_seq', component: RnaSeq }, { path: 'users', component: Users }, + { path: 'lirical_exomiser', component: PhenotypePri }, ] const DataManagement = ({ match, user }) => ( diff --git a/ui/pages/DataManagement/components/PhenotypePri.jsx b/ui/pages/DataManagement/components/PhenotypePri.jsx new file mode 100644 index 0000000000..de64606ba3 --- /dev/null +++ b/ui/pages/DataManagement/components/PhenotypePri.jsx @@ -0,0 +1,31 @@ +import { connect } from 'react-redux' + +import { validators } from 'shared/components/form/FormHelpers' +import { BooleanCheckbox } from 'shared/components/form/Inputs' +import UploadFormPage from 'shared/components/page/UploadFormPage' + +import { getPhenoPriUploadStats } from '../selectors' +import { uploadPhenoPri } from '../reducers' + +const mapStateToProps = state => ({ + fields: [ + { + name: 'file', + label: 'Phenotype-based prioritization data (.tsv)', + placeholder: 'gs:// Google bucket path', + validate: validators.required, + }, + { + name: 'ignoreExtraSamples', + component: BooleanCheckbox, + label: 'Ignore extra samples', + }, + ], + uploadStats: getPhenoPriUploadStats(state), +}) + +const mapDispatchToProps = { + onSubmit: uploadPhenoPri, +} + +export default connect(mapStateToProps, mapDispatchToProps)(UploadFormPage) diff --git a/ui/pages/DataManagement/reducers.js 
b/ui/pages/DataManagement/reducers.js index 1c20feb3d9..080b56b338 100644 --- a/ui/pages/DataManagement/reducers.js +++ b/ui/pages/DataManagement/reducers.js @@ -8,6 +8,7 @@ const REQUEST_ELASTICSEARCH_STATUS = 'REQUEST_ELASTICSEARCH_STATUS' const RECEIVE_ELASTICSEARCH_STATUS = 'RECEIVE_ELASTICSEARCH_STATUS' const RECEIVE_PIPELINE_UPLOAD_STATS = 'RECEIVE_PIPELINE_UPLOAD_STATS' const RECEIVE_RNA_SEQ_UPLOAD_STATS = 'RECEIVE_RNA_SEQ_UPLOAD_STATS' +const RECEIVE_PHENO_PRI_UPLOAD_STATS = 'RECEIVE_PHENO_PRI_UPLOAD_STATS' const REQUEST_ALL_USERS = 'REQUEST_ALL_USERS' const RECEIVE_ALL_USERS = 'RECEIVE_ALL_USERS' @@ -75,11 +76,27 @@ export const uploadRnaSeq = values => (dispatch) => { }) } +export const uploadPhenoPri = values => (dispatch) => { + let successResponseJson = null + return new HttpRequestHelper( + '/api/data_management/load_phenotype_pri_data', + (responseJson) => { + successResponseJson = responseJson + }, + (e) => { + successResponseJson = { warnings: [e.message] } + }, + ).post(values).then(() => { + dispatch({ type: RECEIVE_PHENO_PRI_UPLOAD_STATS, newValue: successResponseJson }) + }) +} + export const reducers = { elasticsearchStatusLoading: loadingReducer(REQUEST_ELASTICSEARCH_STATUS, RECEIVE_ELASTICSEARCH_STATUS), elasticsearchStatus: createSingleObjectReducer(RECEIVE_ELASTICSEARCH_STATUS), qcUploadStats: createSingleValueReducer(RECEIVE_PIPELINE_UPLOAD_STATS, {}), rnaSeqUploadStats: createSingleValueReducer(RECEIVE_RNA_SEQ_UPLOAD_STATS, {}), + phenoPriUploadStats: createSingleValueReducer(RECEIVE_PHENO_PRI_UPLOAD_STATS, {}), allUsers: createSingleValueReducer(RECEIVE_ALL_USERS, [], 'users'), allUsersLoading: loadingReducer(REQUEST_ALL_USERS, RECEIVE_ALL_USERS), } diff --git a/ui/pages/DataManagement/selectors.js b/ui/pages/DataManagement/selectors.js index 6d59629130..48ffc39d45 100644 --- a/ui/pages/DataManagement/selectors.js +++ b/ui/pages/DataManagement/selectors.js @@ -4,3 +4,4 @@ export const getQcUploadStats = state => state.qcUploadStats export const getRnaSeqUploadStats = state => state.rnaSeqUploadStats export const getAllUsersLoading = state => state.allUsersLoading.isLoading export const getAllUsers = state => state.allUsers +export const getPhenoPriUploadStats = state => state.phenoPriUploadStats From 9c22bf34e8ed30984dd6f9d4a1f90b64d6cca5a1 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 28 Sep 2022 15:24:29 -0400 Subject: [PATCH 02/96] Update phenotype pri model. 
--- .../0048_phenotypeprioritization.py | 6 ++-- seqr/models.py | 28 +++++++------------ seqr/views/apis/data_manager_api.py | 21 +++++++------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py index bb24d75db8..7b08e44af9 100644 --- a/seqr/migrations/0048_phenotypeprioritization.py +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.15 on 2022-09-27 15:01 +# Generated by Django 3.2.15 on 2022-09-28 15:26 from django.db import migrations, models import django.db.models.deletion @@ -19,9 +19,7 @@ class Migration(migrations.Migration): ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), ('rank', models.IntegerField()), ('disease_id', models.CharField(max_length=32)), - ('score1', models.FloatField(null=True)), - ('score2', models.FloatField(null=True)), - ('score3', models.FloatField(null=True)), + ('scores', models.JSONField()), ('sample', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.sample')), ], options={ diff --git a/seqr/models.py b/seqr/models.py index a7ce22508e..d15119c78f 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1056,38 +1056,30 @@ class Meta: class PhenotypePrioritization(DeletableSampleMetadataModel): - EXOMISER = 'exomiser' - LIRICAL = 'lirical' EXOMISER_CHOICE = 'E' LIRICAL_CHOICE = 'L' - SCORE_NAME1 = 'scoreName1' - SCORE_NAME2 = 'scoreName2' - SCORE_NAME3 = 'scoreName3' TOOL_CHOICES = ( - (EXOMISER_CHOICE, EXOMISER), - (LIRICAL_CHOICE, LIRICAL) + (EXOMISER_CHOICE, 'exomiser'), + (LIRICAL_CHOICE, 'lirical') ) - SCORE_NAMES = { + SCORE_FIELDS = { EXOMISER_CHOICE: { - SCORE_NAME1: 'exomiser_score', - SCORE_NAME2: 'phenotype_score', - SCORE_NAME3: 'variant_score', + 'exomiser_score': 'e', + 'phenotype_score': 'p', + 'variant_score': 'v', }, LIRICAL_CHOICE: { - SCORE_NAME1: 'post_test_probability', - SCORE_NAME2: 'compositeLR', - SCORE_NAME3: None, + 'post_test_probability': 'p', + 'compositeLR': 'c', } } tool = models.CharField(max_length=1, choices=TOOL_CHOICES) rank = models.IntegerField() disease_id = models.CharField(max_length=32) - score1 = models.FloatField(null=True) - score2 = models.FloatField(null=True) - score3 = models.FloatField(null=True) + scores = models.JSONField() class Meta: unique_together = ('sample', 'gene_id', 'disease_id') - json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'score1', 'score2', 'score3'] + json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'scores'] diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 7fe2ff938a..5e354190c7 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -420,11 +420,8 @@ def load_phenotype_pri_data(request): }) - EXPECTED_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', - PhenotypePrioritization.SCORE_NAME1, 'score1', - PhenotypePrioritization.SCORE_NAME2, 'score2', - PhenotypePrioritization.SCORE_NAME3, 'score3'] + 'scoreName1', 'score1', 'scoreName2', 'score2', 'scoreName3', 'score3'] def _get_phenotype_pri(record, i, ignore_extra_samples): @@ -455,9 +452,15 @@ def _get_phenotype_pri(record, i, ignore_extra_samples): else: raise ValueError(f'Unknown disease ID {disease_id} (record {i})') - for score_name, value in PhenotypePrioritization.SCORE_NAMES[tool].items(): - if record.get(score_name) != value: - raise ValueError(f'Expecting {value} for {score_name} 
but {record[score_name]} found (record {i})') + scores = {} + for score in ['1', '2', '3']: + scoreName = record.get('scoreName' + score) + if scoreName: + score_field = PhenotypePrioritization.SCORE_FIELDS[tool][scoreName] + if not score_field: + raise ValueError(f'Unexpected score name {scoreName} (record {i})') + score = record.get('score' + score) + scores[score_field] = float(score) return { 'sample': samples[0], @@ -465,9 +468,7 @@ def _get_phenotype_pri(record, i, ignore_extra_samples): 'tool': tool, 'rank': int(record['rank']), 'disease_id': disease_id, - 'score1': float(record['score1']), - 'score2': float(record['score2']) if PhenotypePrioritization.SCORE_NAMES[tool][PhenotypePrioritization.SCORE_NAME2] else None, - 'score3': float(record['score3']) if PhenotypePrioritization.SCORE_NAMES[tool][PhenotypePrioritization.SCORE_NAME3] else None, + 'scores': scores, } From ce7ed898daee52f968c9e921c98d62b03960adfd Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 29 Sep 2022 15:42:50 -0400 Subject: [PATCH 03/96] Add displaying LIRICAL. --- seqr/views/utils/variant_utils.py | 12 +- ui/redux/rootReducer.js | 1 + ui/redux/selectors.js | 1 + .../components/panel/variants/VariantGene.jsx | 117 ++++++++++++------ .../components/panel/variants/selectors.js | 18 ++- 5 files changed, 107 insertions(+), 42 deletions(-) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 8d3eecf37c..7bccb0e628 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -128,6 +128,12 @@ def _get_rna_seq_outliers(gene_ids, families): return data_by_individual_gene +def _get_phenotype_pri_data(gene_ids, families): + data_by_individual_gene = defaultdict(lambda: {'outliers': {}}) + + return data_by_individual_gene + + def _add_family_has_rna_tpm(families_by_guid): tpm_families = RnaSeqTpm.objects.filter( sample__individual__family__guid__in=families_by_guid.keys() @@ -159,7 +165,8 @@ def _add_pa_detail(locus_list_gene, locus_list_guid, gene_json): LOAD_FAMILY_CONTEXT_PARAM = 'loadFamilyContext' def get_variants_response(request, saved_variants, response_variants=None, add_all_context=False, include_igv=True, - add_locus_list_detail=False, include_rna_seq=True, include_project_name=False): + add_locus_list_detail=False, include_rna_seq=True, include_project_name=False, + include_phe_pri=True): response = get_json_for_saved_variants_with_tags(saved_variants, add_details=True) variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants @@ -204,4 +211,7 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a if families_by_guid: _add_family_has_rna_tpm(families_by_guid) + if include_phe_pri: + response['phePriData'] = _get_phenotype_pri_data(genes.keys(), families) + return response diff --git a/ui/redux/rootReducer.js b/ui/redux/rootReducer.js index 979eb441af..792fd96faa 100644 --- a/ui/redux/rootReducer.js +++ b/ui/redux/rootReducer.js @@ -319,6 +319,7 @@ const rootReducer = combineReducers({ mmeResultsByGuid: createObjectsByIdReducer(RECEIVE_DATA, 'mmeResultsByGuid'), genesById: createObjectsByIdReducer(RECEIVE_DATA, 'genesById'), rnaSeqDataByIndividual: createObjectsByIdReducer(RECEIVE_DATA, 'rnaSeqData'), + phePriDataByIndividual: createObjectsByIdReducer(RECEIVE_DATA, 'phePriData'), genesLoading: loadingReducer(REQUEST_GENES, RECEIVE_DATA), hpoTermsByParent: createObjectsByIdReducer(RECEIVE_HPO_TERMS), hpoTermsLoading: loadingReducer(REQUEST_HPO_TERMS, 
RECEIVE_HPO_TERMS), diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 3067761236..0e143793a8 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -30,6 +30,7 @@ export const getLocusListsByGuid = state => state.locusListsByGuid export const getLocusListsIsLoading = state => state.locusListsLoading.isLoading export const getLocusListIsLoading = state => state.locusListLoading.isLoading export const getRnaSeqDataByIndividual = state => state.rnaSeqDataByIndividual +export const getPhePriDataByIndividual = state => state.phePriDataByIndividual export const getUser = state => state.user export const getUserOptionsByUsername = state => state.userOptionsByUsername export const getUserOptionsIsLoading = state => state.userOptionsLoading.isLoading diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 298a79402f..80e1b1c26c 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -18,7 +18,7 @@ import { GeneSearchLink } from '../../buttons/SearchResultsLink' import ShowGeneModal from '../../buttons/ShowGeneModal' import Modal from '../../modal/Modal' import { GenCC, ClingenLabel } from '../genes/GeneDetail' -import { getRnaSeqOutilerDataByFamilyGene } from './selectors' +import { getRnaSeqOutilerDataByFamilyGene, getPhePriDataByFamilyGene } from './selectors' const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm')) @@ -314,7 +314,42 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ }, ] -const RNA_SEQ_DETAIL_FIELDS = ['zScore', 'pValue', 'pAdjust'] +const LIRICAL = 'L' +const EXOMISER = 'E' + +const SAMPLE_GENE_DETAIL_FIELDS = { + rnaSeqData: { infos: [], scores: ['zScore', 'pValue', 'pAdjust'] }, + liricalData: { infos: ['rank', 'diseases'], scores: ['postTestProbability', 'LR'] }, + exomiserData: { infos: ['rank', 'diseases'], scores: ['exomiserScore', 'phenotypeScore', 'variantScore'] }, +} + +const sampleGeneDetailsDisplay = (geneId, sampleGeneData, dataType) => ( +
+ + + + + {Object.values(SAMPLE_GENE_DETAIL_FIELDS[dataType]).flat().map( + field => {camelcaseToTitlecase(field).replace(' ', '-')}, + )} + + + + {Object.entries(sampleGeneData[geneId]).map(([individual, data]) => ( + + {individual} + {SAMPLE_GENE_DETAIL_FIELDS[dataType].infos.map( + field => {data[field]}, + )} + {SAMPLE_GENE_DETAIL_FIELDS[dataType].scores.map( + field => {data[field].toPrecision(3)}, + )} + + ))} + +
+
+) const GENE_DETAIL_SECTIONS = [ { @@ -370,31 +405,27 @@ const GENE_DETAIL_SECTIONS = [ color: 'pink', description: 'RNA-Seq Outlier', label: 'RNA-Seq', - showDetails: (gene, rnaSeqData) => rnaSeqData && rnaSeqData[gene.geneId], - detailsDisplay: (gene, rnaSeqData) => ( -

-        This gene is flagged as an outlier for RNA-Seq in the following samples
-        <Table basic="very" compact="very">
-          <Table.Header>
-            <Table.Row>
-              <Table.HeaderCell />
-              {RNA_SEQ_DETAIL_FIELDS.map(
-                field => <Table.HeaderCell key={field}>{camelcaseToTitlecase(field).replace(' ', '-')}</Table.HeaderCell>,
-              )}
-            </Table.Row>
-          </Table.Header>
-          <Table.Body>
-            {Object.entries(rnaSeqData[gene.geneId]).map(([individual, data]) => (
-              <Table.Row key={individual}>
-                <Table.HeaderCell>{individual}</Table.HeaderCell>
-                {RNA_SEQ_DETAIL_FIELDS.map(
-                  field => <Table.Cell key={field}>{data[field].toPrecision(3)}</Table.Cell>,
-                )}
-              </Table.Row>
-            ))}
-          </Table.Body>
-        </Table>
-
+ showDetails: (gene, { rnaSeqData }) => rnaSeqData && rnaSeqData[gene.geneId], + detailsDisplay: (gene, { rnaSeqData }) => ( + sampleGeneDetailsDisplay(gene.geneId, rnaSeqData, 'rnaSeqData') + ), + }, + { + color: 'orange', + description: 'LIRICAL Phenotype Prioritization', + label: 'LIRICAL', + showDetails: (gene, { phePriData }) => phePriData && phePriData[LIRICAL][gene.geneId], + detailsDisplay: (gene, { phePriData }) => ( + sampleGeneDetailsDisplay(gene.geneId, phePriData, 'lirical') + ), + }, + { + color: 'orange', + description: 'Exomiser Phenotype Prioritization', + label: 'Exomiser', + showDetails: (gene, { phePriData }) => phePriData && phePriData[EXOMISER][gene.geneId], + detailsDisplay: (gene, { phePriData }) => ( + sampleGeneDetailsDisplay(gene.geneId, phePriData, 'exomiser') ), }, ] @@ -421,9 +452,12 @@ const OmimSegments = styled(Segment.Group).attrs({ size: 'tiny', horizontal: tru } ` -const getDetailSections = (configs, gene, compact, labelProps, rnaSeqData) => configs.map( +const getDetailSections = (configs, gene, compact, labelProps, sampleGeneData) => configs.map( ({ showDetails, detailsDisplay, ...sectionConfig }) => ( - { ...sectionConfig, detail: showDetails(gene, rnaSeqData) && detailsDisplay(gene, rnaSeqData) }), + { + ...sectionConfig, + detail: showDetails(gene, sampleGeneData) && detailsDisplay(gene, sampleGeneData), + }), ).filter(({ detail }) => detail).map(({ detail, expandedDisplay, ...sectionConfig }) => ( (expandedDisplay && !compact) ? ( @@ -446,9 +480,9 @@ const getDetailSections = (configs, gene, compact, labelProps, rnaSeqData) => co )) export const GeneDetails = React.memo(( - { gene, compact, showLocusLists, showInlineDetails, rnaSeqData, ...labelProps }, + { gene, compact, showLocusLists, showInlineDetails, sampleGeneData, ...labelProps }, ) => { - const geneDetails = getDetailSections(GENE_DETAIL_SECTIONS, gene, compact, labelProps, rnaSeqData) + const geneDetails = getDetailSections(GENE_DETAIL_SECTIONS, gene, compact, labelProps, sampleGeneData) const geneDiseaseDetails = getDetailSections(GENE_DISEASE_DETAIL_SECTIONS, gene, compact, labelProps) const hasLocusLists = showLocusLists && gene.locusListGuids.length > 0 const showDivider = !showInlineDetails && geneDetails.length > 0 && (hasLocusLists || geneDiseaseDetails.length > 0) @@ -477,7 +511,7 @@ GeneDetails.propTypes = { compact: PropTypes.bool, showLocusLists: PropTypes.bool, showInlineDetails: PropTypes.bool, - rnaSeqData: PropTypes.object, + sampleGeneData: PropTypes.object, } const GeneSearchLinkWithPopup = props => ( @@ -497,7 +531,7 @@ const getGeneConsequence = (geneId, variant) => { } const BaseVariantGene = React.memo(( - { geneId, gene, variant, compact, showInlineDetails, compoundHetToggle, hasRnaTpmData, rnaSeqData }, + { geneId, gene, variant, compact, showInlineDetails, compoundHetToggle, hasRnaTpmData, sampleGeneData }, ) => { const geneConsequence = getGeneConsequence(geneId, variant) @@ -514,7 +548,7 @@ const BaseVariantGene = React.memo(( showInlineDetails={showInlineDetails} margin={showInlineDetails ? 
'1em .5em 0px 0px' : null} horizontal={showInlineDetails} - rnaSeqData={rnaSeqData} + sampleGeneData={sampleGeneData} showLocusLists /> ) @@ -592,12 +626,15 @@ BaseVariantGene.propTypes = { showInlineDetails: PropTypes.bool, compoundHetToggle: PropTypes.func, hasRnaTpmData: PropTypes.bool, - rnaSeqData: PropTypes.object, + sampleGeneData: PropTypes.object, } const getRnaSeqProps = (state, ownProps) => ({ hasRnaTpmData: getFamiliesByGuid(state)[ownProps.variant.familyGuids[0]]?.hasRnaTpmData, - rnaSeqData: getRnaSeqOutilerDataByFamilyGene(state)[ownProps.variant.familyGuids[0]], + sampleGeneData: { + rnaSeqData: getRnaSeqOutilerDataByFamilyGene(state)[ownProps.variant.familyGuids[0]], + phePriData: getPhePriDataByFamilyGene(state)[ownProps.variant.familyGuids[0]], + }, }) const mapStateToProps = (state, ownProps) => ({ @@ -613,7 +650,7 @@ class VariantGenes extends React.PureComponent { variant: PropTypes.object.isRequired, mainGeneId: PropTypes.string, genesById: PropTypes.object.isRequired, - rnaSeqData: PropTypes.object, + sampleGeneData: PropTypes.object, hasRnaTpmData: PropTypes.bool, showMainGene: PropTypes.bool, } @@ -629,7 +666,7 @@ class VariantGenes extends React.PureComponent { } render() { - const { variant, genesById, mainGeneId, showMainGene, rnaSeqData, hasRnaTpmData } = this.props + const { variant, genesById, mainGeneId, showMainGene, sampleGeneData, hasRnaTpmData } = this.props const { showAll } = this.state const geneIds = Object.keys(variant.transcripts || {}) const genes = geneIds.map(geneId => genesById[geneId]).filter(gene => gene) @@ -648,7 +685,7 @@ class VariantGenes extends React.PureComponent { geneId={gene.geneId} gene={gene} variant={variant} - rnaSeqData={rnaSeqData} + sampleGeneData={sampleGeneData} hasRnaTpmData={hasRnaTpmData} showInlineDetails={!mainGeneId} compact @@ -679,7 +716,7 @@ class VariantGenes extends React.PureComponent { details={sectionGenes.length > 0 && sectionGenes.map(gene => (
- {detailsDisplay(gene, rnaSeqData)} + {detailsDisplay(gene, sampleGeneData)}
))} diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 2d5e90c8b4..b1871f60d6 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -14,7 +14,7 @@ import { } from 'shared/utils/constants' import { getVariantTagsByGuid, getVariantNotesByGuid, getSavedVariantsByGuid, getAnalysisGroupsByGuid, getGenesById, getUser, - getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, + getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, getPhePriDataByIndividual, } from 'redux/selectors' export const getRnaSeqOutilerDataByFamilyGene = createSelector( @@ -33,6 +33,22 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( ), ) +export const getPhePriDataByFamilyGene = createSelector( + getIndividualsByGuid, + getPhePriDataByIndividual, + (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual).reduce( + (acc, [individualGuid, rnaSeqData]) => { + const { familyGuid, displayName } = individualsByGuid[individualGuid] + acc[familyGuid] = Object.entries(rnaSeqData.outliers || {}).reduce( + (acc2, [geneId, data]) => (data.isSignificant ? + { ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } } : acc2 + ), acc[familyGuid] || {}, + ) + return acc + }, {}, + ), +) + // Saved variant selectors export const getSavedVariantTableState = state => ( state.currentProjectGuid ? state.savedVariantTableState : state.allProjectSavedVariantTableState From 81b9ca4baf1184b580b28047eb806aa8a97b06b1 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 29 Sep 2022 15:55:48 -0400 Subject: [PATCH 04/96] Change the model to save all original input data. 
--- .../0048_phenotypeprioritization.py | 3 ++- seqr/models.py | 12 +----------- seqr/views/apis/data_manager_api.py | 19 +++++-------------- 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py index 7b08e44af9..ae3964cd67 100644 --- a/seqr/migrations/0048_phenotypeprioritization.py +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.15 on 2022-09-28 15:26 +# Generated by Django 3.2.15 on 2022-09-29 19:51 from django.db import migrations, models import django.db.models.deletion @@ -19,6 +19,7 @@ class Migration(migrations.Migration): ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), ('rank', models.IntegerField()), ('disease_id', models.CharField(max_length=32)), + ('disease_name', models.TextField()), ('scores', models.JSONField()), ('sample', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.sample')), ], diff --git a/seqr/models.py b/seqr/models.py index d15119c78f..1fc05e352e 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1062,21 +1062,11 @@ class PhenotypePrioritization(DeletableSampleMetadataModel): (EXOMISER_CHOICE, 'exomiser'), (LIRICAL_CHOICE, 'lirical') ) - SCORE_FIELDS = { - EXOMISER_CHOICE: { - 'exomiser_score': 'e', - 'phenotype_score': 'p', - 'variant_score': 'v', - }, - LIRICAL_CHOICE: { - 'post_test_probability': 'p', - 'compositeLR': 'c', - } - } tool = models.CharField(max_length=1, choices=TOOL_CHOICES) rank = models.IntegerField() disease_id = models.CharField(max_length=32) + disease_name = models.TextField() scores = models.JSONField() class Meta: diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 5e354190c7..cf5492b5b4 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -25,8 +25,6 @@ from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization, Project -from reference_data.models import Omim - from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD logger = SeqrLogger(__name__) @@ -445,22 +443,14 @@ def _get_phenotype_pri(record, i, ignore_extra_samples): raise ValueError(f'Sample with ID {sample_id} is not found (record {i})') disease_id = record['diseaseId'] - if disease_id.startswith('OMIM:'): - omim_recs = Omim.objects.filter(phenotype_mim_number=int(disease_id.replace('OMIM:', ''))) - if len(omim_recs) < 1: - raise ValueError(f'Disease ID {disease_id} can\'t be found in Omim (record {i})') - else: - raise ValueError(f'Unknown disease ID {disease_id} (record {i})') + disease_name = record['diseaseName'] scores = {} for score in ['1', '2', '3']: - scoreName = record.get('scoreName' + score) - if scoreName: - score_field = PhenotypePrioritization.SCORE_FIELDS[tool][scoreName] - if not score_field: - raise ValueError(f'Unexpected score name {scoreName} (record {i})') + score_name = record.get('scoreName' + score) + if score_name: score = record.get('score' + score) - scores[score_field] = float(score) + scores[score_name] = float(score) return { 'sample': samples[0], @@ -468,6 +458,7 @@ def _get_phenotype_pri(record, i, ignore_extra_samples): 'tool': tool, 'rank': int(record['rank']), 'disease_id': disease_id, + 'disease_name': disease_name, 'scores': scores, } From 595dbce1da73f89e7151b5b762f92cc1ea926073 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 30 Sep 2022 11:54:54 -0400 Subject: [PATCH 05/96] Add phenotype 
prioritization data fetching. --- seqr/views/utils/orm_to_json_utils.py | 7 +++++++ seqr/views/utils/variant_utils.py | 14 +++++++++++--- ui/shared/components/panel/variants/selectors.js | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 86b5f6e2aa..ee29272a7b 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -866,3 +866,10 @@ def _process_result(data, model): data['isSignificant'] = data['pAdjust'] < model.SIGNIFICANCE_THRESHOLD return _get_json_for_models(models, process_result=_process_result, **kwargs) + + +def get_json_for_phenotype_pri(models, **kwargs): + def _process_result(data): + data['scores'] = {_to_camel_case(score) for score, value in data['scores'].items()} + + return _get_json_for_models(models, process_result=_process_result, **kwargs) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 7bccb0e628..ac7cbc8c4c 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -3,12 +3,12 @@ import redis from seqr.models import SavedVariant, VariantSearchResults, Family, LocusList, LocusListInterval, LocusListGene, \ - RnaSeqOutlier, RnaSeqTpm + RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization from seqr.utils.elasticsearch.utils import get_es_variants_for_variant_ids from seqr.utils.gene_utils import get_genes_for_variants from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.orm_to_json_utils import get_json_for_discovery_tags, get_json_for_locus_lists, \ - _get_json_for_models, get_json_for_rna_seq_outliers, get_json_for_saved_variants_with_tags + _get_json_for_models, get_json_for_rna_seq_outliers, get_json_for_saved_variants_with_tags, get_json_for_phenotype_pri from seqr.views.utils.permissions_utils import has_case_review_permissions, user_is_analyst from seqr.views.utils.project_context_utils import add_project_tag_types, add_families_context from settings import REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT @@ -129,7 +129,15 @@ def _get_rna_seq_outliers(gene_ids, families): def _get_phenotype_pri_data(gene_ids, families): - data_by_individual_gene = defaultdict(lambda: {'outliers': {}}) + data_by_individual_gene = defaultdict(lambda: {'phepri': {}}) + + phe_pri_data = get_json_for_phenotype_pri( + PhenotypePrioritization.objects.filter(gene_id__in=gene_ids, sample__individual__family__in=families), + nested_fields=[{'fields': ('sample', 'individual', 'guid'), 'key': 'individualGuid'}], + ) + + for data in phe_pri_data: + data_by_individual_gene[data.pop('individualGuid')]['phepri'][data['geneId']] = data return data_by_individual_gene diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index b1871f60d6..bc9af2f7b0 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -37,9 +37,9 @@ export const getPhePriDataByFamilyGene = createSelector( getIndividualsByGuid, getPhePriDataByIndividual, (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual).reduce( - (acc, [individualGuid, rnaSeqData]) => { + (acc, [individualGuid, phePriData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = Object.entries(rnaSeqData.outliers || {}).reduce( + acc[familyGuid] = Object.entries(phePriData.outliers || {}).reduce( (acc2, [geneId, data]) => 
(data.isSignificant ? { ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } } : acc2 ), acc[familyGuid] || {}, From bb32278fae3a66e731075924c860804c106dc51b Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 4 Oct 2022 15:46:36 -0400 Subject: [PATCH 06/96] Update data loading. --- .../0048_phenotypeprioritization.py | 30 ----- seqr/models.py | 6 +- seqr/views/apis/data_manager_api.py | 67 +---------- seqr/views/utils/dataset_utils.py | 106 +++++++++++++++++- 4 files changed, 110 insertions(+), 99 deletions(-) delete mode 100644 seqr/migrations/0048_phenotypeprioritization.py diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py deleted file mode 100644 index ae3964cd67..0000000000 --- a/seqr/migrations/0048_phenotypeprioritization.py +++ /dev/null @@ -1,30 +0,0 @@ -# Generated by Django 3.2.15 on 2022-09-29 19:51 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('seqr', '0047_auto_20220908_1851'), - ] - - operations = [ - migrations.CreateModel( - name='PhenotypePrioritization', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('gene_id', models.CharField(max_length=20)), - ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), - ('rank', models.IntegerField()), - ('disease_id', models.CharField(max_length=32)), - ('disease_name', models.TextField()), - ('scores', models.JSONField()), - ('sample', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.sample')), - ], - options={ - 'unique_together': {('sample', 'gene_id', 'disease_id')}, - }, - ), - ] diff --git a/seqr/models.py b/seqr/models.py index 1fc05e352e..6b1e98fc5a 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1062,6 +1062,10 @@ class PhenotypePrioritization(DeletableSampleMetadataModel): (EXOMISER_CHOICE, 'exomiser'), (LIRICAL_CHOICE, 'lirical') ) + TOOL_LOOKUP = {v: k for k, v in TOOL_CHOICES} + + individual = models.ForeignKey('Individual', on_delete=models.CASCADE, db_index=True) + gene_id = models.CharField(max_length=20) # ensembl ID tool = models.CharField(max_length=1, choices=TOOL_CHOICES) rank = models.IntegerField() @@ -1070,6 +1074,4 @@ class PhenotypePrioritization(DeletableSampleMetadataModel): scores = models.JSONField() class Meta: - unique_together = ('sample', 'gene_id', 'disease_id') - json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'scores'] diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index cf5492b5b4..097035273a 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -18,7 +18,7 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger -from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm +from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_pri_file from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file from seqr.views.utils.json_utils import create_json_response, _to_camel_case from seqr.views.utils.permissions_utils import data_manager_required @@ -403,7 +403,7 @@ def load_phenotype_pri_data(request): ignore_extra_samples = request_json['ignoreExtraSamples'] logger.info(f'Loading phenotype prioritization data from {file_name}', request.user) - records = 
_load_phenotype_pri_file(file_name, ignore_extra_samples) + records = load_phenotype_pri_file(file_name, ignore_extra_samples) models = PhenotypePrioritization.objects.bulk_create([PhenotypePrioritization(**data) for data in records]) sample_guids = [data['sample'].guid for data in records] logger.info(f'create {len(models)} PhenotypePrioritization', request.user, db_update={ @@ -418,69 +418,6 @@ def load_phenotype_pri_data(request): }) -EXPECTED_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', - 'scoreName1', 'score1', 'scoreName2', 'score2', 'scoreName3', 'score3'] - - -def _get_phenotype_pri(record, i, ignore_extra_samples): - tool = next((k for k, v in PhenotypePrioritization.TOOL_CHOICES if v == record['tool']), None) - if not tool: - raise ValueError('Expecting {} for the "tool" column but found {} (record {})'.format( - ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), record['tool'], i)) - - project_name = record['project'] - projects = Project.objects.filter(name=project_name) - if len(projects) < 1: - raise ValueError(f'Project {project_name} is not found (record {i})') - project = projects[0] - - sample_id = record['sampleId'] - samples = Sample.objects.filter(sample_id=sample_id, individual__family__project=project, is_active=True, - dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS) - if len(samples) != 1: - if ignore_extra_samples: - return - raise ValueError(f'Sample with ID {sample_id} is not found (record {i})') - - disease_id = record['diseaseId'] - disease_name = record['diseaseName'] - - scores = {} - for score in ['1', '2', '3']: - score_name = record.get('scoreName' + score) - if score_name: - score = record.get('score' + score) - scores[score_name] = float(score) - - return { - 'sample': samples[0], - 'gene_id': record['geneId'], - 'tool': tool, - 'rank': int(record['rank']), - 'disease_id': disease_id, - 'disease_name': disease_name, - 'scores': scores, - } - - -def _load_phenotype_pri_file(file_name, ignore_extra_samples): - lines = file_iter(file_name) - - header = next(lines).rstrip().split('\t') - missing_header = [h for h in EXPECTED_HEADER if h not in header] - if len(missing_header): - raise ValueError('The following required columns are missing: {}'.format(', '.join(missing_header))) - - records = [] - for i, line in enumerate(lines): - row = line.rstrip().split('\t') - record = {header[cnt]: col for cnt, col in enumerate(row)} - record = _get_phenotype_pri(record, i, ignore_extra_samples) - if record: - records.append(record) - return records - - # Hop-by-hop HTTP response headers shouldn't be forwarded. 
# More info at: http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.5.1 EXCLUDE_HTTP_RESPONSE_HEADERS = { diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 3e95ebbd5f..8941612348 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -1,11 +1,12 @@ import elasticsearch_dsl from collections import defaultdict -from django.db.models import prefetch_related_objects +from django.db.models import prefetch_related_objects, Value, TextField +from django.db.models.functions import Concat from django.utils import timezone from tqdm import tqdm import random -from seqr.models import Sample, Individual, Family, RnaSeqOutlier, RnaSeqTpm +from seqr.models import Sample, Individual, Family, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization from seqr.utils.elasticsearch.utils import get_es_client, get_index_metadata from seqr.utils.file_utils import file_iter from seqr.utils.logging_utils import log_model_bulk_update, SeqrLogger @@ -449,3 +450,104 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples logger.warning(warning, user) return samples_to_load, info, warnings + + +PHENOTYPE_PRI_HEADER = { + 'tool': 'tool', 'project': 'project', 'sampleId': 'sample_id', 'rank': 'rank', 'geneId': 'gene_id', + 'diseaseId': 'disease_id', 'diseaseName': 'disease_name', 'scoreName1': 'score_name1', 'score1': 'score1', + 'scoreName2': 'score_name2', 'score2': 'score2', 'scoreName3': 'score_name3', 'score3': 'score3'} + + +def _parse_phenotype_pri_row(row): + record = {mapped_key: row[key] for key, mapped_key in PHENOTYPE_PRI_HEADER.items()} + + tool = PhenotypePrioritization.TOOL_LOOKUP.get(record.get('tool'), None) + if not tool: + raise ValueError('Expecting {} for the "tool" column but found {}'.format( + ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), row['tool'])) + record['tool'] = tool + + scores = {} + for score in ['1', '2', '3']: + score_name = record.pop('scoreName' + score, None) + if score_name: + scores[score_name] = record.pop('score' + score, None) + + return record + + +def load_phenotype_pri_file(file_path, user, ignore_extra_samples): + samples_by_id = defaultdict(dict) + f = file_iter(file_path) + header = _parse_tsv_row(next(f)) + missing_cols = [col for col in PHENOTYPE_PRI_HEADER.keys() if col not in header] + if missing_cols: + raise ValueError(f'Invalid file: missing column(s) {", ".join(missing_cols)}') + + for line in tqdm(f, unit=' rows'): + row = dict(zip(header, _parse_tsv_row(line))) + record = _parse_phenotype_pri_row(row) + sample_id = record.pop('sample_id', None) + project = record.pop('project', None) + if not sample_id or not project: + raise ValueError('Both sample ID and project fields are required.') + if samples_by_id[sample_id]: + if project != samples_by_id[sample_id]['project']: + raise ValueError(f'Invalid project name for sample {sample_id}') + samples_by_id[sample_id]['records'].append(record) + else: + samples_by_id[sample_id]['project'] = project + samples_by_id[sample_id]['records'] = [record] + + message = f'Parsed {len(samples_by_id)} LIRICAL/Exomiser phenotype-based prioritization samples' + info = [message] + logger.info(message, user) + + existing_inds = Individual.objects.annotate( + indv_project=Concat('individual_id', Value('/', output_field=TextField()), 'family__project__name') + ).filter( + indv_project__in={sample_id + '/' + value['project'] for sample_id, value in samples_by_id} + ) + + for ind in existing_inds: + 
samples_by_id[ind.individual_id]['individual'] = ind + + warnings = [] + extra_ids = set() + records_to_load_by_id = defaultdict(lambda: defaultdict(list)) + for sample_id, value in samples_by_id.items(): + if value['individual']: + for rec in value['records']: + rec['individual'] = value['individual'] + records_to_load_by_id[sample_id][rec['tool']].append(rec) + else: + extra_ids.add(sample_id) + + if extra_ids: + skipped_samples = ', '.join(sorted(extra_ids)) + if ignore_extra_samples: + warnings = [f'Skipped loading for the following {len(extra_ids)} unmatched samples: {skipped_samples}'] + else: + raise ValueError(f'Unable to find matches for the following samples: {skipped_samples}') + + # Delete old data + to_delete = PhenotypePrioritization.objects.annotate(tool_ind=Concat('tool', 'individual')).filter( + tool_ind__in=[tool+sample_id for sample_id, value in records_to_load_by_id.items() for tool in value.keys()], + ) + if to_delete: + prefetch_related_objects(to_delete, 'individual') + logger.info(f'delete {len(to_delete)} {PhenotypePrioritization.__name__}s', user, db_update={ + 'dbEntity': PhenotypePrioritization.__name__, 'numEntities': len(to_delete), 'updateType': 'bulk_delete', + 'parentEntityIds': list({model.individual.guid for model in to_delete}), + }) + to_delete.delete() + + prefetch_related_objects(existing_inds, 'family__project') + projects = {ind.family.project.name for ind in existing_inds} + project_names = ', '.join(sorted(projects)) + message = 'Attempted data loading for {} LIRICAL/Exomiser records in the following {} projects: {}'.format( + len(records_to_load_by_id), len(projects), project_names) + info.append(message) + logger.info(message, user) + + return [rec for tools in records_to_load_by_id.values() for recs in tools.values() for rec in recs] From f7b0bf340023f8b418e1f71ef2ab64366be1560b Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 5 Oct 2022 12:19:28 -0400 Subject: [PATCH 07/96] Update the data loading backend. 
--- .../0048_phenotypeprioritization.py | 27 ++++++ seqr/models.py | 11 +-- seqr/views/apis/data_manager_api.py | 21 ++--- seqr/views/utils/dataset_utils.py | 89 +++++++++++-------- 4 files changed, 94 insertions(+), 54 deletions(-) create mode 100644 seqr/migrations/0048_phenotypeprioritization.py diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py new file mode 100644 index 0000000000..d249142dcc --- /dev/null +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -0,0 +1,27 @@ +# Generated by Django 3.2.15 on 2022-10-05 16:09 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('seqr', '0047_auto_20220908_1851'), + ] + + operations = [ + migrations.CreateModel( + name='PhenotypePrioritization', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('gene_id', models.CharField(max_length=20)), + ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), + ('rank', models.IntegerField()), + ('disease_id', models.CharField(max_length=32)), + ('disease_name', models.TextField()), + ('scores', models.JSONField()), + ('individual', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.individual')), + ], + ), + ] diff --git a/seqr/models.py b/seqr/models.py index 6b1e98fc5a..8165bc5382 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1055,12 +1055,10 @@ class Meta: json_fields = ['gene_id', 'tpm'] -class PhenotypePrioritization(DeletableSampleMetadataModel): - EXOMISER_CHOICE = 'E' - LIRICAL_CHOICE = 'L' +class PhenotypePrioritization(models.Model): TOOL_CHOICES = ( - (EXOMISER_CHOICE, 'exomiser'), - (LIRICAL_CHOICE, 'lirical') + ('E', 'exomiser'), + ('L', 'lirical') ) TOOL_LOOKUP = {v: k for k, v in TOOL_CHOICES} @@ -1073,5 +1071,8 @@ class PhenotypePrioritization(DeletableSampleMetadataModel): disease_name = models.TextField() scores = models.JSONField() + def __unicode__(self): + return "%s:%s:%s" % (self.individual.individual_id, self.gene_id, self.disease_id) + class Meta: json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'scores'] diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 097035273a..ce602ce335 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -18,12 +18,12 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger -from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_pri_file +from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_pri from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file from seqr.views.utils.json_utils import create_json_response, _to_camel_case from seqr.views.utils.permissions_utils import data_manager_required -from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization, Project +from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD @@ -400,21 +400,22 @@ def load_phenotype_pri_data(request): request_json = json.loads(request.body) file_name = request_json['file'] - ignore_extra_samples = request_json['ignoreExtraSamples'] + ignore_extra_samples = request_json.get('ignoreExtraSamples', 
False) logger.info(f'Loading phenotype prioritization data from {file_name}', request.user) - records = load_phenotype_pri_file(file_name, ignore_extra_samples) + records, info, warnings = load_phenotype_pri(file_name, request.user, ignore_extra_samples) models = PhenotypePrioritization.objects.bulk_create([PhenotypePrioritization(**data) for data in records]) - sample_guids = [data['sample'].guid for data in records] - logger.info(f'create {len(models)} PhenotypePrioritization', request.user, db_update={ - 'dbEntity': PhenotypePrioritization, 'numEntities': len(models), 'parentEntityIds': sample_guids, + ind_guids = {data['individual'].guid for data in records} + logger.info(f'create {len(models)} {PhenotypePrioritization.__name__}', request.user, db_update={ + 'dbEntity': PhenotypePrioritization.__name__, 'numEntities': len(models), 'parentEntityIds': sorted(ind_guids), 'updateType': 'bulk_create', }) + info.append(f'Loaded {len(models)} LIRICAL/Exomiser data records') return create_json_response({ - 'info': ['Phenotype prioritization data loaded'], - 'warnings': [], - 'fileName': file_name, + 'info': info, + 'warnings': warnings, + 'success': True }) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 8941612348..0e5831590e 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -1,6 +1,6 @@ import elasticsearch_dsl from collections import defaultdict -from django.db.models import prefetch_related_objects, Value, TextField +from django.db.models import prefetch_related_objects, TextField from django.db.models.functions import Concat from django.utils import timezone from tqdm import tqdm @@ -452,8 +452,9 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples return samples_to_load, info, warnings +TOOL_FIELD = 'tool' PHENOTYPE_PRI_HEADER = { - 'tool': 'tool', 'project': 'project', 'sampleId': 'sample_id', 'rank': 'rank', 'geneId': 'gene_id', + 'tool': TOOL_FIELD, 'project': 'project', 'sampleId': 'sample_id', 'rank': 'rank', 'geneId': 'gene_id', 'diseaseId': 'disease_id', 'diseaseName': 'disease_name', 'scoreName1': 'score_name1', 'score1': 'score1', 'scoreName2': 'score_name2', 'score2': 'score2', 'scoreName3': 'score_name3', 'score3': 'score3'} @@ -461,29 +462,32 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples def _parse_phenotype_pri_row(row): record = {mapped_key: row[key] for key, mapped_key in PHENOTYPE_PRI_HEADER.items()} - tool = PhenotypePrioritization.TOOL_LOOKUP.get(record.get('tool'), None) + tool = PhenotypePrioritization.TOOL_LOOKUP.get(record[TOOL_FIELD], None) if not tool: raise ValueError('Expecting {} for the "tool" column but found {}'.format( - ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), row['tool'])) - record['tool'] = tool + ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), record[TOOL_FIELD])) + record[TOOL_FIELD] = tool - scores = {} - for score in ['1', '2', '3']: - score_name = record.pop('scoreName' + score, None) + scores = {record.pop('score_name1'): record.pop('score1')} + for score_index in ['2', '3']: + score_name = record.pop('score_name' + score_index, None) + score = record.pop('score' + score_index, None) if score_name: - scores[score_name] = record.pop('score' + score, None) + scores[score_name] = score + record['scores'] = scores return record -def load_phenotype_pri_file(file_path, user, ignore_extra_samples): - samples_by_id = defaultdict(dict) +def 
_load_phenotype_pri_file(file_path): + data_by_id = defaultdict(dict) f = file_iter(file_path) header = _parse_tsv_row(next(f)) missing_cols = [col for col in PHENOTYPE_PRI_HEADER.keys() if col not in header] if missing_cols: raise ValueError(f'Invalid file: missing column(s) {", ".join(missing_cols)}') + count = 0 for line in tqdm(f, unit=' rows'): row = dict(zip(header, _parse_tsv_row(line))) record = _parse_phenotype_pri_row(row) @@ -491,63 +495,70 @@ def load_phenotype_pri_file(file_path, user, ignore_extra_samples): project = record.pop('project', None) if not sample_id or not project: raise ValueError('Both sample ID and project fields are required.') - if samples_by_id[sample_id]: - if project != samples_by_id[sample_id]['project']: + if data_by_id[sample_id]: + if project != data_by_id[sample_id]['project']: # a sample must belong to a single project raise ValueError(f'Invalid project name for sample {sample_id}') - samples_by_id[sample_id]['records'].append(record) + data_by_id[sample_id]['records'].append(record) else: - samples_by_id[sample_id]['project'] = project - samples_by_id[sample_id]['records'] = [record] + data_by_id[sample_id]['project'] = project + data_by_id[sample_id]['records'] = [record] + count += 1 + + return count, data_by_id + - message = f'Parsed {len(samples_by_id)} LIRICAL/Exomiser phenotype-based prioritization samples' +def load_phenotype_pri(file_path, user, ignore_extra_samples): + count, data_by_id = _load_phenotype_pri_file(file_path) + + message = f'Parsed {count} LIRICAL/Exomiser data records in {len(data_by_id)} samples' info = [message] logger.info(message, user) - existing_inds = Individual.objects.annotate( - indv_project=Concat('individual_id', Value('/', output_field=TextField()), 'family__project__name') - ).filter( - indv_project__in={sample_id + '/' + value['project'] for sample_id, value in samples_by_id} - ) - - for ind in existing_inds: - samples_by_id[ind.individual_id]['individual'] = ind + indivs = Individual.objects.filter(individual_id__in=data_by_id.keys()) + prefetch_related_objects(indivs, 'family__project') + existing_indivs_by_id = {ind.individual_id: ind for ind in indivs + if ind.family.project.name == data_by_id[ind.individual_id]['project']} - warnings = [] extra_ids = set() - records_to_load_by_id = defaultdict(lambda: defaultdict(list)) - for sample_id, value in samples_by_id.items(): - if value['individual']: + extra_records = 0 + for sample_id, value in data_by_id.items(): + if existing_indivs_by_id[sample_id]: for rec in value['records']: - rec['individual'] = value['individual'] - records_to_load_by_id[sample_id][rec['tool']].append(rec) + rec['individual'] = existing_indivs_by_id[sample_id] else: + data_by_id.pop(sample_id) extra_ids.add(sample_id) + extra_records += len(value['records']) - if extra_ids: + warnings = [] + if extra_records: skipped_samples = ', '.join(sorted(extra_ids)) if ignore_extra_samples: - warnings = [f'Skipped loading for the following {len(extra_ids)} unmatched samples: {skipped_samples}'] + warnings = [f'Skipped loading {extra_records} records for the following {len(extra_ids)} unmatched samples: {skipped_samples}'] else: raise ValueError(f'Unable to find matches for the following samples: {skipped_samples}') # Delete old data - to_delete = PhenotypePrioritization.objects.annotate(tool_ind=Concat('tool', 'individual')).filter( - tool_ind__in=[tool+sample_id for sample_id, value in records_to_load_by_id.items() for tool in value.keys()], + to_delete = PhenotypePrioritization.objects.annotate( 
+ tool_ind=Concat('tool', 'individual__individual_id', output_field=TextField()) + ).filter( + tool_ind__in={rec[TOOL_FIELD]+sample_id for sample_id, values in data_by_id.items() for rec in values['records']}, ) if to_delete: prefetch_related_objects(to_delete, 'individual') + info.append(f'Deleted {len(to_delete)} existing LIRICAL/Exomiser records') logger.info(f'delete {len(to_delete)} {PhenotypePrioritization.__name__}s', user, db_update={ 'dbEntity': PhenotypePrioritization.__name__, 'numEntities': len(to_delete), 'updateType': 'bulk_delete', 'parentEntityIds': list({model.individual.guid for model in to_delete}), }) to_delete.delete() - prefetch_related_objects(existing_inds, 'family__project') - projects = {ind.family.project.name for ind in existing_inds} + records_to_load = [rec for value in data_by_id.values() for rec in value['records']] + projects = {value['project'] for value in data_by_id.values()} project_names = ', '.join(sorted(projects)) message = 'Attempted data loading for {} LIRICAL/Exomiser records in the following {} projects: {}'.format( - len(records_to_load_by_id), len(projects), project_names) + len(records_to_load), len(projects), project_names) info.append(message) logger.info(message, user) - return [rec for tools in records_to_load_by_id.values() for recs in tools.values() for rec in recs] + return records_to_load, info, warnings From 2495ffabcba68221aaa50dd3568b95cbdf51414d Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 5 Oct 2022 12:23:31 -0400 Subject: [PATCH 08/96] Add change log. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46630a7edd..ed0b3a1f0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add PhenotypePrioritization model (REQUIRES DB MIGRATION) ## 9/28/22 * Add Gregor fields to sample manifest (REQUIRES DB MIGRATION) From 9e5bfb63aedd127cab918f78e939b9b74e590c6a Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 11 Oct 2022 10:05:39 -0400 Subject: [PATCH 09/96] Update LIRICAL display. 
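This patch keys the variant-page phenotype data by tool instead of a single 'phepri' bucket. A minimal sketch of the grouping performed in _get_phenotype_pri_data, assuming record dicts carrying individualGuid/tool/geneId keys; the sample values below are invented for illustration:

    from collections import defaultdict

    def group_by_individual_tool_gene(records):
        # per individual, then per tool choice ('L' for LIRICAL, 'E' for Exomiser), then per gene
        data = defaultdict(lambda: {'L': {}, 'E': {}})
        for rec in records:
            data[rec.pop('individualGuid')][rec['tool']][rec['geneId']] = rec
        return data

    records = [{'individualGuid': 'I000001_na19675', 'tool': 'L', 'geneId': 'ENSG00000135953', 'rank': 1}]
    grouped = group_by_individual_tool_gene(records)
    # grouped['I000001_na19675']['L']['ENSG00000135953'] -> {'tool': 'L', 'geneId': ..., 'rank': 1}
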
--- seqr/models.py | 6 ++++-- seqr/views/utils/dataset_utils.py | 2 +- seqr/views/utils/orm_to_json_utils.py | 2 +- seqr/views/utils/variant_utils.py | 9 +++++---- .../components/panel/variants/VariantGene.jsx | 4 +--- ui/shared/components/panel/variants/selectors.js | 14 ++++++++++---- ui/shared/utils/constants.js | 3 +++ 7 files changed, 25 insertions(+), 15 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 8165bc5382..9b4de74617 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1056,9 +1056,11 @@ class Meta: class PhenotypePrioritization(models.Model): + EXOMISER_CHOICE = 'E' + LIRICAL_CHOICE = 'L' TOOL_CHOICES = ( - ('E', 'exomiser'), - ('L', 'lirical') + (EXOMISER_CHOICE, 'exomiser'), + (LIRICAL_CHOICE, 'lirical') ) TOOL_LOOKUP = {v: k for k, v in TOOL_CHOICES} diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 0e5831590e..95c0e7cd13 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -460,7 +460,7 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples def _parse_phenotype_pri_row(row): - record = {mapped_key: row[key] for key, mapped_key in PHENOTYPE_PRI_HEADER.items()} + record = {mapped_key: row.get(key, None) for key, mapped_key in PHENOTYPE_PRI_HEADER.items()} tool = PhenotypePrioritization.TOOL_LOOKUP.get(record[TOOL_FIELD], None) if not tool: diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index ee29272a7b..4688ba440a 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -869,7 +869,7 @@ def _process_result(data, model): def get_json_for_phenotype_pri(models, **kwargs): - def _process_result(data): + def _process_result(data, model): data['scores'] = {_to_camel_case(score) for score, value in data['scores'].items()} return _get_json_for_models(models, process_result=_process_result, **kwargs) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index ac7cbc8c4c..16d15dab99 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -129,15 +129,16 @@ def _get_rna_seq_outliers(gene_ids, families): def _get_phenotype_pri_data(gene_ids, families): - data_by_individual_gene = defaultdict(lambda: {'phepri': {}}) + data_by_individual_gene = defaultdict(lambda: {PhenotypePrioritization.LIRICAL_CHOICE: {}, + PhenotypePrioritization.EXOMISER_CHOICE: {}}) phe_pri_data = get_json_for_phenotype_pri( - PhenotypePrioritization.objects.filter(gene_id__in=gene_ids, sample__individual__family__in=families), - nested_fields=[{'fields': ('sample', 'individual', 'guid'), 'key': 'individualGuid'}], + PhenotypePrioritization.objects.filter(gene_id__in=gene_ids, individual__family__in=families), + nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}], ) for data in phe_pri_data: - data_by_individual_gene[data.pop('individualGuid')]['phepri'][data['geneId']] = data + data_by_individual_gene[data.pop('individualGuid')][data['tool']][data['geneId']] = data return data_by_individual_gene diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 80e1b1c26c..bcb7a7258e 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -9,6 +9,7 @@ import { getGenesById, getLocusListsByGuid, getFamiliesByGuid } from 'redux/sele import { panelAppUrl, moiToMoiInitials } from 
'../../../utils/panelAppUtils' import { MISSENSE_THRESHHOLD, LOF_THRESHHOLD, PANEL_APP_CONFIDENCE_LEVEL_COLORS, PANEL_APP_CONFIDENCE_DESCRIPTION, + LIRICAL, EXOMISER, } from '../../../utils/constants' import { compareObjects } from '../../../utils/sortUtils' import { camelcaseToTitlecase } from '../../../utils/stringUtils' @@ -314,9 +315,6 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ }, ] -const LIRICAL = 'L' -const EXOMISER = 'E' - const SAMPLE_GENE_DETAIL_FIELDS = { rnaSeqData: { infos: [], scores: ['zScore', 'pValue', 'pAdjust'] }, liricalData: { infos: ['rank', 'diseases'], scores: ['postTestProbability', 'LR'] }, diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index bc9af2f7b0..1dd2e01ae1 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -11,6 +11,7 @@ import { VARIANT_SORT_LOOKUP, SHOW_ALL, VARIANT_EXPORT_DATA, + LIRICAL, EXOMISER, } from 'shared/utils/constants' import { getVariantTagsByGuid, getVariantNotesByGuid, getSavedVariantsByGuid, getAnalysisGroupsByGuid, getGenesById, getUser, @@ -33,16 +34,21 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( ), ) +const TOOLS = [LIRICAL, EXOMISER] export const getPhePriDataByFamilyGene = createSelector( getIndividualsByGuid, getPhePriDataByIndividual, (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual).reduce( (acc, [individualGuid, phePriData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = Object.entries(phePriData.outliers || {}).reduce( - (acc2, [geneId, data]) => (data.isSignificant ? - { ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } } : acc2 - ), acc[familyGuid] || {}, + acc[familyGuid] = TOOLS.reduce( + (accTool, tool) => ({ + ...accTool, + [tool]: Object.entries(phePriData[tool] || {}).reduce( + (acc2, [geneId, data]) => ({ ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } }), + acc[familyGuid] || {}, + ), + }), {}, ) return acc }, {}, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index e7614422ed..b24906f303 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1304,6 +1304,9 @@ const VARIANT_ICON_COLORS = { green: '#21a926', } +export const LIRICAL = 'L' +export const EXOMISER = 'E' + export const PANEL_APP_CONFIDENCE_DESCRIPTION = { 0: 'No Panel App confidence level', 1: 'Red, lowest level of confidence; 1 of the 4 sources or from other sources.', From 5f3583a350b60f6e4b8fde4bcba39bc2b1264500 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 12 Oct 2022 13:09:59 -0400 Subject: [PATCH 10/96] Update per review comments. 
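This revision folds the scoreName{i}/score{i} column pairs of the input TSV into a single scores JSON value and groups rows by project and sample. A hedged sketch of that folding with an invented input row (column names follow the required header introduced in this patch; the float cast matches a later patch in this series):

    row = {
        'tool': 'lirical', 'project': 'Example Project', 'sampleId': 'NA19675_1',
        'rank': '1', 'geneId': 'ENSG00000135953', 'diseaseId': 'OMIM:123456',
        'diseaseName': 'example disease',
        'scoreName1': 'post_test_probability', 'score1': '0.15',
        'scoreName2': 'compositeLR', 'score2': '0.003',
    }

    def fold_scores(row, max_scores=100):
        # stop at the first missing scoreName{i}, mirroring _parse_phenotype_pri_row
        scores = {}
        for i in range(1, max_scores):
            name = row.get(f'scoreName{i}')
            if not name:
                break
            scores[name] = float(row[f'score{i}'])
        return scores

    assert fold_scores(row) == {'post_test_probability': 0.15, 'compositeLR': 0.003}
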
--- .../0048_phenotypeprioritization.py | 6 +- seqr/models.py | 34 +++-- seqr/urls.py | 4 +- seqr/utils/logging_utils.py | 7 +- seqr/views/apis/data_manager_api.py | 64 +++++++-- seqr/views/utils/dataset_utils.py | 123 ++++-------------- ui/pages/DataManagement/DataManagement.jsx | 4 +- .../components/PhenotypePri.jsx | 31 ----- .../components/PhenotypePrioritization.jsx | 27 ++++ ui/pages/DataManagement/reducers.js | 21 +-- ui/pages/DataManagement/selectors.js | 2 +- 11 files changed, 147 insertions(+), 176 deletions(-) delete mode 100644 ui/pages/DataManagement/components/PhenotypePri.jsx create mode 100644 ui/pages/DataManagement/components/PhenotypePrioritization.jsx diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py index d249142dcc..5f7900bdc3 100644 --- a/seqr/migrations/0048_phenotypeprioritization.py +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -1,7 +1,8 @@ -# Generated by Django 3.2.15 on 2022-10-05 16:09 +# Generated by Django 3.2.15 on 2022-10-12 15:03 from django.db import migrations, models import django.db.models.deletion +import seqr.models class Migration(migrations.Migration): @@ -16,12 +17,13 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('gene_id', models.CharField(max_length=20)), - ('tool', models.CharField(choices=[('E', 'exomiser'), ('L', 'lirical')], max_length=1)), + ('tool', models.CharField(max_length=20)), ('rank', models.IntegerField()), ('disease_id', models.CharField(max_length=32)), ('disease_name', models.TextField()), ('scores', models.JSONField()), ('individual', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='seqr.individual')), ], + bases=(models.Model, seqr.models.BulkOperationBase), ), ] diff --git a/seqr/models.py b/seqr/models.py index 8165bc5382..d47db0c1c1 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -45,6 +45,28 @@ def get_audit_field_names(audit_field): return list(_get_audit_fields(audit_field).keys()) +class BulkOperationBase: + @classmethod + def bulk_create(cls, user, new_models, parent=None): + """Helper bulk create method that logs the creation""" + for model in new_models: + model.created_by = user + models = cls.objects.bulk_create(new_models) + log_model_bulk_update(logger, models, user, 'create', parent=parent) + return models + + @classmethod + def bulk_delete(cls, user, queryset=None, parent=None, **filter_kwargs): + """Helper bulk delete method that logs the deletion""" + if queryset is None: + queryset = cls.objects.filter(**filter_kwargs) + log_model_bulk_update(logger, queryset, user, 'delete', parent=parent) + return queryset.delete() + + class Meta: + abstract = True + + class CustomModelBase(base.ModelBase): def __new__(cls, name, bases, attrs, **kwargs): audit_fields = getattr(attrs.get('Meta'), 'audit_fields', None) @@ -1055,17 +1077,11 @@ class Meta: json_fields = ['gene_id', 'tpm'] -class PhenotypePrioritization(models.Model): - TOOL_CHOICES = ( - ('E', 'exomiser'), - ('L', 'lirical') - ) - TOOL_LOOKUP = {v: k for k, v in TOOL_CHOICES} - +class PhenotypePrioritization(models.Model, BulkOperationBase): individual = models.ForeignKey('Individual', on_delete=models.CASCADE, db_index=True) gene_id = models.CharField(max_length=20) # ensembl ID - tool = models.CharField(max_length=1, choices=TOOL_CHOICES) + tool = models.CharField(max_length=20) rank = models.IntegerField() disease_id = models.CharField(max_length=32) 
disease_name = models.TextField() @@ -1075,4 +1091,4 @@ def __unicode__(self): return "%s:%s:%s" % (self.individual.individual_id, self.gene_id, self.disease_id) class Meta: - json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'scores'] + json_fields = ['gene_id', 'tool', 'rank', 'disease_id', 'disease_name', 'scores'] diff --git a/seqr/urls.py b/seqr/urls.py index 901f941556..88d837fb5e 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -114,7 +114,7 @@ forgot_password from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data, load_phenotype_pri_data, proxy_to_kibana + update_rna_seq, load_rna_seq_sample_data, proxy_to_kibana, load_phenotype_prioritization_data from seqr.views.apis.report_api import \ anvil_export, \ discovery_sheet, \ @@ -307,7 +307,7 @@ 'data_management/get_all_users': get_all_users, 'data_management/update_rna_seq': update_rna_seq, 'data_management/load_rna_seq_sample/(?P[^/]+)': load_rna_seq_sample_data, - 'data_management/load_phenotype_pri_data': load_phenotype_pri_data, + 'data_management/load_phenotype_prioritization_data': load_phenotype_prioritization_data, 'summary_data/saved_variants/(?P[^/]+)': saved_variants_page, 'summary_data/success_story/(?P[^/]+)': success_story, diff --git a/seqr/utils/logging_utils.py b/seqr/utils/logging_utils.py index f6e5ea1ef4..59c1174c08 100644 --- a/seqr/utils/logging_utils.py +++ b/seqr/utils/logging_utils.py @@ -73,11 +73,14 @@ def log_model_update(logger, model, user, update_type, update_fields=None): logger.info('{} {} {}'.format(update_type, db_entity, entity_id), user, db_update=db_update) -def log_model_bulk_update(logger, models, user, update_type, update_fields=None): +def log_model_bulk_update(logger, models, user, update_type, update_fields=None, parent=None): if not models: return [] db_entity = type(models[0]).__name__ - entity_ids = [o.guid for o in models] + if parent: + entity_ids = list({getattr(o, parent).guid for o in models}) + else: + entity_ids = [o.guid for o in models] db_update = { 'dbEntity': db_entity, 'entityIds': entity_ids, 'updateType': 'bulk_{}'.format(update_type), } diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index ce602ce335..7024d67398 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -9,7 +9,8 @@ import urllib3 from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import Max +from django.db.models import Max, TextField +from django.db.models.functions import Concat from django.http.response import HttpResponse from django.views.decorators.csrf import csrf_exempt from requests.exceptions import ConnectionError as RequestConnectionError @@ -18,7 +19,7 @@ from seqr.utils.file_utils import file_iter, does_file_exist from seqr.utils.logging_utils import SeqrLogger -from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_pri +from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_prioritization_data_file from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file from seqr.views.utils.json_utils import create_json_response, _to_camel_case from seqr.views.utils.permissions_utils import data_manager_required @@ -395,26 +396,65 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'success': True}) +def _load_phenotype_prioritization(file_path, 
user): + data_by_id = load_phenotype_prioritization_data_file(file_path) + + all_samples = [sample for project_samples in data_by_id.values() for sample in project_samples.values()] + all_records = [rec for sample_records in all_samples for rec in sample_records] + message = f'Parsed {len(all_records)} LIRICAL/Exomiser data records in {len(all_samples)} samples' + info = [message] + logger.info(message, user) + + for project, project_samples in data_by_id.items(): + indivs = Individual.objects.filter(family__project__name=project, individual_id__in=project_samples.keys()) + existing_indivs_by_id = {ind.individual_id: ind for ind in indivs} + + tool_sample_id_set = set() + for sample_id, records in project_samples.items(): + if existing_indivs_by_id[sample_id]: + for rec in records: + rec['individual'] = existing_indivs_by_id[sample_id] + tool_sample_id_set.add(f'{rec["tool"]}{sample_id}') + else: + raise ValueError(f'Individual {sample_id} doesn\'t exist in project {project}') + + # Delete old data + to_delete = PhenotypePrioritization.objects.annotate( + tool_ind=Concat('tool', 'individual__individual_id', output_field=TextField()) + ).filter( + tool_ind__in=tool_sample_id_set, + ) + if to_delete: + deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') + message = f'Deleted {deleted} existing phenotype-based prioritization records from project {project}' + info.append(message) + logger.info(message, user) + + + project_names = ', '.join(sorted(data_by_id.keys())) + message = 'Attempted data loading for {} phenotype-based prioritization records in the following {} projects: {}'.format( + len(all_records), len(data_by_id.keys()), project_names) + info.append(message) + logger.info(message, user) + + return all_records, info + + @data_manager_required -def load_phenotype_pri_data(request): +def load_phenotype_prioritization_data(request): request_json = json.loads(request.body) file_name = request_json['file'] - ignore_extra_samples = request_json.get('ignoreExtraSamples', False) logger.info(f'Loading phenotype prioritization data from {file_name}', request.user) - records, info, warnings = load_phenotype_pri(file_name, request.user, ignore_extra_samples) - models = PhenotypePrioritization.objects.bulk_create([PhenotypePrioritization(**data) for data in records]) - ind_guids = {data['individual'].guid for data in records} - logger.info(f'create {len(models)} {PhenotypePrioritization.__name__}', request.user, db_update={ - 'dbEntity': PhenotypePrioritization.__name__, 'numEntities': len(models), 'parentEntityIds': sorted(ind_guids), - 'updateType': 'bulk_create', - }) + records, info = _load_phenotype_prioritization(file_name, request.user) + models = PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in records], + parent='individual') + info.append(f'Loaded {len(models)} LIRICAL/Exomiser data records') return create_json_response({ 'info': info, - 'warnings': warnings, 'success': True }) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 0e5831590e..99562007be 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -1,17 +1,17 @@ import elasticsearch_dsl from collections import defaultdict -from django.db.models import prefetch_related_objects, TextField -from django.db.models.functions import Concat +from django.db.models import prefetch_related_objects from django.utils import timezone from tqdm import tqdm import random -from seqr.models import 
Sample, Individual, Family, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization +from seqr.models import Sample, Individual, Family, RnaSeqOutlier, RnaSeqTpm from seqr.utils.elasticsearch.utils import get_es_client, get_index_metadata from seqr.utils.file_utils import file_iter from seqr.utils.logging_utils import log_model_bulk_update, SeqrLogger from seqr.views.utils.file_utils import parse_file from seqr.views.utils.permissions_utils import get_internal_projects +from seqr.views.utils.json_utils import _to_snake_case logger = SeqrLogger(__name__) @@ -452,113 +452,38 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples return samples_to_load, info, warnings -TOOL_FIELD = 'tool' -PHENOTYPE_PRI_HEADER = { - 'tool': TOOL_FIELD, 'project': 'project', 'sampleId': 'sample_id', 'rank': 'rank', 'geneId': 'gene_id', - 'diseaseId': 'disease_id', 'diseaseName': 'disease_name', 'scoreName1': 'score_name1', 'score1': 'score1', - 'scoreName2': 'score_name2', 'score2': 'score2', 'scoreName3': 'score_name3', 'score3': 'score3'} +PHENOTYPE_PRI_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', 'scoreName1', 'score1'] +MAX_SCORES = 100 def _parse_phenotype_pri_row(row): - record = {mapped_key: row[key] for key, mapped_key in PHENOTYPE_PRI_HEADER.items()} - - tool = PhenotypePrioritization.TOOL_LOOKUP.get(record[TOOL_FIELD], None) - if not tool: - raise ValueError('Expecting {} for the "tool" column but found {}'.format( - ', '.join([v for k, v in PhenotypePrioritization.TOOL_CHOICES]), record[TOOL_FIELD])) - record[TOOL_FIELD] = tool - - scores = {record.pop('score_name1'): record.pop('score1')} - for score_index in ['2', '3']: - score_name = record.pop('score_name' + score_index, None) - score = record.pop('score' + score_index, None) - if score_name: - scores[score_name] = score + record = {_to_snake_case(key): row[key] for key in PHENOTYPE_PRI_HEADER[:-2]} + + scores = {} + for i in range(1, MAX_SCORES): + if not row[f'scoreName{i}']: + break + scores[row[f'scoreName{i}']] = row[f'score{i}'] record['scores'] = scores - return record + yield record['sample_id'], record -def _load_phenotype_pri_file(file_path): - data_by_id = defaultdict(dict) +def load_phenotype_prioritization_data_file(file_path): + data_by_id = defaultdict(lambda: defaultdict(list)) f = file_iter(file_path) header = _parse_tsv_row(next(f)) - missing_cols = [col for col in PHENOTYPE_PRI_HEADER.keys() if col not in header] + missing_cols = [col for col in PHENOTYPE_PRI_HEADER if col not in header] if missing_cols: raise ValueError(f'Invalid file: missing column(s) {", ".join(missing_cols)}') - count = 0 for line in tqdm(f, unit=' rows'): row = dict(zip(header, _parse_tsv_row(line))) - record = _parse_phenotype_pri_row(row) - sample_id = record.pop('sample_id', None) - project = record.pop('project', None) - if not sample_id or not project: - raise ValueError('Both sample ID and project fields are required.') - if data_by_id[sample_id]: - if project != data_by_id[sample_id]['project']: # a sample must belong to a single project - raise ValueError(f'Invalid project name for sample {sample_id}') - data_by_id[sample_id]['records'].append(record) - else: - data_by_id[sample_id]['project'] = project - data_by_id[sample_id]['records'] = [record] - count += 1 - - return count, data_by_id - - -def load_phenotype_pri(file_path, user, ignore_extra_samples): - count, data_by_id = _load_phenotype_pri_file(file_path) - - message = f'Parsed {count} LIRICAL/Exomiser data records in 
{len(data_by_id)} samples' - info = [message] - logger.info(message, user) - - indivs = Individual.objects.filter(individual_id__in=data_by_id.keys()) - prefetch_related_objects(indivs, 'family__project') - existing_indivs_by_id = {ind.individual_id: ind for ind in indivs - if ind.family.project.name == data_by_id[ind.individual_id]['project']} - - extra_ids = set() - extra_records = 0 - for sample_id, value in data_by_id.items(): - if existing_indivs_by_id[sample_id]: - for rec in value['records']: - rec['individual'] = existing_indivs_by_id[sample_id] - else: - data_by_id.pop(sample_id) - extra_ids.add(sample_id) - extra_records += len(value['records']) - - warnings = [] - if extra_records: - skipped_samples = ', '.join(sorted(extra_ids)) - if ignore_extra_samples: - warnings = [f'Skipped loading {extra_records} records for the following {len(extra_ids)} unmatched samples: {skipped_samples}'] - else: - raise ValueError(f'Unable to find matches for the following samples: {skipped_samples}') - - # Delete old data - to_delete = PhenotypePrioritization.objects.annotate( - tool_ind=Concat('tool', 'individual__individual_id', output_field=TextField()) - ).filter( - tool_ind__in={rec[TOOL_FIELD]+sample_id for sample_id, values in data_by_id.items() for rec in values['records']}, - ) - if to_delete: - prefetch_related_objects(to_delete, 'individual') - info.append(f'Deleted {len(to_delete)} existing LIRICAL/Exomiser records') - logger.info(f'delete {len(to_delete)} {PhenotypePrioritization.__name__}s', user, db_update={ - 'dbEntity': PhenotypePrioritization.__name__, 'numEntities': len(to_delete), 'updateType': 'bulk_delete', - 'parentEntityIds': list({model.individual.guid for model in to_delete}), - }) - to_delete.delete() - - records_to_load = [rec for value in data_by_id.values() for rec in value['records']] - projects = {value['project'] for value in data_by_id.values()} - project_names = ', '.join(sorted(projects)) - message = 'Attempted data loading for {} LIRICAL/Exomiser records in the following {} projects: {}'.format( - len(records_to_load), len(projects), project_names) - info.append(message) - logger.info(message, user) - - return records_to_load, info, warnings + for sample_id, row_dict in _parse_phenotype_pri_row(row): + row_dict.pop('sample_id') + project = row_dict.pop('project', None) + if not sample_id or not project: + raise ValueError('Both sample ID and project fields are required.') + data_by_id[project][sample_id].append(row_dict) + + return data_by_id diff --git a/ui/pages/DataManagement/DataManagement.jsx b/ui/pages/DataManagement/DataManagement.jsx index ab25d5e7a3..dfc809d641 100644 --- a/ui/pages/DataManagement/DataManagement.jsx +++ b/ui/pages/DataManagement/DataManagement.jsx @@ -10,7 +10,7 @@ import ElasticsearchStatus from './components/ElasticsearchStatus' import RnaSeq from './components/RnaSeq' import SampleQc from './components/SampleQc' import Users from './components/Users' -import PhenotypePri from './components/PhenotypePri' +import PhenotypePrioritization from './components/PhenotypePrioritization' const IFRAME_STYLE = { position: 'fixed', left: '0', top: '95px' } @@ -23,7 +23,7 @@ export const DATA_MANAGEMENT_PAGES = [ { path: 'sample_qc', component: SampleQc }, { path: 'rna_seq', component: RnaSeq }, { path: 'users', component: Users }, - { path: 'lirical_exomiser', component: PhenotypePri }, + { path: 'phenotype_prioritization', component: PhenotypePrioritization }, ] const DataManagement = ({ match, user }) => ( diff --git 
a/ui/pages/DataManagement/components/PhenotypePri.jsx b/ui/pages/DataManagement/components/PhenotypePri.jsx deleted file mode 100644 index de64606ba3..0000000000 --- a/ui/pages/DataManagement/components/PhenotypePri.jsx +++ /dev/null @@ -1,31 +0,0 @@ -import { connect } from 'react-redux' - -import { validators } from 'shared/components/form/FormHelpers' -import { BooleanCheckbox } from 'shared/components/form/Inputs' -import UploadFormPage from 'shared/components/page/UploadFormPage' - -import { getPhenoPriUploadStats } from '../selectors' -import { uploadPhenoPri } from '../reducers' - -const mapStateToProps = state => ({ - fields: [ - { - name: 'file', - label: 'Phenotype-based prioritization data (.tsv)', - placeholder: 'gs:// Google bucket path', - validate: validators.required, - }, - { - name: 'ignoreExtraSamples', - component: BooleanCheckbox, - label: 'Ignore extra samples', - }, - ], - uploadStats: getPhenoPriUploadStats(state), -}) - -const mapDispatchToProps = { - onSubmit: uploadPhenoPri, -} - -export default connect(mapStateToProps, mapDispatchToProps)(UploadFormPage) diff --git a/ui/pages/DataManagement/components/PhenotypePrioritization.jsx b/ui/pages/DataManagement/components/PhenotypePrioritization.jsx new file mode 100644 index 0000000000..1d5a3ba175 --- /dev/null +++ b/ui/pages/DataManagement/components/PhenotypePrioritization.jsx @@ -0,0 +1,27 @@ +import { connect } from 'react-redux' + +import { validators } from 'shared/components/form/FormHelpers' +import UploadFormPage from 'shared/components/page/UploadFormPage' + +import { getPhePriUploadStats } from '../selectors' +import { uploadPhenotypePrioritization } from '../reducers' + +const FIELDS = [ + { + name: 'file', + label: 'Phenotype-based prioritization data (.tsv)', + placeholder: 'gs:// Google bucket path', + validate: validators.required, + }, +] + +const mapStateToProps = state => ({ + fields: FIELDS, + uploadStats: getPhePriUploadStats(state), +}) + +const mapDispatchToProps = { + onSubmit: uploadPhenotypePrioritization, +} + +export default connect(mapStateToProps, mapDispatchToProps)(UploadFormPage) diff --git a/ui/pages/DataManagement/reducers.js b/ui/pages/DataManagement/reducers.js index 080b56b338..7cc15fd611 100644 --- a/ui/pages/DataManagement/reducers.js +++ b/ui/pages/DataManagement/reducers.js @@ -8,7 +8,7 @@ const REQUEST_ELASTICSEARCH_STATUS = 'REQUEST_ELASTICSEARCH_STATUS' const RECEIVE_ELASTICSEARCH_STATUS = 'RECEIVE_ELASTICSEARCH_STATUS' const RECEIVE_PIPELINE_UPLOAD_STATS = 'RECEIVE_PIPELINE_UPLOAD_STATS' const RECEIVE_RNA_SEQ_UPLOAD_STATS = 'RECEIVE_RNA_SEQ_UPLOAD_STATS' -const RECEIVE_PHENO_PRI_UPLOAD_STATS = 'RECEIVE_PHENO_PRI_UPLOAD_STATS' +const RECEIVE_PHE_PRI_UPLOAD_STATS = 'RECEIVE_PHE_PRI_UPLOAD_STATS' const REQUEST_ALL_USERS = 'REQUEST_ALL_USERS' const RECEIVE_ALL_USERS = 'RECEIVE_ALL_USERS' @@ -76,27 +76,16 @@ export const uploadRnaSeq = values => (dispatch) => { }) } -export const uploadPhenoPri = values => (dispatch) => { - let successResponseJson = null - return new HttpRequestHelper( - '/api/data_management/load_phenotype_pri_data', - (responseJson) => { - successResponseJson = responseJson - }, - (e) => { - successResponseJson = { warnings: [e.message] } - }, - ).post(values).then(() => { - dispatch({ type: RECEIVE_PHENO_PRI_UPLOAD_STATS, newValue: successResponseJson }) - }) -} +export const uploadPhenotypePrioritization = values => submitRequest( + 'load_phenotype_prioritization_data', RECEIVE_PHE_PRI_UPLOAD_STATS, values, +) export const reducers = { 
elasticsearchStatusLoading: loadingReducer(REQUEST_ELASTICSEARCH_STATUS, RECEIVE_ELASTICSEARCH_STATUS), elasticsearchStatus: createSingleObjectReducer(RECEIVE_ELASTICSEARCH_STATUS), qcUploadStats: createSingleValueReducer(RECEIVE_PIPELINE_UPLOAD_STATS, {}), rnaSeqUploadStats: createSingleValueReducer(RECEIVE_RNA_SEQ_UPLOAD_STATS, {}), - phenoPriUploadStats: createSingleValueReducer(RECEIVE_PHENO_PRI_UPLOAD_STATS, {}), + phePriUploadStats: createSingleValueReducer(RECEIVE_PHE_PRI_UPLOAD_STATS, {}), allUsers: createSingleValueReducer(RECEIVE_ALL_USERS, [], 'users'), allUsersLoading: loadingReducer(REQUEST_ALL_USERS, RECEIVE_ALL_USERS), } diff --git a/ui/pages/DataManagement/selectors.js b/ui/pages/DataManagement/selectors.js index 48ffc39d45..2944714d68 100644 --- a/ui/pages/DataManagement/selectors.js +++ b/ui/pages/DataManagement/selectors.js @@ -4,4 +4,4 @@ export const getQcUploadStats = state => state.qcUploadStats export const getRnaSeqUploadStats = state => state.rnaSeqUploadStats export const getAllUsersLoading = state => state.allUsersLoading.isLoading export const getAllUsers = state => state.allUsers -export const getPhenoPriUploadStats = state => state.phenoPriUploadStats +export const getPhePriUploadStats = state => state.phePriUploadStats From 334791e2899c8d930ef8c8784c21056031b5f7cd Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 12 Oct 2022 15:50:53 -0400 Subject: [PATCH 11/96] Remove an extra blank line. --- seqr/views/apis/data_manager_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 7024d67398..ee94355277 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -430,7 +430,6 @@ def _load_phenotype_prioritization(file_path, user): info.append(message) logger.info(message, user) - project_names = ', '.join(sorted(data_by_id.keys())) message = 'Attempted data loading for {} phenotype-based prioritization records in the following {} projects: {}'.format( len(all_records), len(data_by_id.keys()), project_names) From 362e7d8ca55cc36c7b6339fce6cfab8b389e2021 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 14 Oct 2022 10:47:02 -0400 Subject: [PATCH 12/96] Update lirical display. 
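Scores are now serialized with camelCased names so the UI can derive table headers directly from whatever keys a record carries. A rough sketch of that conversion; to_camel_case below only approximates seqr's _to_camel_case helper, and the sample scores are invented:

    def to_camel_case(snake_string):
        # 'post_test_probability' -> 'postTestProbability'; keys without underscores pass through
        parts = snake_string.split('_')
        return parts[0] + ''.join(word.title() for word in parts[1:])

    scores = {'post_test_probability': 0.15, 'compositeLR': 0.003}
    camel_scores = {to_camel_case(key): value for key, value in scores.items()}
    # {'postTestProbability': 0.15, 'compositeLR': 0.003}
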
--- seqr/views/utils/dataset_utils.py | 5 +- seqr/views/utils/orm_to_json_utils.py | 4 +- seqr/views/utils/variant_utils.py | 21 +++--- .../components/panel/variants/VariantGene.jsx | 72 +++++++++---------- .../components/panel/variants/selectors.js | 29 ++++---- 5 files changed, 64 insertions(+), 67 deletions(-) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 99562007be..3551d0c9c0 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -461,9 +461,10 @@ def _parse_phenotype_pri_row(row): scores = {} for i in range(1, MAX_SCORES): - if not row[f'scoreName{i}']: + score_name = row.get(f'scoreName{i}') + if not score_name: break - scores[row[f'scoreName{i}']] = row[f'score{i}'] + scores[score_name] = float(row[f'score{i}']) record['scores'] = scores yield record['sample_id'], record diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 4688ba440a..9fd48a0a25 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -868,8 +868,8 @@ def _process_result(data, model): return _get_json_for_models(models, process_result=_process_result, **kwargs) -def get_json_for_phenotype_pri(models, **kwargs): +def get_json_for_phenotype_prioritization(models, **kwargs): def _process_result(data, model): - data['scores'] = {_to_camel_case(score) for score, value in data['scores'].items()} + data['scores'] = {_to_camel_case(score): value for score, value in data['scores'].items()} return _get_json_for_models(models, process_result=_process_result, **kwargs) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 16d15dab99..8ab37c6765 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -8,7 +8,8 @@ from seqr.utils.gene_utils import get_genes_for_variants from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.orm_to_json_utils import get_json_for_discovery_tags, get_json_for_locus_lists, \ - _get_json_for_models, get_json_for_rna_seq_outliers, get_json_for_saved_variants_with_tags, get_json_for_phenotype_pri + _get_json_for_models, get_json_for_rna_seq_outliers, get_json_for_saved_variants_with_tags,\ + get_json_for_phenotype_prioritization from seqr.views.utils.permissions_utils import has_case_review_permissions, user_is_analyst from seqr.views.utils.project_context_utils import add_project_tag_types, add_families_context from settings import REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT @@ -128,17 +129,16 @@ def _get_rna_seq_outliers(gene_ids, families): return data_by_individual_gene -def _get_phenotype_pri_data(gene_ids, families): - data_by_individual_gene = defaultdict(lambda: {PhenotypePrioritization.LIRICAL_CHOICE: {}, - PhenotypePrioritization.EXOMISER_CHOICE: {}}) +def _get_phenotype_prioritization(gene_ids, families): + data_by_individual_gene = defaultdict(lambda: defaultdict(lambda: defaultdict(list))) - phe_pri_data = get_json_for_phenotype_pri( + data_dicts = _get_json_for_models( PhenotypePrioritization.objects.filter(gene_id__in=gene_ids, individual__family__in=families), nested_fields=[{'fields': ('individual', 'guid'), 'key': 'individualGuid'}], ) - for data in phe_pri_data: - data_by_individual_gene[data.pop('individualGuid')][data['tool']][data['geneId']] = data + for data in data_dicts: + data_by_individual_gene[data.pop('individualGuid')][data['tool']][data['geneId']].append(data) return data_by_individual_gene @@ -173,9 +173,10 
@@ def _add_pa_detail(locus_list_gene, locus_list_guid, gene_json): LOAD_PROJECT_TAG_TYPES_CONTEXT_PARAM = 'loadProjectTagTypes' LOAD_FAMILY_CONTEXT_PARAM = 'loadFamilyContext' + def get_variants_response(request, saved_variants, response_variants=None, add_all_context=False, include_igv=True, add_locus_list_detail=False, include_rna_seq=True, include_project_name=False, - include_phe_pri=True): + include_phenotype_prioritization=True): response = get_json_for_saved_variants_with_tags(saved_variants, add_details=True) variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants @@ -220,7 +221,7 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a if families_by_guid: _add_family_has_rna_tpm(families_by_guid) - if include_phe_pri: - response['phePriData'] = _get_phenotype_pri_data(genes.keys(), families) + if include_phenotype_prioritization: + response['phePriData'] = _get_phenotype_prioritization(genes.keys(), families) return response diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index bcb7a7258e..29eb41f603 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -9,7 +9,6 @@ import { getGenesById, getLocusListsByGuid, getFamiliesByGuid } from 'redux/sele import { panelAppUrl, moiToMoiInitials } from '../../../utils/panelAppUtils' import { MISSENSE_THRESHHOLD, LOF_THRESHHOLD, PANEL_APP_CONFIDENCE_LEVEL_COLORS, PANEL_APP_CONFIDENCE_DESCRIPTION, - LIRICAL, EXOMISER, } from '../../../utils/constants' import { compareObjects } from '../../../utils/sortUtils' import { camelcaseToTitlecase } from '../../../utils/stringUtils' @@ -315,39 +314,34 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ }, ] -const SAMPLE_GENE_DETAIL_FIELDS = { - rnaSeqData: { infos: [], scores: ['zScore', 'pValue', 'pAdjust'] }, - liricalData: { infos: ['rank', 'diseases'], scores: ['postTestProbability', 'LR'] }, - exomiserData: { infos: ['rank', 'diseases'], scores: ['exomiserScore', 'phenotypeScore', 'variantScore'] }, -} - -const sampleGeneDetailsDisplay = (geneId, sampleGeneData, dataType) => ( -
- - - - - {Object.values(SAMPLE_GENE_DETAIL_FIELDS[dataType]).flat().map( - field => {camelcaseToTitlecase(field).replace(' ', '-')}, - )} - - - - {Object.entries(sampleGeneData[geneId]).map(([individual, data]) => ( - - {individual} - {SAMPLE_GENE_DETAIL_FIELDS[dataType].infos.map( - field => {data[field]}, - )} - {SAMPLE_GENE_DETAIL_FIELDS[dataType].scores.map( - field => {data[field].toPrecision(3)}, - )} +const sampleGeneDetailsDisplay = (geneId, sampleGeneData) => { + const { scores, ...info } = Object.values(Object.values(sampleGeneData)[0])[0][0] + const infoKeys = Object.keys(info) + const scoreKeys = Object.keys(scores || {}) + return ( +
+
+ + + + {infoKeys.concat(scoreKeys).map(field => ( + {camelcaseToTitlecase(field).replace(' ', '-')} + ))} - ))} - -
-
-) + + + {Object.entries(sampleGeneData[geneId]).map(([individual, data]) => (data.map(row => ( + + {individual} + {infoKeys.map(field => {row[field]})} + {scoreKeys.map(field => {row.scores[field].toPrecision(3)})} + + ))))} + + + + ) +} const GENE_DETAIL_SECTIONS = [ { @@ -410,20 +404,20 @@ const GENE_DETAIL_SECTIONS = [ }, { color: 'orange', - description: 'LIRICAL Phenotype Prioritization', + description: 'Phenotype Prioritization', label: 'LIRICAL', - showDetails: (gene, { phePriData }) => phePriData && phePriData[LIRICAL][gene.geneId], + showDetails: (gene, { phePriData }) => phePriData && phePriData.lirical && phePriData.lirical[gene.geneId], detailsDisplay: (gene, { phePriData }) => ( - sampleGeneDetailsDisplay(gene.geneId, phePriData, 'lirical') + sampleGeneDetailsDisplay(gene.geneId, phePriData.lirical) ), }, { color: 'orange', - description: 'Exomiser Phenotype Prioritization', + description: 'Phenotype Prioritization', label: 'Exomiser', - showDetails: (gene, { phePriData }) => phePriData && phePriData[EXOMISER][gene.geneId], + showDetails: (gene, { phePriData }) => phePriData && phePriData.exomiser && phePriData.exomiser[gene.geneId], detailsDisplay: (gene, { phePriData }) => ( - sampleGeneDetailsDisplay(gene.geneId, phePriData, 'exomiser') + sampleGeneDetailsDisplay(gene.geneId, phePriData.exomiser) ), }, ] diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 1dd2e01ae1..d6d1abcff7 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -11,7 +11,6 @@ import { VARIANT_SORT_LOOKUP, SHOW_ALL, VARIANT_EXPORT_DATA, - LIRICAL, EXOMISER, } from 'shared/utils/constants' import { getVariantTagsByGuid, getVariantNotesByGuid, getSavedVariantsByGuid, getAnalysisGroupsByGuid, getGenesById, getUser, @@ -25,31 +24,33 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( (acc, [individualGuid, rnaSeqData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] acc[familyGuid] = Object.entries(rnaSeqData.outliers || {}).reduce( - (acc2, [geneId, data]) => (data.isSignificant ? - { ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } } : acc2 - ), acc[familyGuid] || {}, + (acc2, [geneId, data]) => { + const { zScore, pValue, pAdjust } = data + return (data.isSignificant ? 
{ + ...acc2, + [geneId]: { ...(acc2[geneId] || {}), [displayName]: [{ scores: { zScore, pValue, pAdjust } }] }, + } : acc2) + }, + acc[familyGuid] || {}, ) return acc }, {}, ), ) -const TOOLS = [LIRICAL, EXOMISER] export const getPhePriDataByFamilyGene = createSelector( getIndividualsByGuid, getPhePriDataByIndividual, (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual).reduce( (acc, [individualGuid, phePriData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = TOOLS.reduce( - (accTool, tool) => ({ - ...accTool, - [tool]: Object.entries(phePriData[tool] || {}).reduce( - (acc2, [geneId, data]) => ({ ...acc2, [geneId]: { ...(acc2[geneId] || {}), [displayName]: data } }), - acc[familyGuid] || {}, - ), - }), {}, - ) + acc[familyGuid] = Object.entries(phePriData).reduce((accTool, [tool, toolData]) => ({ + ...accTool, + [tool]: Object.entries(toolData).reduce((acc2, [geneId, data]) => ({ + ...acc2, + [geneId]: { ...(acc2[geneId] || {}), [displayName]: data }, + }), {}), + }), acc[familyGuid] || {}) return acc }, {}, ), From c29633f29b3d3c856805973ad724fc61de916f35 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 14 Oct 2022 16:46:37 -0400 Subject: [PATCH 13/96] Update per review comments. --- .../0048_phenotypeprioritization.py | 2 +- seqr/models.py | 51 ++++++------- seqr/utils/logging_utils.py | 7 +- seqr/views/apis/data_manager_api.py | 75 ++++++++----------- seqr/views/apis/data_manager_api_tests.py | 35 +++++---- seqr/views/utils/dataset_utils.py | 30 ++++---- 6 files changed, 95 insertions(+), 105 deletions(-) diff --git a/seqr/migrations/0048_phenotypeprioritization.py b/seqr/migrations/0048_phenotypeprioritization.py index 5f7900bdc3..6333471c06 100644 --- a/seqr/migrations/0048_phenotypeprioritization.py +++ b/seqr/migrations/0048_phenotypeprioritization.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.15 on 2022-10-12 15:03 +# Generated by Django 3.2.15 on 2022-10-14 20:38 from django.db import migrations, models import django.db.models.deletion diff --git a/seqr/models.py b/seqr/models.py index d47db0c1c1..4658c5002a 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -7,7 +7,7 @@ from django.contrib.postgres.fields import ArrayField from django.core.exceptions import PermissionDenied from django.db import models -from django.db.models import base, options, ForeignKey, JSONField +from django.db.models import base, options, ForeignKey, JSONField, prefetch_related_objects from django.utils import timezone from django.utils.text import slugify as __slugify @@ -45,28 +45,6 @@ def get_audit_field_names(audit_field): return list(_get_audit_fields(audit_field).keys()) -class BulkOperationBase: - @classmethod - def bulk_create(cls, user, new_models, parent=None): - """Helper bulk create method that logs the creation""" - for model in new_models: - model.created_by = user - models = cls.objects.bulk_create(new_models) - log_model_bulk_update(logger, models, user, 'create', parent=parent) - return models - - @classmethod - def bulk_delete(cls, user, queryset=None, parent=None, **filter_kwargs): - """Helper bulk delete method that logs the deletion""" - if queryset is None: - queryset = cls.objects.filter(**filter_kwargs) - log_model_bulk_update(logger, queryset, user, 'delete', parent=parent) - return queryset.delete() - - class Meta: - abstract = True - - class CustomModelBase(base.ModelBase): def __new__(cls, name, bases, attrs, **kwargs): audit_fields = getattr(attrs.get('Meta'), 'audit_fields', None) @@ 
-1033,19 +1011,36 @@ def __unicode__(self): def _compute_guid(self): return 'VSR%07d_%s' % (self.id, _slugify(str(self))) -class DeletableSampleMetadataModel(models.Model): - sample = models.ForeignKey('Sample', on_delete=models.CASCADE, db_index=True) - gene_id = models.CharField(max_length=20) # ensembl ID +class BulkOperationBase: + @classmethod + def bulk_create(cls, user, new_models, parent=None): + """Helper bulk create method that logs the creation""" + for model in new_models: + model.created_by = user + models = cls.objects.bulk_create(new_models) + log_model_bulk_update(logger, models, user, 'create', parent=parent) + return models @classmethod - def bulk_delete(cls, user, queryset=None, **filter_kwargs): + def bulk_delete(cls, user, queryset=None, parent=None, **filter_kwargs): """Helper bulk delete method that logs the deletion""" if queryset is None: queryset = cls.objects.filter(**filter_kwargs) - log_model_bulk_update(logger, queryset, user, 'delete') + if parent: + prefetch_related_objects(queryset, parent) + log_model_bulk_update(logger, queryset, user, 'delete', parent=parent) return queryset.delete() + class Meta: + abstract = True + + +class DeletableSampleMetadataModel(models.Model, BulkOperationBase): + + sample = models.ForeignKey('Sample', on_delete=models.CASCADE, db_index=True) + gene_id = models.CharField(max_length=20) # ensembl ID + def __unicode__(self): return "%s:%s" % (self.sample.sample_id, self.gene_id) diff --git a/seqr/utils/logging_utils.py b/seqr/utils/logging_utils.py index 59c1174c08..eb205af86d 100644 --- a/seqr/utils/logging_utils.py +++ b/seqr/utils/logging_utils.py @@ -77,13 +77,12 @@ def log_model_bulk_update(logger, models, user, update_type, update_fields=None, if not models: return [] db_entity = type(models[0]).__name__ - if parent: - entity_ids = list({getattr(o, parent).guid for o in models}) - else: - entity_ids = [o.guid for o in models] + entity_ids = [o.guid if hasattr(o, 'guid') else o.id for o in models] db_update = { 'dbEntity': db_entity, 'entityIds': entity_ids, 'updateType': 'bulk_{}'.format(update_type), } + if parent: + db_update['parentEntityIds'] = list({getattr(model, parent).guid for model in models}) if update_fields: db_update['updateFields'] = list(update_fields) logger.info( diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index ee94355277..8e8c610b5f 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -9,8 +9,7 @@ import urllib3 from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import Max, TextField -from django.db.models.functions import Concat +from django.db.models import Max from django.http.response import HttpResponse from django.views.decorators.csrf import csrf_exempt from requests.exceptions import ConnectionError as RequestConnectionError @@ -388,53 +387,45 @@ def load_rna_seq_sample_data(request, sample_guid): data_by_gene = json.loads(row.split('\t\t')[1]) model_cls = RNA_DATA_TYPE_CONFIGS[data_type]['model_class'] - models = model_cls.objects.bulk_create([model_cls(sample=sample, **data) for data in data_by_gene.values()]) - logger.info(f'create {len(models)} {model_cls.__name__}', request.user, db_update={ - 'dbEntity': model_cls.__name__, 'numEntities': len(models), 'parentEntityIds': [sample_guid], 'updateType': 'bulk_create', - }) + model_cls.bulk_create(request.user, [model_cls(sample=sample, **data) for data in data_by_gene.values()], parent='sample') return create_json_response({'success': 
True}) +def _log_append_info(user, info, message): + info.append(message) + logger.info(message, user) + + def _load_phenotype_prioritization(file_path, user): - data_by_id = load_phenotype_prioritization_data_file(file_path) + tool, data_by_project_sample_id = load_phenotype_prioritization_data_file(file_path) - all_samples = [sample for project_samples in data_by_id.values() for sample in project_samples.values()] - all_records = [rec for sample_records in all_samples for rec in sample_records] - message = f'Parsed {len(all_records)} LIRICAL/Exomiser data records in {len(all_samples)} samples' - info = [message] - logger.info(message, user) + info = [] + _log_append_info(user, info, f'Parsed {tool.upper()} data for project(s): {", ".join(data_by_project_sample_id.keys())}') - for project, project_samples in data_by_id.items(): - indivs = Individual.objects.filter(family__project__name=project, individual_id__in=project_samples.keys()) + all_records = [] + to_delete = None + for project, records_by_sample in data_by_project_sample_id.items(): + indivs = Individual.objects.filter(family__project__name=project, individual_id__in=records_by_sample.keys()) existing_indivs_by_id = {ind.individual_id: ind for ind in indivs} - tool_sample_id_set = set() - for sample_id, records in project_samples.items(): - if existing_indivs_by_id[sample_id]: - for rec in records: - rec['individual'] = existing_indivs_by_id[sample_id] - tool_sample_id_set.add(f'{rec["tool"]}{sample_id}') - else: - raise ValueError(f'Individual {sample_id} doesn\'t exist in project {project}') + missing_individuals = set(records_by_sample.keys()) - set(existing_indivs_by_id.keys()) + if missing_individuals: + raise ValueError(f'Individual {", ".join(list(missing_individuals))} doesn\'t exist') + for sample_id, records in records_by_sample.items(): + for rec in records: + rec['individual'] = existing_indivs_by_id[sample_id] - # Delete old data - to_delete = PhenotypePrioritization.objects.annotate( - tool_ind=Concat('tool', 'individual__individual_id', output_field=TextField()) - ).filter( - tool_ind__in=tool_sample_id_set, - ) - if to_delete: - deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') - message = f'Deleted {deleted} existing phenotype-based prioritization records from project {project}' - info.append(message) - logger.info(message, user) - - project_names = ', '.join(sorted(data_by_id.keys())) - message = 'Attempted data loading for {} phenotype-based prioritization records in the following {} projects: {}'.format( - len(all_records), len(data_by_id.keys()), project_names) - info.append(message) - logger.info(message, user) + exist_records = PhenotypePrioritization.objects.filter(tool=tool, individual__in=indivs) + to_delete = to_delete | exist_records if to_delete else exist_records + + records = [rec for records in records_by_sample.values() for rec in records] + _log_append_info(user, info, f'Attempted loading {len(records)} records of {tool.upper()} data to project {project}') + all_records += records + + if to_delete: + deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') + _log_append_info(user, info, f'Deleted {deleted} existing {tool.upper()} records') return all_records, info @@ -445,12 +436,12 @@ def load_phenotype_prioritization_data(request): file_name = request_json['file'] - logger.info(f'Loading phenotype prioritization data from {file_name}', request.user) - records, info = _load_phenotype_prioritization(file_name, request.user) + 
logger.info(f'Loading phenotype-based prioritization data from {file_name}', request.user) + records, info, tool = _load_phenotype_prioritization(file_name, request.user) models = PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in records], parent='individual') - info.append(f'Loaded {len(models)} LIRICAL/Exomiser data records') + info.append(f'Loaded {len(models)} {tool.upper()} data records') return create_json_response({ 'info': info, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 74ee63de81..9315a71b3a 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -594,7 +594,9 @@ def test_kibana_proxy(self): @mock.patch('seqr.utils.file_utils.subprocess.Popen') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') @mock.patch('seqr.views.utils.dataset_utils.logger') - def test_update_rna_seq(self, mock_logger, mock_open, mock_subprocess, mock_load_uploaded_file, mock_os, mock_datetime): + @mock.patch('seqr.models.logger') + def test_update_rna_seq(self, mock_model_logger, mock_logger, mock_open, mock_subprocess, mock_load_uploaded_file, + mock_os, mock_datetime): url = reverse(update_rna_seq) self.check_data_manager_login(url) @@ -687,14 +689,13 @@ def mock_write(content): response_json = response.json() self.assertDictEqual(response_json, {'info': info, 'warnings': warnings, 'sampleGuids': [mock.ANY], 'fileName': file_name}) deleted_count = params.get('deleted_count', params['initial_model_count']) - mock_logger.info.assert_has_calls( - [mock.call(info_log, self.data_manager_user) for info_log in info] + [ - mock.call(f'delete {deleted_count} {model_cls.__name__}s', self.data_manager_user, db_update={ - 'dbEntity': model_cls.__name__, 'numEntities': deleted_count, 'parentEntityIds': mock.ANY, 'updateType': 'bulk_delete', - }), - ], any_order=True - ) - self.assertTrue(RNA_SAMPLE_GUID in mock_logger.info.call_args_list[1].kwargs['db_update']['parentEntityIds']) + mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) + mock_model_logger.info.assert_called_with( + f'delete {deleted_count} {model_cls.__name__}s', self.data_manager_user, db_update={ + 'dbEntity': model_cls.__name__, 'entityIds': mock.ANY, 'parentEntityIds': mock.ANY, + 'updateType': 'bulk_delete', + }) + self.assertTrue(RNA_SAMPLE_GUID in mock_model_logger.info.call_args_list[1].kwargs['db_update']['parentEntityIds']) mock_logger.warning.assert_has_calls([mock.call(warn_log, self.data_manager_user) for warn_log in warnings]) # test database models are correct @@ -718,7 +719,8 @@ def mock_write(content): @mock.patch('seqr.views.apis.data_manager_api.os') @mock.patch('seqr.views.apis.data_manager_api.gzip.open') @mock.patch('seqr.views.apis.data_manager_api.logger') - def test_load_rna_seq_sample_data(self, mock_logger, mock_open, mock_os): + @mock.patch('seqr.models.logger') + def test_load_rna_seq_sample_data(self, mock_model_logger, mock_logger, mock_open, mock_os): mock_os.path.join.side_effect = lambda *args: '/'.join(args[1:]) url = reverse(load_rna_seq_sample_data, args=[RNA_SAMPLE_GUID]) @@ -743,11 +745,12 @@ def test_load_rna_seq_sample_data(self, mock_logger, mock_open, mock_os): mock_open.assert_called_with(file_name, 'rt') - mock_logger.info.assert_has_calls([ - mock.call('Loading outlier data for NA19675_D2', self.data_manager_user), - mock.call(f'create 2 {model_cls.__name__}', self.data_manager_user, db_update={ - 
'dbEntity': model_cls.__name__, 'numEntities': 2, 'parentEntityIds': [RNA_SAMPLE_GUID], 'updateType': 'bulk_create', - }), - ]) + mock_logger.info.assert_called_with('Loading outlier data for NA19675_D2', self.data_manager_user) + mock_model_logger.info.assert_called_with( + f'create 2 {model_cls.__name__}s', self.data_manager_user, db_update={ + 'dbEntity': model_cls.__name__, 'entityIds': mock.ANY, 'parentEntityIds': [RNA_SAMPLE_GUID], + 'updateType': 'bulk_create', + } + ) self.assertListEqual(params['get_models_json'](models), params['expected_models_json']) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 99562007be..8134c821cb 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -419,12 +419,7 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples individual_db_ids = {s.individual_id for s in samples} to_delete = model_cls.objects.filter(sample__individual_id__in=individual_db_ids).exclude(sample__data_source=data_source) if to_delete: - prefetch_related_objects(to_delete, 'sample') - logger.info(f'delete {len(to_delete)} {model_cls.__name__}s', user, db_update={ - 'dbEntity': model_cls.__name__, 'numEntities': len(to_delete), 'updateType': 'bulk_delete', - 'parentEntityIds': list({model.sample.guid for model in to_delete}), - }) - to_delete.delete() + model_cls.bulk_delete(user, to_delete, parent='sample') loaded_sample_ids = set(model_cls.objects.filter(sample__in=samples).values_list('sample_id', flat=True).distinct()) samples_to_load = { @@ -452,31 +447,34 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples return samples_to_load, info, warnings -PHENOTYPE_PRI_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', 'scoreName1', 'score1'] +PHENOTYPE_PRIORITIZATION_HEADER = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName'] +PHENOTYPE_PRIORITIZATION_REQUIRED_HEADER = PHENOTYPE_PRIORITIZATION_HEADER + ['scoreName1', 'score1'] MAX_SCORES = 100 def _parse_phenotype_pri_row(row): - record = {_to_snake_case(key): row[key] for key in PHENOTYPE_PRI_HEADER[:-2]} + record = {_to_snake_case(key): row[key] for key in PHENOTYPE_PRIORITIZATION_HEADER} scores = {} for i in range(1, MAX_SCORES): - if not row[f'scoreName{i}']: + score_name = row.get(f'scoreName{i}') + if not score_name: break - scores[row[f'scoreName{i}']] = row[f'score{i}'] + scores[score_name] = float(row[f'score{i}']) record['scores'] = scores yield record['sample_id'], record def load_phenotype_prioritization_data_file(file_path): - data_by_id = defaultdict(lambda: defaultdict(list)) + data_by_project_sample_id = defaultdict(lambda: defaultdict(list)) f = file_iter(file_path) header = _parse_tsv_row(next(f)) - missing_cols = [col for col in PHENOTYPE_PRI_HEADER if col not in header] + missing_cols = [col for col in PHENOTYPE_PRIORITIZATION_REQUIRED_HEADER if col not in header] if missing_cols: raise ValueError(f'Invalid file: missing column(s) {", ".join(missing_cols)}') + tool = None for line in tqdm(f, unit=' rows'): row = dict(zip(header, _parse_tsv_row(line))) for sample_id, row_dict in _parse_phenotype_pri_row(row): @@ -484,6 +482,10 @@ def load_phenotype_prioritization_data_file(file_path): project = row_dict.pop('project', None) if not sample_id or not project: raise ValueError('Both sample ID and project fields are required.') - data_by_id[project][sample_id].append(row_dict) + 
data_by_project_sample_id[project][sample_id].append(row_dict) + if tool and tool != row_dict['tool']: + raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one is supported.') + if not tool: + tool = row_dict['tool'] - return data_by_id + return tool, data_by_project_sample_id From a87f77c062e3122061d9bbb232ecdec25c02a0ec Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 17 Oct 2022 13:01:44 -0400 Subject: [PATCH 14/96] Add dynamic phenotype-base prioritization configs. --- .../components/panel/variants/VariantGene.jsx | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 29eb41f603..089a353484 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -402,24 +402,6 @@ const GENE_DETAIL_SECTIONS = [ sampleGeneDetailsDisplay(gene.geneId, rnaSeqData, 'rnaSeqData') ), }, - { - color: 'orange', - description: 'Phenotype Prioritization', - label: 'LIRICAL', - showDetails: (gene, { phePriData }) => phePriData && phePriData.lirical && phePriData.lirical[gene.geneId], - detailsDisplay: (gene, { phePriData }) => ( - sampleGeneDetailsDisplay(gene.geneId, phePriData.lirical) - ), - }, - { - color: 'orange', - description: 'Phenotype Prioritization', - label: 'Exomiser', - showDetails: (gene, { phePriData }) => phePriData && phePriData.exomiser && phePriData.exomiser[gene.geneId], - detailsDisplay: (gene, { phePriData }) => ( - sampleGeneDetailsDisplay(gene.geneId, phePriData.exomiser) - ), - }, ] const OmimSegments = styled(Segment.Group).attrs({ size: 'tiny', horizontal: true, compact: true })` @@ -471,10 +453,28 @@ const getDetailSections = (configs, gene, compact, labelProps, sampleGeneData) = ) )) +const addPhenotypePrioritizationConfig = (configs, phePriInfo) => ( + phePriInfo ? [ + ...configs, + ...Object.keys(phePriInfo).map(tool => ( + { + color: 'orange', + description: 'Phenotype Prioritization', + label: tool.toUpper(), + showDetails: (gene, { phePriData }) => phePriData && phePriData[tool] && phePriData[tool][gene.geneId], + detailsDisplay: (gene, { phePriData }) => ( + sampleGeneDetailsDisplay(gene.geneId, phePriData[tool]) + ), + } + )), + ] : configs +) + export const GeneDetails = React.memo(( { gene, compact, showLocusLists, showInlineDetails, sampleGeneData, ...labelProps }, ) => { - const geneDetails = getDetailSections(GENE_DETAIL_SECTIONS, gene, compact, labelProps, sampleGeneData) + const geneDetailConfigs = addPhenotypePrioritizationConfig(GENE_DETAIL_SECTIONS, sampleGeneData.phePriData) + const geneDetails = getDetailSections(geneDetailConfigs, gene, compact, labelProps, sampleGeneData) const geneDiseaseDetails = getDetailSections(GENE_DISEASE_DETAIL_SECTIONS, gene, compact, labelProps) const hasLocusLists = showLocusLists && gene.locusListGuids.length > 0 const showDivider = !showInlineDetails && geneDetails.length > 0 && (hasLocusLists || geneDiseaseDetails.length > 0) From 419549c95fb1399b7f18ca48fc4329d1e28b731c Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 18 Oct 2022 09:32:59 -0400 Subject: [PATCH 15/96] Update display. 
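The _get_phenotype_prioritization change below pops the tool out of each record so the payload sent to the client is nested individualGuid -> tool -> geneId, and the VariantGene fix in the same commit only needs to upper-case the tool name for the label. A minimal standalone sketch of that grouping, not seqr code: the wrapper name and the final assert are illustrative, and the sample record is shaped like the LIRICAL fixtures added later in this series.

    from collections import defaultdict

    def group_phenotype_scores(data_dicts):
        # individualGuid -> tool -> geneId -> list of score records
        data_by_individual_gene = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
        for data in data_dicts:
            data_by_individual_gene[data.pop('individualGuid')][data.pop('tool')][data['geneId']].append(data)
        return data_by_individual_gene

    records = [{
        'individualGuid': 'I000001_na19675', 'tool': 'lirical', 'geneId': 'ENSG00000268903',
        'rank': 1, 'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome',
        'scores': {'post_test_probability': 0, 'compositeLR': 0.066},
    }]
    grouped = group_phenotype_scores(records)
    assert grouped['I000001_na19675']['lirical']['ENSG00000268903'][0]['rank'] == 1
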
--- seqr/views/apis/data_manager_api.py | 2 +- seqr/views/utils/variant_utils.py | 2 +- .../components/panel/variants/VariantGene.jsx | 2 +- .../components/panel/variants/selectors.js | 18 +++++++++++------- ui/shared/utils/constants.js | 3 --- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 8e8c610b5f..5023736ac1 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -427,7 +427,7 @@ def _load_phenotype_prioritization(file_path, user): deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') _log_append_info(user, info, f'Deleted {deleted} existing {tool.upper()} records') - return all_records, info + return all_records, info, tool @data_manager_required diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 5854b465a4..dcc93d1f80 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -140,7 +140,7 @@ def _get_phenotype_prioritization(gene_ids, families): ) for data in data_dicts: - data_by_individual_gene[data.pop('individualGuid')][data['tool']][data['geneId']].append(data) + data_by_individual_gene[data.pop('individualGuid')][data.pop('tool')][data['geneId']].append(data) return data_by_individual_gene diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 089a353484..905911cde7 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -460,7 +460,7 @@ const addPhenotypePrioritizationConfig = (configs, phePriInfo) => ( { color: 'orange', description: 'Phenotype Prioritization', - label: tool.toUpper(), + label: tool.toUpperCase(), showDetails: (gene, { phePriData }) => phePriData && phePriData[tool] && phePriData[tool][gene.geneId], detailsDisplay: (gene, { phePriData }) => ( sampleGeneDetailsDisplay(gene.geneId, phePriData[tool]) diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index d6d1abcff7..69403de525 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -17,6 +17,7 @@ import { getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, getPhePriDataByIndividual, } from 'redux/selectors' +const RNA_SEQ_SCORE_FIELDS = ['zScore', 'pValue', 'pAdjust'] export const getRnaSeqOutilerDataByFamilyGene = createSelector( getIndividualsByGuid, getRnaSeqDataByIndividual, @@ -24,14 +25,17 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( (acc, [individualGuid, rnaSeqData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] acc[familyGuid] = Object.entries(rnaSeqData.outliers || {}).reduce( - (acc2, [geneId, data]) => { - const { zScore, pValue, pAdjust } = data - return (data.isSignificant ? { + (acc2, [geneId, data]) => (data.isSignificant ? 
+ { ...acc2, - [geneId]: { ...(acc2[geneId] || {}), [displayName]: [{ scores: { zScore, pValue, pAdjust } }] }, - } : acc2) - }, - acc[familyGuid] || {}, + [geneId]: { + ...(acc2[geneId] || {}), + [displayName]: [{ + scores: RNA_SEQ_SCORE_FIELDS.reduce((scoreAcc, score) => ({ ...scoreAcc, [score]: data[score] }), {}), + }], + }, + } : acc2 + ), acc[familyGuid] || {}, ) return acc }, {}, diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index f9538f53a0..5409cf63ab 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1311,9 +1311,6 @@ const VARIANT_ICON_COLORS = { green: '#21a926', } -export const LIRICAL = 'L' -export const EXOMISER = 'E' - export const PANEL_APP_CONFIDENCE_DESCRIPTION = { 0: 'No Panel App confidence level', 1: 'Red, lowest level of confidence; 1 of the 4 sources or from other sources.', From 0b6cdd891f08f8404add3b95005f3ef1093ce8c2 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 18 Oct 2022 09:34:31 -0400 Subject: [PATCH 16/96] Add a return value. --- seqr/views/apis/data_manager_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 8e8c610b5f..5023736ac1 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -427,7 +427,7 @@ def _load_phenotype_prioritization(file_path, user): deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') _log_append_info(user, info, f'Deleted {deleted} existing {tool.upper()} records') - return all_records, info + return all_records, info, tool @data_manager_required From 260b0c7348abdd395e9356357db615d0b25cd127 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 18 Oct 2022 10:03:47 -0400 Subject: [PATCH 17/96] Temporarily make the tests happy. 
--- seqr/views/apis/saved_variant_api_tests.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 2 +- seqr/views/apis/variant_search_api_tests.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 1cc28bc634..e2202cf4c1 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -27,7 +27,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', - 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', + 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'phePriData', } COMPOUND_HET_3_JSON = { diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index b02346fdd5..b622e62149 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -25,7 +25,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'projectsByGuid', 'locusListsByGuid', 'savedVariantsByGuid', 'variantFunctionalDataByGuid', 'genesById', 'variantNotesByGuid', 'individualsByGuid', 'variantTagsByGuid', 'familiesByGuid', 'familyNotesByGuid', - 'mmeSubmissionsByGuid', + 'mmeSubmissionsByGuid', 'phePriData', } diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 2f2fa74e8a..f4276973a9 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -82,6 +82,7 @@ }, 'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}}, 'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}}, + 'phePriData': {}, 'mmeSubmissionsByGuid': {'MS000001_na19675': {k: mock.ANY for k in MATCHMAKER_SUBMISSION_FIELDS}}, } From 4ceeaa659087fb8b3cbf5e55a5c9426435d5241d Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 18 Oct 2022 11:16:32 -0400 Subject: [PATCH 18/96] Fix JS test failures. --- ui/shared/components/panel/variants/selectors.js | 6 ++++-- ui/shared/components/panel/variants/selectors.test.js | 8 ++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 69403de525..d125d2acd0 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -31,7 +31,9 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( [geneId]: { ...(acc2[geneId] || {}), [displayName]: [{ - scores: RNA_SEQ_SCORE_FIELDS.reduce((scoreAcc, score) => ({ ...scoreAcc, [score]: data[score] }), {}), + scores: RNA_SEQ_SCORE_FIELDS.reduce( + (sAcc, score) => (data[score] ? 
{ ...sAcc, [score]: data[score] } : sAcc), {}, + ), }], }, } : acc2 @@ -45,7 +47,7 @@ export const getRnaSeqOutilerDataByFamilyGene = createSelector( export const getPhePriDataByFamilyGene = createSelector( getIndividualsByGuid, getPhePriDataByIndividual, - (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual).reduce( + (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual || {}).reduce( (acc, [individualGuid, phePriData]) => { const { familyGuid, displayName } = individualsByGuid[individualGuid] acc[familyGuid] = Object.entries(phePriData).reduce((accTool, [tool, toolData]) => ({ diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index 628604d3a4..6d7ff7ce69 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -96,15 +96,15 @@ test('getRnaSeqOutilerDataByFamilyGene', () => { expect(getRnaSeqOutilerDataByFamilyGene(RNA_SEQ_STATE)).toEqual({ F011652_1: { ENSG00000228198: { - NA19678: { isSignificant: true, pValue: 0.0004 }, - NA19679_1: { isSignificant: true, pValue: 0.01 }, + NA19678: [{ scores: { pValue: 0.0004 } }], + NA19679_1: [{ scores: { pValue: 0.01 } }], }, ENSG00000164458: { - NA19678: { isSignificant: true, pValue: 0.0073 }, + NA19678: [{ scores: { pValue: 0.0073 } }], }, }, F011652_2: { - ENSG00000228198: { NA19678_2: { isSignificant: true, pValue: 0.0214 } }, + ENSG00000228198: { NA19678_2: [{ scores: { pValue: 0.0214 } }] }, }, }) }) From 9c564e4a367462c5c69b49407928066d5f156296 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 19 Oct 2022 11:57:06 -0400 Subject: [PATCH 19/96] Add preliminary tests and update logging texts. 
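For reference, the new tests feed load_phenotype_prioritization_data tab-separated rows in the layout below: the descriptive columns plus scoreName1/score1 are required, and further scoreName/score pairs are optional. This is an illustrative parse only, not a test from this commit; the row values are copied from the LIRICAL fixtures, and the real parser accepts more score pairs than the three shown here.

    header = ['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName',
              'scoreName1', 'score1', 'scoreName2', 'score2', 'scoreName3', 'score3']
    row = ['lirical', '1kg project nåme with uniçøde', 'NA19678', '1', 'ENSG00000105357',
           'OMIM:618460', 'Khan-Khan-Katsanis syndrome', 'post_test_probability', '0',
           'compositeLR', '0.066']
    tsv_line = '\t'.join(row)  # trailing optional columns may simply be omitted

    record = dict(zip(header, tsv_line.split('\t')))
    scores = {}
    for i in range(1, 4):
        score_name = record.get(f'scoreName{i}')
        if not score_name:
            break
        scores[score_name] = float(record[f'score{i}'])
    assert scores == {'post_test_probability': 0.0, 'compositeLR': 0.066}
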
--- seqr/views/apis/data_manager_api.py | 2 +- seqr/views/apis/data_manager_api_tests.py | 50 ++++++++++++++++++++++- seqr/views/utils/dataset_utils.py | 10 ++--- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 5023736ac1..777791d52d 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -411,7 +411,7 @@ def _load_phenotype_prioritization(file_path, user): missing_individuals = set(records_by_sample.keys()) - set(existing_indivs_by_id.keys()) if missing_individuals: - raise ValueError(f'Individual {", ".join(list(missing_individuals))} doesn\'t exist') + raise ValueError(f'Can\'t find individuals {", ".join(sorted(list(missing_individuals)))}') for sample_id, records in records_by_sample.items(): for rec in records: rec['individual'] = existing_indivs_by_id[sample_id] diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 9315a71b3a..e673f20b8a 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -6,7 +6,7 @@ import responses from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ - update_rna_seq, load_rna_seq_sample_data + update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers from seqr.views.utils.test_utils import AuthenticationTestCase, urllib3_responses from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, Sample @@ -281,6 +281,21 @@ RNA_TPM_SAMPLE_DATA = [f'{RNA_SAMPLE_GUID}\t\t{json.dumps(SAMPLE_GENE_TPM_DATA)}\n'] RNA_FILENAME_TEMPLATE = 'rna_sample_data__{}__2020-04-15T00:00:00.json.gz' +PHENOTYPE_PRIORITIZATION_HEADER = ['tool\tproject\tsampleId\trank\tgeneId\tdiseaseId\tdiseaseName\tscoreName1\tscore1\tscoreName2\tscore2\tscoreName3\tscore3'] +PHENOTYPE_PRIORITIZATION_MISS_HEADER = ['tool\tsampleId\trank\tgeneId\tdiseaseName\tscoreName1\tscore1\tscoreName2\tscore2\tscoreName3\tscore3'] +LIRICAL_NO_PROJECT_DATA = ['lirical'] +LIRICAL_NO_EXIST_INDV_DATA = [ + 'lirical\tCMG_Beggs_WGS\tNA19678\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', + 'lirical\tCMG_Beggs_WGS\tNA19679\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', +] +LIRICAL_DATA = [ + 'lirical\t1kg project nåme with uniçøde\tNA19678\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', + 'lirical\t1kg project nåme with uniçøde\tNA19678\t2\tENSG00000105357\tOMIM:219800\t"Cystinosis, nephropathic"\tpost_test_probability\t0\tcompositeLR\t0.003\t\t', +] +EXOMISER_DATA = [ + 'exomiser\tCMG_Beggs_WGS\tBEG_1230-1_01\t1\tENSG00000105357\tORPHA:2131\tAlternating hemiplegia of childhood\texomiser_score\t0.977923765\tphenotype_score\t0.603998205\tvariant_score\t1', + 'exomiser\tCMG_Beggs_WGS\tBEG_1230-1_01\t3\tENSG00000105357\tORPHA:71517\tRapid-onset dystonia-parkinsonism\texomiser_score\t0.977923765\tphenotype_score\t0.551578222\tvariant_score\t1' +] class DataManagerAPITest(AuthenticationTestCase): fixtures = ['users', '1kg_project', 'reference_data'] @@ -754,3 +769,36 @@ def test_load_rna_seq_sample_data(self, mock_model_logger, mock_logger, mock_ope ) self.assertListEqual(params['get_models_json'](models), params['expected_models_json']) + + 
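+    # The phenotype prioritization loading test below walks through the main failure
+    # modes before the happy path: a header missing required columns, a row without
+    # sample ID/project values, two different tools mixed in one file, sample IDs with
+    # no matching individuals, and finally a successful LIRICAL load.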
@mock.patch('seqr.views.utils.dataset_utils.file_iter') + @mock.patch('seqr.views.apis.data_manager_api.logger') + @mock.patch('seqr.models.logger') + def test_load_phenotype_prioritization_data(self, mock_model_logger, mock_logger, mock_file_iter): + url = reverse(load_phenotype_prioritization_data) + self.check_data_manager_login(url) + + mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_MISS_HEADER) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Invalid file: missing column(s) project, diseaseId') + mock_logger.info.assert_called_with('Loading phenotype-based prioritization data from lirical_data.tsv.gz', self.data_manager_user) + mock_file_iter.assert_called_with('lirical_data.tsv.gz') + + mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_PROJECT_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Both sample ID and project fields are required.') + + mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA + EXOMISER_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Multiple tools found lirical and exomiser. Only one in a file is supported.') + + mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_EXIST_INDV_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 500) + self.assertEqual(response.json()['error'], 'Can\'t find individuals NA19678, NA19679') + + mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 200) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 8134c821cb..fbaae56b18 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -453,7 +453,7 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples def _parse_phenotype_pri_row(row): - record = {_to_snake_case(key): row[key] for key in PHENOTYPE_PRIORITIZATION_HEADER} + record = {_to_snake_case(key): row.get(key) for key in PHENOTYPE_PRIORITIZATION_HEADER} scores = {} for i in range(1, MAX_SCORES): @@ -463,7 +463,7 @@ def _parse_phenotype_pri_row(row): scores[score_name] = float(row[f'score{i}']) record['scores'] = scores - yield record['sample_id'], record + yield record def load_phenotype_prioritization_data_file(file_path): @@ -477,14 +477,14 @@ def load_phenotype_prioritization_data_file(file_path): tool = None for line in tqdm(f, unit=' rows'): row = dict(zip(header, _parse_tsv_row(line))) - for sample_id, row_dict in _parse_phenotype_pri_row(row): - row_dict.pop('sample_id') + for row_dict in _parse_phenotype_pri_row(row): + sample_id = row_dict.pop('sample_id', None) project = row_dict.pop('project', None) if not sample_id or not project: raise ValueError('Both sample ID and project fields are required.') 
data_by_project_sample_id[project][sample_id].append(row_dict) if tool and tool != row_dict['tool']: - raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one is supported.') + raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one in a file is supported.') if not tool: tool = row_dict['tool'] From 43727b4068f8da2f8645767bc85fda7734d39f45 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 20 Oct 2022 16:23:41 -0400 Subject: [PATCH 20/96] Update per review comments. --- seqr/models.py | 14 +-- seqr/utils/logging_utils.py | 18 ++- seqr/views/apis/data_manager_api.py | 61 +++++----- seqr/views/apis/data_manager_api_tests.py | 141 +++++++++++++++++----- seqr/views/utils/dataset_utils.py | 4 +- seqr/views/utils/permissions_utils.py | 6 + 6 files changed, 172 insertions(+), 72 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index 4658c5002a..b1f085c9b6 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -7,13 +7,13 @@ from django.contrib.postgres.fields import ArrayField from django.core.exceptions import PermissionDenied from django.db import models -from django.db.models import base, options, ForeignKey, JSONField, prefetch_related_objects +from django.db.models import base, options, ForeignKey, JSONField from django.utils import timezone from django.utils.text import slugify as __slugify from guardian.shortcuts import assign_perm -from seqr.utils.logging_utils import log_model_update, log_model_bulk_update, SeqrLogger +from seqr.utils.logging_utils import log_model_update, log_model_bulk_update, SeqrLogger, log_model_no_guid_bulk_update from seqr.utils.xpos_utils import get_chrom_pos from seqr.views.utils.terra_api_utils import anvil_enabled from reference_data.models import GENOME_VERSION_GRCh37, GENOME_VERSION_CHOICES @@ -1014,22 +1014,20 @@ def _compute_guid(self): class BulkOperationBase: @classmethod - def bulk_create(cls, user, new_models, parent=None): + def bulk_create(cls, user, new_models): """Helper bulk create method that logs the creation""" for model in new_models: model.created_by = user models = cls.objects.bulk_create(new_models) - log_model_bulk_update(logger, models, user, 'create', parent=parent) + log_model_no_guid_bulk_update(logger, models, user, 'create') return models @classmethod - def bulk_delete(cls, user, queryset=None, parent=None, **filter_kwargs): + def bulk_delete(cls, user, queryset=None, **filter_kwargs): """Helper bulk delete method that logs the deletion""" if queryset is None: queryset = cls.objects.filter(**filter_kwargs) - if parent: - prefetch_related_objects(queryset, parent) - log_model_bulk_update(logger, queryset, user, 'delete', parent=parent) + log_model_no_guid_bulk_update(logger, queryset, user, 'delete') return queryset.delete() class Meta: diff --git a/seqr/utils/logging_utils.py b/seqr/utils/logging_utils.py index eb205af86d..b696887a5c 100644 --- a/seqr/utils/logging_utils.py +++ b/seqr/utils/logging_utils.py @@ -73,18 +73,26 @@ def log_model_update(logger, model, user, update_type, update_fields=None): logger.info('{} {} {}'.format(update_type, db_entity, entity_id), user, db_update=db_update) -def log_model_bulk_update(logger, models, user, update_type, update_fields=None, parent=None): +def log_model_bulk_update(logger, models, user, update_type, update_fields=None): if not models: return [] db_entity = type(models[0]).__name__ - entity_ids = [o.guid if hasattr(o, 'guid') else o.id for o in models] + entity_ids = [o.guid for o in models] db_update = { 'dbEntity': db_entity, 
'entityIds': entity_ids, 'updateType': 'bulk_{}'.format(update_type), } - if parent: - db_update['parentEntityIds'] = list({getattr(model, parent).guid for model in models}) if update_fields: db_update['updateFields'] = list(update_fields) logger.info( '{} {} {}s'.format(update_type, len(entity_ids), db_entity), user, db_update=db_update) - return entity_ids \ No newline at end of file + return entity_ids + + +def log_model_no_guid_bulk_update(logger, models, user, update_type): + if not models: + return [] + db_entity = type(models[0]).__name__ + db_update = { + 'dbEntity': db_entity, 'numEntities': len(models), 'updateType': 'bulk_{}'.format(update_type), + } + logger.info(f'{update_type} {db_entity}s', user, db_update=db_update) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 777791d52d..2a0366277b 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -21,9 +21,9 @@ from seqr.views.utils.dataset_utils import load_rna_seq_outlier, load_rna_seq_tpm, load_phenotype_prioritization_data_file from seqr.views.utils.file_utils import parse_file, get_temp_upload_directory, load_uploaded_file from seqr.views.utils.json_utils import create_json_response, _to_camel_case -from seqr.views.utils.permissions_utils import data_manager_required +from seqr.views.utils.permissions_utils import data_manager_required, is_internal_project -from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization +from seqr.models import Sample, Individual, RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization, Project from settings import KIBANA_SERVER, KIBANA_ELASTICSEARCH_PASSWORD @@ -387,7 +387,7 @@ def load_rna_seq_sample_data(request, sample_guid): data_by_gene = json.loads(row.split('\t\t')[1]) model_cls = RNA_DATA_TYPE_CONFIGS[data_type]['model_class'] - model_cls.bulk_create(request.user, [model_cls(sample=sample, **data) for data in data_by_gene.values()], parent='sample') + model_cls.bulk_create(request.user, [model_cls(sample=sample, **data) for data in data_by_gene.values()]) return create_json_response({'success': True}) @@ -397,21 +397,37 @@ def _log_append_info(user, info, message): logger.info(message, user) -def _load_phenotype_prioritization(file_path, user): - tool, data_by_project_sample_id = load_phenotype_prioritization_data_file(file_path) +@data_manager_required +def load_phenotype_prioritization_data(request): + request_json = json.loads(request.body) + + file_path = request_json['file'] info = [] - _log_append_info(user, info, f'Parsed {tool.upper()} data for project(s): {", ".join(data_by_project_sample_id.keys())}') + _log_append_info(request.user, info, f'Loading phenotype-based prioritization data from {file_path}') + + try: + tool, data_by_project_sample_id = load_phenotype_prioritization_data_file(file_path) + except ValueError as e: + return create_json_response({'error': str(e)}, status=400) all_records = [] to_delete = None - for project, records_by_sample in data_by_project_sample_id.items(): - indivs = Individual.objects.filter(family__project__name=project, individual_id__in=records_by_sample.keys()) + error = None + for project_name, records_by_sample in data_by_project_sample_id.items(): + projects = [p for p in Project.objects.filter(name=project_name) if is_internal_project(p)] + if not projects or len(projects) > 1: + error = f'Project not found or multiple projects with the same name {project_name}' + break + _log_append_info(request.user, info, f'Parsed 
{tool.upper()} data for project: {project_name}') + + indivs = Individual.objects.filter(family__project=projects[0], individual_id__in=records_by_sample.keys()) existing_indivs_by_id = {ind.individual_id: ind for ind in indivs} missing_individuals = set(records_by_sample.keys()) - set(existing_indivs_by_id.keys()) if missing_individuals: - raise ValueError(f'Can\'t find individuals {", ".join(sorted(list(missing_individuals)))}') + error = f'Can\'t find individuals {", ".join(sorted(list(missing_individuals)))}' + break for sample_id, records in records_by_sample.items(): for rec in records: rec['individual'] = existing_indivs_by_id[sample_id] @@ -420,28 +436,19 @@ def _load_phenotype_prioritization(file_path, user): to_delete = to_delete | exist_records if to_delete else exist_records records = [rec for records in records_by_sample.values() for rec in records] - _log_append_info(user, info, f'Attempted loading {len(records)} records of {tool.upper()} data to project {project}') + _log_append_info(request.user, info, + f'Attempted loading {len(records)} records of {tool.upper()} data to project {project_name}') all_records += records - if to_delete: - deleted, _ = PhenotypePrioritization.bulk_delete(user, to_delete, parent='individual') - _log_append_info(user, info, f'Deleted {deleted} existing {tool.upper()} records') - - return all_records, info, tool - + if error: + return create_json_response({'error': error}, status=400) -@data_manager_required -def load_phenotype_prioritization_data(request): - request_json = json.loads(request.body) - - file_name = request_json['file'] - - logger.info(f'Loading phenotype-based prioritization data from {file_name}', request.user) - records, info, tool = _load_phenotype_prioritization(file_name, request.user) - models = PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in records], - parent='individual') + if to_delete: + deleted, _ = PhenotypePrioritization.bulk_delete(request.user, to_delete) + _log_append_info(request.user, info, f'Deleted {deleted} existing {tool.upper()} records') - info.append(f'Loaded {len(models)} {tool.upper()} data records') + models = PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in all_records]) + _log_append_info(request.user, info, f'Loaded {len(models)} {tool.upper()} data records') return create_json_response({ 'info': info, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index e673f20b8a..85e1fcca0d 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -8,8 +8,8 @@ from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \ update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers -from seqr.views.utils.test_utils import AuthenticationTestCase, urllib3_responses -from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, Sample +from seqr.views.utils.test_utils import AuthenticationTestCase, urllib3_responses, AnvilAuthenticationTestCase +from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, Sample, Project PROJECT_GUID = 'R0001_1kg' @@ -281,23 +281,38 @@ RNA_TPM_SAMPLE_DATA = [f'{RNA_SAMPLE_GUID}\t\t{json.dumps(SAMPLE_GENE_TPM_DATA)}\n'] RNA_FILENAME_TEMPLATE = 'rna_sample_data__{}__2020-04-15T00:00:00.json.gz' -PHENOTYPE_PRIORITIZATION_HEADER = 
['tool\tproject\tsampleId\trank\tgeneId\tdiseaseId\tdiseaseName\tscoreName1\tscore1\tscoreName2\tscore2\tscoreName3\tscore3'] -PHENOTYPE_PRIORITIZATION_MISS_HEADER = ['tool\tsampleId\trank\tgeneId\tdiseaseName\tscoreName1\tscore1\tscoreName2\tscore2\tscoreName3\tscore3'] -LIRICAL_NO_PROJECT_DATA = ['lirical'] +PHENOTYPE_PRIORITIZATION_HEADER = [['tool', 'project', 'sampleId', 'rank', 'geneId', 'diseaseId', 'diseaseName', + 'scoreName1', 'score1', 'scoreName2', 'score2', 'scoreName3', 'score3']] +PHENOTYPE_PRIORITIZATION_MISS_HEADER = [['tool', 'sampleId', 'rank', 'geneId', 'diseaseName', 'scoreName1', 'score1', + 'scoreName2', 'score2', 'scoreName3', 'score3']] +LIRICAL_NO_PROJECT_DATA = [['lirical']] +LIRICAL_PROJECT_NOT_EXIST_DATA = [ + ['lirical', 'CMG_Beggs_WGS', 'NA19678', '1', 'ENSG00000105357', 'OMIM:618460', 'Khan-Khan-Katsanis syndrome', + 'post_test_probability', '0', 'compositeLR', '0.066'], +] LIRICAL_NO_EXIST_INDV_DATA = [ - 'lirical\tCMG_Beggs_WGS\tNA19678\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', - 'lirical\tCMG_Beggs_WGS\tNA19679\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', + ['lirical', '1kg project nåme with uniçøde', 'NA19678x', '1', 'ENSG00000105357', 'OMIM:618460', + 'Khan-Khan-Katsanis syndrome', 'post_test_probability', '0', 'compositeLR', '0.066'], + ['lirical', '1kg project nåme with uniçøde', 'NA19679x', '1', 'ENSG00000105357', 'OMIM:618460', + 'Khan-Khan-Katsanis syndrome', 'post_test_probability', '0', 'compositeLR', '0.066'], ] LIRICAL_DATA = [ - 'lirical\t1kg project nåme with uniçøde\tNA19678\t1\tENSG00000105357\tOMIM:618460\tKhan-Khan-Katsanis syndrome\tpost_test_probability\t0\tcompositeLR\t0.066', - 'lirical\t1kg project nåme with uniçøde\tNA19678\t2\tENSG00000105357\tOMIM:219800\t"Cystinosis, nephropathic"\tpost_test_probability\t0\tcompositeLR\t0.003\t\t', + ['lirical', '1kg project nåme with uniçøde', 'NA19678', '1', 'ENSG00000105357', 'OMIM:618460', + 'Khan-Khan-Katsanis syndrome', 'post_test_probability', '0', 'compositeLR', '0.066'], + ['lirical', 'Test Reprocessed Project', 'NA20885', '2', 'ENSG00000105357', 'OMIM:219800', + '"Cystinosis, nephropathic"', 'post_test_probability', '0', 'compositeLR', '0.003', '', ''], ] EXOMISER_DATA = [ - 'exomiser\tCMG_Beggs_WGS\tBEG_1230-1_01\t1\tENSG00000105357\tORPHA:2131\tAlternating hemiplegia of childhood\texomiser_score\t0.977923765\tphenotype_score\t0.603998205\tvariant_score\t1', - 'exomiser\tCMG_Beggs_WGS\tBEG_1230-1_01\t3\tENSG00000105357\tORPHA:71517\tRapid-onset dystonia-parkinsonism\texomiser_score\t0.977923765\tphenotype_score\t0.551578222\tvariant_score\t1' + ['exomiser', 'CMG_Beggs_WGS', 'BEG_1230-1_01', '1', 'ENSG00000105357', 'ORPHA:2131', + 'Alternating hemiplegia of childhood', 'exomiser_score', '0.977923765', 'phenotype_score', '0.603998205', + 'variant_score', '1'], + ['exomiser', 'CMG_Beggs_WGS', 'BEG_1230-1_01', '3', 'ENSG00000105357', 'ORPHA:71517', + 'Rapid-onset dystonia-parkinsonism', 'exomiser_score', '0.977923765', 'phenotype_score', '0.551578222', + 'variant_score', '1'] ] -class DataManagerAPITest(AuthenticationTestCase): + +class DataManagerAPITest(object): fixtures = ['users', '1kg_project', 'reference_data'] @urllib3_responses.activate @@ -706,11 +721,9 @@ def mock_write(content): deleted_count = params.get('deleted_count', params['initial_model_count']) mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in 
info]) mock_model_logger.info.assert_called_with( - f'delete {deleted_count} {model_cls.__name__}s', self.data_manager_user, db_update={ - 'dbEntity': model_cls.__name__, 'entityIds': mock.ANY, 'parentEntityIds': mock.ANY, - 'updateType': 'bulk_delete', - }) - self.assertTrue(RNA_SAMPLE_GUID in mock_model_logger.info.call_args_list[1].kwargs['db_update']['parentEntityIds']) + f'delete {model_cls.__name__}s', self.data_manager_user, + db_update={'dbEntity': model_cls.__name__, 'numEntities': deleted_count, 'updateType': 'bulk_delete'} + ) mock_logger.warning.assert_has_calls([mock.call(warn_log, self.data_manager_user) for warn_log in warnings]) # test database models are correct @@ -762,14 +775,17 @@ def test_load_rna_seq_sample_data(self, mock_model_logger, mock_logger, mock_ope mock_logger.info.assert_called_with('Loading outlier data for NA19675_D2', self.data_manager_user) mock_model_logger.info.assert_called_with( - f'create 2 {model_cls.__name__}s', self.data_manager_user, db_update={ - 'dbEntity': model_cls.__name__, 'entityIds': mock.ANY, 'parentEntityIds': [RNA_SAMPLE_GUID], - 'updateType': 'bulk_create', + f'create {model_cls.__name__}s', self.data_manager_user, db_update={ + 'dbEntity': model_cls.__name__, 'numEntities': 2, 'updateType': 'bulk_create', } ) self.assertListEqual(params['get_models_json'](models), params['expected_models_json']) + @classmethod + def _join_data(cls, data): + return iter(['\t'.join(line) for line in data]) + @mock.patch('seqr.views.utils.dataset_utils.file_iter') @mock.patch('seqr.views.apis.data_manager_api.logger') @mock.patch('seqr.models.logger') @@ -777,28 +793,93 @@ def test_load_phenotype_prioritization_data(self, mock_model_logger, mock_logger url = reverse(load_phenotype_prioritization_data) self.check_data_manager_login(url) - mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_MISS_HEADER) + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_MISS_HEADER) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) - self.assertEqual(response.status_code, 500) + self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Invalid file: missing column(s) project, diseaseId') mock_logger.info.assert_called_with('Loading phenotype-based prioritization data from lirical_data.tsv.gz', self.data_manager_user) mock_file_iter.assert_called_with('lirical_data.tsv.gz') - mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_PROJECT_DATA) + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_PROJECT_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) - self.assertEqual(response.status_code, 500) + self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Both sample ID and project fields are required.') - mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA + EXOMISER_DATA) + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA + EXOMISER_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) - self.assertEqual(response.status_code, 500) + self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Multiple tools found lirical and exomiser. 
Only one in a file is supported.') - mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_EXIST_INDV_DATA) + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_PROJECT_NOT_EXIST_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) - self.assertEqual(response.status_code, 500) - self.assertEqual(response.json()['error'], 'Can\'t find individuals NA19678, NA19679') + self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['error'], 'Project not found or multiple projects with the same name CMG_Beggs_WGS') - mock_file_iter.return_value = iter(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) + project = Project.objects.get(name='Empty Project') + project.name = '1kg project nåme with uniçøde' + project.save() + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['error'], 'Project not found or multiple projects with the same name 1kg project nåme with uniçøde') + project.name = 'Empty Project' + project.save() + + mock_logger.reset_mock() + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_EXIST_INDV_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 400) + self.assertEqual(response.json()['error'], 'Can\'t find individuals NA19678x, NA19679x') + info = [ + 'Loading phenotype-based prioritization data from lirical_data.tsv.gz', + 'Parsed LIRICAL data for project: 1kg project nåme with uniçøde' + ] + mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) + mock_model_logger.info.assert_not_called() + + info = [ + 'Loading phenotype-based prioritization data from lirical_data.tsv.gz', + 'Parsed LIRICAL data for project: 1kg project nåme with uniçøde', + 'Attempted loading 1 records of LIRICAL data to project 1kg project nåme with uniçøde', + 'Parsed LIRICAL data for project: Test Reprocessed Project', + 'Attempted loading 1 records of LIRICAL data to project Test Reprocessed Project', + ] + + mock_logger.reset_mock() + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) + response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) + self.assertEqual(response.status_code, 200) + add_only_info = info + ['Loaded 2 LIRICAL data records'] + self.assertEqual(response.json()['info'], add_only_info) + mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in add_only_info]) + db_update = {'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_create'} + mock_model_logger.info.assert_called_with('create PhenotypePrioritizations', self.data_manager_user, db_update=db_update) + + mock_logger.reset_mock() + mock_model_logger.reset_mock() + mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 200) + info += ['Deleted 2 existing LIRICAL records', 'Loaded 2 LIRICAL data 
records'] + self.assertEqual(response.json()['info'], info) + mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) + mock_model_logger.info.assert_has_calls([ + mock.call('delete PhenotypePrioritizations', self.data_manager_user, db_update={ + 'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_delete', + }), + mock.call('create PhenotypePrioritizations', self.data_manager_user, db_update=db_update), + ]) + + +# Tests for AnVIL access disabled +class LocalDataManagerAPITest(AuthenticationTestCase, DataManagerAPITest): + fixtures = ['users', '1kg_project'] + + +# Test for permissions from AnVIL only +class AnvilDataManagerAPITest(AnvilAuthenticationTestCase, DataManagerAPITest): + fixtures = ['users', 'social_auth', '1kg_project'] + + @mock.patch('seqr.views.utils.permissions_utils.INTERNAL_NAMESPACES', ['my-seqr-billing']) + def test_load_phenotype_prioritization_data(self): + super(AnvilDataManagerAPITest, self).test_load_phenotype_prioritization_data() diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index fbaae56b18..4cfd0241c7 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -483,9 +483,9 @@ def load_phenotype_prioritization_data_file(file_path): if not sample_id or not project: raise ValueError('Both sample ID and project fields are required.') data_by_project_sample_id[project][sample_id].append(row_dict) - if tool and tool != row_dict['tool']: - raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one in a file is supported.') if not tool: tool = row_dict['tool'] + elif tool != row_dict['tool']: + raise ValueError(f'Multiple tools found {tool} and {row_dict["tool"]}. Only one in a file is supported.') return tool, data_by_project_sample_id diff --git a/seqr/views/utils/permissions_utils.py b/seqr/views/utils/permissions_utils.py index d4ac50c02d..de84e8d472 100644 --- a/seqr/views/utils/permissions_utils.py +++ b/seqr/views/utils/permissions_utils.py @@ -103,6 +103,12 @@ def is_internal_anvil_project(project): return anvil_enabled() and project.workspace_namespace in INTERNAL_NAMESPACES +def is_internal_project(project): + if anvil_enabled(): + return project.workspace_namespace in INTERNAL_NAMESPACES + return True + + def get_internal_projects(): if anvil_enabled(): return Project.objects.filter(workspace_namespace__in=INTERNAL_NAMESPACES) From 25644cc015296b1e893a8157dfa74dd7271e732d Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 21 Oct 2022 10:18:05 -0400 Subject: [PATCH 21/96] Fix a codacy error. 
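The base test method picks up extra positional parameters from its @mock.patch decorators, so an override declared as just (self) presumably trips Codacy's arguments-differ check; accepting and forwarding *args keeps the signatures compatible. A standalone sketch of the pattern, with illustrative class names and patch targets rather than seqr code; note the extra patch on the subclass supplies an explicit replacement value, so it injects no mock argument of its own.

    from unittest import TestCase, mock

    class BaseApiTest(TestCase):
        @mock.patch('json.dumps')
        def test_load_data(self, mock_dumps):
            # the decorator injects mock_dumps as a positional argument
            mock_dumps.return_value = '{}'

    class AnvilApiTest(BaseApiTest):
        @mock.patch('json.loads', dict)  # explicit new value: nothing extra is injected
        def test_load_data(self, *args):
            super(AnvilApiTest, self).test_load_data(*args)
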
--- seqr/views/apis/data_manager_api_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index 85e1fcca0d..bee13b28fe 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -881,5 +881,5 @@ class AnvilDataManagerAPITest(AnvilAuthenticationTestCase, DataManagerAPITest): fixtures = ['users', 'social_auth', '1kg_project'] @mock.patch('seqr.views.utils.permissions_utils.INTERNAL_NAMESPACES', ['my-seqr-billing']) - def test_load_phenotype_prioritization_data(self): - super(AnvilDataManagerAPITest, self).test_load_phenotype_prioritization_data() + def test_load_phenotype_prioritization_data(self, *args): + super(AnvilDataManagerAPITest, self).test_load_phenotype_prioritization_data(*args) From 9351e7cd8006028007da62ee009530caf95813f7 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 21 Oct 2022 15:29:42 -0400 Subject: [PATCH 22/96] Update the sampleGene selector. --- .../components/panel/variants/VariantGene.jsx | 7 +- .../components/panel/variants/selectors.js | 78 +++++++++---------- .../panel/variants/selectors.test.js | 25 +++--- 3 files changed, 55 insertions(+), 55 deletions(-) diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 905911cde7..84caaf78cc 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -18,7 +18,7 @@ import { GeneSearchLink } from '../../buttons/SearchResultsLink' import ShowGeneModal from '../../buttons/ShowGeneModal' import Modal from '../../modal/Modal' import { GenCC, ClingenLabel } from '../genes/GeneDetail' -import { getRnaSeqOutilerDataByFamilyGene, getPhePriDataByFamilyGene } from './selectors' +import { getSampleGeneDataByFamilyGene } from './selectors' const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm')) @@ -623,10 +623,7 @@ BaseVariantGene.propTypes = { const getRnaSeqProps = (state, ownProps) => ({ hasRnaTpmData: getFamiliesByGuid(state)[ownProps.variant.familyGuids[0]]?.hasRnaTpmData, - sampleGeneData: { - rnaSeqData: getRnaSeqOutilerDataByFamilyGene(state)[ownProps.variant.familyGuids[0]], - phePriData: getPhePriDataByFamilyGene(state)[ownProps.variant.familyGuids[0]], - }, + sampleGeneData: getSampleGeneDataByFamilyGene(state)[ownProps.variant.familyGuids[0]] || {}, }) const mapStateToProps = (state, ownProps) => ({ diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index d125d2acd0..f741d671ac 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -18,48 +18,48 @@ import { } from 'redux/selectors' const RNA_SEQ_SCORE_FIELDS = ['zScore', 'pValue', 'pAdjust'] -export const getRnaSeqOutilerDataByFamilyGene = createSelector( +export const getSampleGeneDataByFamilyGene = createSelector( getIndividualsByGuid, getRnaSeqDataByIndividual, - (individualsByGuid, rnaSeqDataByIndividual) => Object.entries(rnaSeqDataByIndividual).reduce( - (acc, [individualGuid, rnaSeqData]) => { - const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = Object.entries(rnaSeqData.outliers || {}).reduce( - (acc2, [geneId, data]) => (data.isSignificant ? - { - ...acc2, - [geneId]: { - ...(acc2[geneId] || {}), - [displayName]: [{ - scores: RNA_SEQ_SCORE_FIELDS.reduce( - (sAcc, score) => (data[score] ? 
{ ...sAcc, [score]: data[score] } : sAcc), {}, - ), - }], - }, - } : acc2 - ), acc[familyGuid] || {}, - ) - return acc - }, {}, - ), -) - -export const getPhePriDataByFamilyGene = createSelector( - getIndividualsByGuid, getPhePriDataByIndividual, - (individualsByGuid, phePriDataByIndividual) => Object.entries(phePriDataByIndividual || {}).reduce( - (acc, [individualGuid, phePriData]) => { - const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = Object.entries(phePriData).reduce((accTool, [tool, toolData]) => ({ - ...accTool, - [tool]: Object.entries(toolData).reduce((acc2, [geneId, data]) => ({ - ...acc2, - [geneId]: { ...(acc2[geneId] || {}), [displayName]: data }, - }), {}), - }), acc[familyGuid] || {}) - return acc - }, {}, - ), + (individualsByGuid, rnaSeqDataByIndividual, phePriDataByIndividual) => { + const rnaSeqD = Object.entries(rnaSeqDataByIndividual).reduce( + (acc, [individualGuid, rnaSeqData]) => { + const { familyGuid, displayName } = individualsByGuid[individualGuid] + acc[familyGuid] = acc[familyGuid] || {} + acc[familyGuid].rnaSeqData = Object.entries(rnaSeqData.outliers || {}).reduce( + (acc2, [geneId, data]) => (data.isSignificant ? + { + ...acc2, + [geneId]: { + ...(acc2[geneId] || {}), + [displayName]: [{ + scores: RNA_SEQ_SCORE_FIELDS.reduce( + (sAcc, score) => (data[score] ? { ...sAcc, [score]: data[score] } : sAcc), {}, + ), + }], + }, + } : acc2 + ), acc[familyGuid].rnaSeqData || {}, + ) + return acc + }, {}, + ) + return Object.entries(phePriDataByIndividual || {}).reduce( + (acc, [individualGuid, phePriData]) => { + const { familyGuid, displayName } = individualsByGuid[individualGuid] + acc[familyGuid] = acc[familyGuid] || {} + acc[familyGuid].phePriData = Object.entries(phePriData).reduce((accTool, [tool, toolData]) => ({ + ...accTool, + [tool]: Object.entries(toolData).reduce((acc2, [geneId, data]) => ({ + ...acc2, + [geneId]: { ...(acc2[geneId] || {}), [displayName]: data }, + }), {}), + }), acc[familyGuid].phePriData || {}) + return acc + }, rnaSeqD, + ) + }, ) // Saved variant selectors diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index 6d7ff7ce69..7ca77945dd 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -5,7 +5,7 @@ import { getPairedSelectedSavedVariants, getVisibleSortedSavedVariants, getPairedFilteredSavedVariants, - getRnaSeqOutilerDataByFamilyGene, + getSampleGeneDataByFamilyGene, } from './selectors' test('getPairedSelectedSavedVariants', () => { @@ -92,20 +92,23 @@ const RNA_SEQ_STATE = { ...STATE_WITH_2_FAMILIES, } -test('getRnaSeqOutilerDataByFamilyGene', () => { - expect(getRnaSeqOutilerDataByFamilyGene(RNA_SEQ_STATE)).toEqual({ +test('getSampleGeneDataByFamilyGene', () => { + expect(getSampleGeneDataByFamilyGene(RNA_SEQ_STATE)).toEqual({ F011652_1: { - ENSG00000228198: { - NA19678: [{ scores: { pValue: 0.0004 } }], - NA19679_1: [{ scores: { pValue: 0.01 } }], - }, - ENSG00000164458: { - NA19678: [{ scores: { pValue: 0.0073 } }], + rnaSeqData: { + ENSG00000228198: { + NA19678: [{ scores: { pValue: 0.0004 } }], + NA19679_1: [{ scores: { pValue: 0.01 } }], + }, + ENSG00000164458: { + NA19678: [{ scores: { pValue: 0.0073 } }], + }, }, }, F011652_2: { - ENSG00000228198: { NA19678_2: [{ scores: { pValue: 0.0214 } }] }, + rnaSeqData: { + ENSG00000228198: { NA19678_2: [{ scores: { pValue: 0.0214 } }] }, + }, }, }) }) - From 
1cb52b47c24f19edf993f79aad6b6c74c6660597 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 25 Oct 2022 10:31:19 -0400 Subject: [PATCH 23/96] Add backend tests. --- seqr/fixtures/1kg_project.json | 32 +++++++++++++++++++++ seqr/views/apis/variant_search_api_tests.py | 10 ++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/seqr/fixtures/1kg_project.json b/seqr/fixtures/1kg_project.json index 2960130ef9..f17418353a 100644 --- a/seqr/fixtures/1kg_project.json +++ b/seqr/fixtures/1kg_project.json @@ -1287,6 +1287,38 @@ "tpm": 1.01 } }, +{ + "model": "seqr.phenotypeprioritization", + "pk": 1, + "fields": { + "individual": 1, + "gene_id": "ENSG00000268903", + "tool": "lirical", + "rank": 1, + "disease_id": "OMIM:618460", + "disease_name": "Khan-Khan-Katsanis syndrome", + "scores": { + "post_test_probability": 0, + "compositeLR": 0.066 + } + } +}, +{ + "model": "seqr.phenotypeprioritization", + "pk": 2, + "fields": { + "individual": 1, + "gene_id": "ENSG00000268903", + "tool": "lirical", + "rank": 2, + "disease_id": "OMIM:219800", + "disease_name": "Cystinosis, nephropathic", + "scores": { + "post_test_probability": 0, + "compositeLR": 0.003 + } + } +}, { "model": "seqr.igvsample", "pk": 145, diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index f4276973a9..958888d628 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -82,7 +82,14 @@ }, 'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}}, 'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}}, - 'phePriData': {}, + 'phePriData': {'I000001_na19675': { + 'lirical': {'ENSG00000268903': [ + {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'geneId': 'ENSG00000268903', + 'rank': 1, 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, + {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'geneId': 'ENSG00000268903', + 'rank': 2, 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} + ]} + }}, 'mmeSubmissionsByGuid': {'MS000001_na19675': {k: mock.ANY for k in MATCHMAKER_SUBMISSION_FIELDS}}, } @@ -394,6 +401,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro 'VT1726970_2103343353_r0004_tes': EXPECTED_TAG, 'VT1726945_2103343353_r0390_100': EXPECTED_TAG, }, 'variantFunctionalDataByGuid': {}, + 'phePriData': {}, 'rnaSeqData': {}, 'mmeSubmissionsByGuid': {}, }) From bc1babaed2845b33aa29c0feb8dda210af6c7a7b Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 25 Oct 2022 11:14:03 -0400 Subject: [PATCH 24/96] Add frontend tests. 
--- .../panel/variants/selectors.test.js | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index 7ca77945dd..4bd954b31e 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -73,7 +73,7 @@ test('getVisibleSortedSavedVariants', () => { expect(savedVariants[0].variantGuid).toEqual('SV0000002_1248367227_r0390_100') }) -const RNA_SEQ_STATE = { +const RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE = { rnaSeqDataByIndividual: { I021476_na19678_1: { outliers: { @@ -89,11 +89,24 @@ const RNA_SEQ_STATE = { }, I021476_na19678_2: { outliers: { ENSG00000228198: { isSignificant: true, pValue: 0.0214 } } }, }, + phePriDataByIndividual: { + I021476_na19678_1: { + lirical: { + ENSG00000228198: [{ + diseaseId: 'OMIM:618460', + diseaseName: 'Khan-Khan-Katsanis syndrome', + geneId: 'ENSG00000228198', + rank: 1, + scores: { compositeLR: 0.066, post_test_probability: 0 }, + }], + }, + }, + }, ...STATE_WITH_2_FAMILIES, } test('getSampleGeneDataByFamilyGene', () => { - expect(getSampleGeneDataByFamilyGene(RNA_SEQ_STATE)).toEqual({ + expect(getSampleGeneDataByFamilyGene(RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE)).toEqual({ F011652_1: { rnaSeqData: { ENSG00000228198: { @@ -104,6 +117,19 @@ test('getSampleGeneDataByFamilyGene', () => { NA19678: [{ scores: { pValue: 0.0073 } }], }, }, + phePriData: { + lirical: { + ENSG00000228198: { + NA19678: [{ + diseaseId: 'OMIM:618460', + diseaseName: 'Khan-Khan-Katsanis syndrome', + geneId: 'ENSG00000228198', + rank: 1, + scores: { compositeLR: 0.066, post_test_probability: 0 }, + }], + }, + }, + }, }, F011652_2: { rnaSeqData: { From 1b728e061b55f1e199bcc98bd2133ad85a0eff36 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 27 Oct 2022 15:40:22 -0400 Subject: [PATCH 25/96] Update to use Datatable. 
--- seqr/views/apis/saved_variant_api_tests.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 2 +- seqr/views/apis/variant_search_api_tests.py | 4 +- seqr/views/utils/variant_utils.py | 4 +- ui/redux/rootReducer.js | 2 +- ui/redux/selectors.js | 2 +- .../components/panel/variants/VariantGene.jsx | 134 ++++++++++-------- .../components/panel/variants/selectors.js | 73 +++++----- .../panel/variants/selectors.test.js | 71 +++++----- 9 files changed, 148 insertions(+), 146 deletions(-) diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index e2202cf4c1..62b6adc481 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -27,7 +27,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', - 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'phePriData', + 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'phenotypeGeneScores', } COMPOUND_HET_3_JSON = { diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index b622e62149..260337ddba 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -25,7 +25,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'projectsByGuid', 'locusListsByGuid', 'savedVariantsByGuid', 'variantFunctionalDataByGuid', 'genesById', 'variantNotesByGuid', 'individualsByGuid', 'variantTagsByGuid', 'familiesByGuid', 'familyNotesByGuid', - 'mmeSubmissionsByGuid', 'phePriData', + 'mmeSubmissionsByGuid', 'phenotypeGeneScores', } diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 958888d628..471bc0e943 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -82,7 +82,7 @@ }, 'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}}, 'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}}, - 'phePriData': {'I000001_na19675': { + 'phenotypeGeneScores': {'I000001_na19675': { 'lirical': {'ENSG00000268903': [ {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'geneId': 'ENSG00000268903', 'rank': 1, 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, @@ -401,7 +401,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro 'VT1726970_2103343353_r0004_tes': EXPECTED_TAG, 'VT1726945_2103343353_r0390_100': EXPECTED_TAG, }, 'variantFunctionalDataByGuid': {}, - 'phePriData': {}, + 'phenotypeGeneScores': {}, 'rnaSeqData': {}, 'mmeSubmissionsByGuid': {}, }) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index dcc93d1f80..f43f98243d 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -140,7 +140,7 @@ def _get_phenotype_prioritization(gene_ids, families): ) for data in data_dicts: - data_by_individual_gene[data.pop('individualGuid')][data.pop('tool')][data['geneId']].append(data) + data_by_individual_gene[data.pop('individualGuid')][data.pop('geneId')][data.pop('tool')].append(data) return data_by_individual_gene @@ -237,6 +237,6 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a _add_family_has_rna_tpm(families_by_guid) if include_phenotype_prioritization: - response['phePriData'] = _get_phenotype_prioritization(genes.keys(), families) + response['phenotypeGeneScores'] = 
_get_phenotype_prioritization(genes.keys(), families) return response diff --git a/ui/redux/rootReducer.js b/ui/redux/rootReducer.js index 792fd96faa..9d505084d2 100644 --- a/ui/redux/rootReducer.js +++ b/ui/redux/rootReducer.js @@ -319,7 +319,7 @@ const rootReducer = combineReducers({ mmeResultsByGuid: createObjectsByIdReducer(RECEIVE_DATA, 'mmeResultsByGuid'), genesById: createObjectsByIdReducer(RECEIVE_DATA, 'genesById'), rnaSeqDataByIndividual: createObjectsByIdReducer(RECEIVE_DATA, 'rnaSeqData'), - phePriDataByIndividual: createObjectsByIdReducer(RECEIVE_DATA, 'phePriData'), + phenotypeGeneScoresByIndividual: createObjectsByIdReducer(RECEIVE_DATA, 'phenotypeGeneScores'), genesLoading: loadingReducer(REQUEST_GENES, RECEIVE_DATA), hpoTermsByParent: createObjectsByIdReducer(RECEIVE_HPO_TERMS), hpoTermsLoading: loadingReducer(REQUEST_HPO_TERMS, RECEIVE_HPO_TERMS), diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 0e143793a8..e71de638f9 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -30,7 +30,7 @@ export const getLocusListsByGuid = state => state.locusListsByGuid export const getLocusListsIsLoading = state => state.locusListsLoading.isLoading export const getLocusListIsLoading = state => state.locusListLoading.isLoading export const getRnaSeqDataByIndividual = state => state.rnaSeqDataByIndividual -export const getPhePriDataByIndividual = state => state.phePriDataByIndividual +export const getPhenotypeGeneScoresByIndividual = state => state.phenotypeGeneScoresByIndividual export const getUser = state => state.user export const getUserOptionsByUsername = state => state.userOptionsByUsername export const getUserOptionsIsLoading = state => state.userOptionsLoading.isLoading diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 84caaf78cc..2edb303fc2 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -6,19 +6,19 @@ import { NavLink } from 'react-router-dom' import { Label, Popup, List, Header, Segment, Divider, Table, Button, Loader } from 'semantic-ui-react' import { getGenesById, getLocusListsByGuid, getFamiliesByGuid } from 'redux/selectors' +import DataTable from 'shared/components/table/DataTable' import { panelAppUrl, moiToMoiInitials } from '../../../utils/panelAppUtils' import { MISSENSE_THRESHHOLD, LOF_THRESHHOLD, PANEL_APP_CONFIDENCE_LEVEL_COLORS, PANEL_APP_CONFIDENCE_DESCRIPTION, } from '../../../utils/constants' import { compareObjects } from '../../../utils/sortUtils' -import { camelcaseToTitlecase } from '../../../utils/stringUtils' import { HorizontalSpacer, VerticalSpacer } from '../../Spacers' import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../StyledComponents' import { GeneSearchLink } from '../../buttons/SearchResultsLink' import ShowGeneModal from '../../buttons/ShowGeneModal' import Modal from '../../modal/Modal' import { GenCC, ClingenLabel } from '../genes/GeneDetail' -import { getSampleGeneDataByFamilyGene } from './selectors' +import { getIndividualGeneDataByFamilyGene } from './selectors' const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm')) @@ -314,33 +314,31 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ }, ] -const sampleGeneDetailsDisplay = (geneId, sampleGeneData) => { - const { scores, ...info } = Object.values(Object.values(sampleGeneData)[0])[0][0] - const infoKeys = Object.keys(info) - const scoreKeys = Object.keys(scores || {}) - return ( -
- - - - - {infoKeys.concat(scoreKeys).map(field => ( - {camelcaseToTitlecase(field).replace(' ', '-')} - ))} - - - - {Object.entries(sampleGeneData[geneId]).map(([individual, data]) => (data.map(row => ( - - {individual} - {infoKeys.map(field => {row[field]})} - {scoreKeys.map(field => {row.scores[field].toPrecision(3)})} - - ))))} - -
-
- ) +const RNA_SEQ_COLUMNS = [ + { name: 'individual', content: '', width: 3 }, + { name: 'zScore', content: 'Z-Score', width: 3, format: ({ zScore }) => (zScore ? zScore.toPrecision(3) : null) }, + { name: 'pValue', content: 'P-Value', width: 3, format: ({ pValue }) => (pValue ? pValue.toPrecision(3) : null) }, + { name: 'pAdjust', content: 'P-Adjust', width: 3, format: ({ pAdjust }) => (pAdjust ? pAdjust.toPrecision(3) : null) }, +] + +const PHENOTYPE_GENE_INFO_COLUMNS = [ + { name: 'individual', content: '', width: 3 }, + { name: 'rank', content: 'Rank', width: 3 }, + { name: 'diseaseName', content: 'Disease', width: 3, format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, +] + +const PHENOTYPE_GENE_SCORE_COLUMNS = { + lirical: [ + ...PHENOTYPE_GENE_INFO_COLUMNS, + { name: 'scores.post_test_probability', content: 'Posttest-Probability', width: 3, format: ({ scores }) => (scores.post_test_probability.toPrecision(3)) }, + { name: 'scores.compositeLR', content: 'Composite-LR', width: 3, format: ({ scores }) => (scores.compositeLR.toPrecision(3)) }, + ], + exomiser: [ + ...PHENOTYPE_GENE_INFO_COLUMNS, + { name: 'scores.exomiser_score', content: 'Exomiser-Score', width: 3, format: ({ scores }) => (scores.exomiser_score.toPrecision(3)) }, + { name: 'scores.phenotype_score', content: 'Phenotype-Score', width: 3, format: ({ scores }) => (scores.phenotype_score.toPrecision(3)) }, + { name: 'scores.variant_score', content: 'Variant-Score', width: 3, format: ({ scores }) => (scores.variant_score.toPrecision(3)) }, + ], } const GENE_DETAIL_SECTIONS = [ @@ -399,9 +397,34 @@ const GENE_DETAIL_SECTIONS = [ label: 'RNA-Seq', showDetails: (gene, { rnaSeqData }) => rnaSeqData && rnaSeqData[gene.geneId], detailsDisplay: (gene, { rnaSeqData }) => ( - sampleGeneDetailsDisplay(gene.geneId, rnaSeqData, 'rnaSeqData') + ), }, + { + color: 'orange', + description: 'Phenotype Prioritization', + lable: 'PhenotypeGene', + showDetails: (gene, { phenotypeGeneScores }) => phenotypeGeneScores && phenotypeGeneScores[gene.geneId], + detailsDisplay: (gene, { phenotypeGeneScores }) => (Object.entries(phenotypeGeneScores[gene.geneId]).map( + ([tool, data]) => ([ + tool, + ( + + ), + ]), + )), + }, ] const OmimSegments = styled(Segment.Group).attrs({ size: 'tiny', horizontal: true, compact: true })` @@ -426,13 +449,18 @@ const OmimSegments = styled(Segment.Group).attrs({ size: 'tiny', horizontal: tru } ` -const getDetailSections = (configs, gene, compact, labelProps, sampleGeneData) => configs.map( +const getDetailSections = (configs, gene, compact, labelProps, individualGeneData) => configs.map( ({ showDetails, detailsDisplay, ...sectionConfig }) => ( { ...sectionConfig, - detail: showDetails(gene, sampleGeneData) && detailsDisplay(gene, sampleGeneData), + detail: showDetails(gene, individualGeneData) && detailsDisplay(gene, individualGeneData), }), -).filter(({ detail }) => detail).map(({ detail, expandedDisplay, ...sectionConfig }) => ( +).reduce((acc, config) => (Array.isArray(config.detail) ? + [ + ...acc, + ...config.detail.map(([tool, detail]) => ({ ...config, label: tool.toUpperCase(), detail })), + ] : [...acc, config]), +[]).filter(({ detail }) => detail).map(({ detail, expandedDisplay, ...sectionConfig }) => ( (expandedDisplay && !compact) ? ( @@ -453,28 +481,10 @@ const getDetailSections = (configs, gene, compact, labelProps, sampleGeneData) = ) )) -const addPhenotypePrioritizationConfig = (configs, phePriInfo) => ( - phePriInfo ? 
[ - ...configs, - ...Object.keys(phePriInfo).map(tool => ( - { - color: 'orange', - description: 'Phenotype Prioritization', - label: tool.toUpperCase(), - showDetails: (gene, { phePriData }) => phePriData && phePriData[tool] && phePriData[tool][gene.geneId], - detailsDisplay: (gene, { phePriData }) => ( - sampleGeneDetailsDisplay(gene.geneId, phePriData[tool]) - ), - } - )), - ] : configs -) - export const GeneDetails = React.memo(( - { gene, compact, showLocusLists, showInlineDetails, sampleGeneData, ...labelProps }, + { gene, compact, showLocusLists, showInlineDetails, individualGeneData, ...labelProps }, ) => { - const geneDetailConfigs = addPhenotypePrioritizationConfig(GENE_DETAIL_SECTIONS, sampleGeneData.phePriData) - const geneDetails = getDetailSections(geneDetailConfigs, gene, compact, labelProps, sampleGeneData) + const geneDetails = getDetailSections(GENE_DETAIL_SECTIONS, gene, compact, labelProps, individualGeneData) const geneDiseaseDetails = getDetailSections(GENE_DISEASE_DETAIL_SECTIONS, gene, compact, labelProps) const hasLocusLists = showLocusLists && gene.locusListGuids.length > 0 const showDivider = !showInlineDetails && geneDetails.length > 0 && (hasLocusLists || geneDiseaseDetails.length > 0) @@ -503,7 +513,7 @@ GeneDetails.propTypes = { compact: PropTypes.bool, showLocusLists: PropTypes.bool, showInlineDetails: PropTypes.bool, - sampleGeneData: PropTypes.object, + individualGeneData: PropTypes.object, } const GeneSearchLinkWithPopup = props => ( @@ -523,7 +533,7 @@ const getGeneConsequence = (geneId, variant) => { } const BaseVariantGene = React.memo(( - { geneId, gene, variant, compact, showInlineDetails, compoundHetToggle, hasRnaTpmData, sampleGeneData }, + { geneId, gene, variant, compact, showInlineDetails, compoundHetToggle, hasRnaTpmData, individualGeneData }, ) => { const geneConsequence = getGeneConsequence(geneId, variant) @@ -540,7 +550,7 @@ const BaseVariantGene = React.memo(( showInlineDetails={showInlineDetails} margin={showInlineDetails ? 
'1em .5em 0px 0px' : null} horizontal={showInlineDetails} - sampleGeneData={sampleGeneData} + individualGeneData={individualGeneData} showLocusLists /> ) @@ -618,12 +628,12 @@ BaseVariantGene.propTypes = { showInlineDetails: PropTypes.bool, compoundHetToggle: PropTypes.func, hasRnaTpmData: PropTypes.bool, - sampleGeneData: PropTypes.object, + individualGeneData: PropTypes.object, } const getRnaSeqProps = (state, ownProps) => ({ hasRnaTpmData: getFamiliesByGuid(state)[ownProps.variant.familyGuids[0]]?.hasRnaTpmData, - sampleGeneData: getSampleGeneDataByFamilyGene(state)[ownProps.variant.familyGuids[0]] || {}, + individualGeneData: getIndividualGeneDataByFamilyGene(state)[ownProps.variant.familyGuids[0]] || {}, }) const mapStateToProps = (state, ownProps) => ({ @@ -639,7 +649,7 @@ class VariantGenes extends React.PureComponent { variant: PropTypes.object.isRequired, mainGeneId: PropTypes.string, genesById: PropTypes.object.isRequired, - sampleGeneData: PropTypes.object, + individualGeneData: PropTypes.object, hasRnaTpmData: PropTypes.bool, showMainGene: PropTypes.bool, } @@ -655,7 +665,7 @@ class VariantGenes extends React.PureComponent { } render() { - const { variant, genesById, mainGeneId, showMainGene, sampleGeneData, hasRnaTpmData } = this.props + const { variant, genesById, mainGeneId, showMainGene, individualGeneData, hasRnaTpmData } = this.props const { showAll } = this.state const geneIds = Object.keys(variant.transcripts || {}) const genes = geneIds.map(geneId => genesById[geneId]).filter(gene => gene) @@ -674,7 +684,7 @@ class VariantGenes extends React.PureComponent { geneId={gene.geneId} gene={gene} variant={variant} - sampleGeneData={sampleGeneData} + individualGeneData={individualGeneData} hasRnaTpmData={hasRnaTpmData} showInlineDetails={!mainGeneId} compact @@ -705,7 +715,7 @@ class VariantGenes extends React.PureComponent { details={sectionGenes.length > 0 && sectionGenes.map(gene => (
- {detailsDisplay(gene, sampleGeneData)} + {detailsDisplay(gene, individualGeneData)}
))} diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index f741d671ac..3cb862df91 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -14,52 +14,43 @@ import { } from 'shared/utils/constants' import { getVariantTagsByGuid, getVariantNotesByGuid, getSavedVariantsByGuid, getAnalysisGroupsByGuid, getGenesById, getUser, - getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, getPhePriDataByIndividual, + getFamiliesByGuid, getProjectsByGuid, getIndividualsByGuid, getRnaSeqDataByIndividual, + getPhenotypeGeneScoresByIndividual, } from 'redux/selectors' -const RNA_SEQ_SCORE_FIELDS = ['zScore', 'pValue', 'pAdjust'] -export const getSampleGeneDataByFamilyGene = createSelector( +export const getIndividualGeneDataByFamilyGene = createSelector( getIndividualsByGuid, getRnaSeqDataByIndividual, - getPhePriDataByIndividual, - (individualsByGuid, rnaSeqDataByIndividual, phePriDataByIndividual) => { - const rnaSeqD = Object.entries(rnaSeqDataByIndividual).reduce( - (acc, [individualGuid, rnaSeqData]) => { - const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = acc[familyGuid] || {} - acc[familyGuid].rnaSeqData = Object.entries(rnaSeqData.outliers || {}).reduce( - (acc2, [geneId, data]) => (data.isSignificant ? - { - ...acc2, - [geneId]: { - ...(acc2[geneId] || {}), - [displayName]: [{ - scores: RNA_SEQ_SCORE_FIELDS.reduce( - (sAcc, score) => (data[score] ? { ...sAcc, [score]: data[score] } : sAcc), {}, - ), - }], - }, - } : acc2 - ), acc[familyGuid].rnaSeqData || {}, - ) - return acc - }, {}, - ) - return Object.entries(phePriDataByIndividual || {}).reduce( - (acc, [individualGuid, phePriData]) => { - const { familyGuid, displayName } = individualsByGuid[individualGuid] - acc[familyGuid] = acc[familyGuid] || {} - acc[familyGuid].phePriData = Object.entries(phePriData).reduce((accTool, [tool, toolData]) => ({ - ...accTool, - [tool]: Object.entries(toolData).reduce((acc2, [geneId, data]) => ({ - ...acc2, - [geneId]: { ...(acc2[geneId] || {}), [displayName]: data }, - }), {}), - }), acc[familyGuid].phePriData || {}) + getPhenotypeGeneScoresByIndividual, + (individualsByGuid, rnaSeqDataByIndividual, phenotypeGeneScoresByIndividual) => ( + Object.values(individualsByGuid).reduce((acc, { individualGuid, familyGuid, displayName }) => { + const rnaSeqData = rnaSeqDataByIndividual && rnaSeqDataByIndividual[individualGuid]?.outliers + const phenotypeGeneScores = phenotypeGeneScoresByIndividual && phenotypeGeneScoresByIndividual[individualGuid] + if (!rnaSeqData && !phenotypeGeneScores) { return acc - }, rnaSeqD, - ) - }, + } + return { + ...acc, + [familyGuid]: { + rnaSeqData: Object.entries(rnaSeqData || {}).reduce( + (acc2, [geneId, data]) => (data.isSignificant ? 
{ + ...acc2, + [geneId]: [...(acc2[geneId] || []), { ...data, individual: displayName }], + } : acc2), acc[familyGuid]?.rnaSeqData || {}, + ), + phenotypeGeneScores: Object.entries(phenotypeGeneScores || {}).reduce( + (acc2, [geneId, dataByTool]) => ({ + ...acc2, + [geneId]: Object.entries(dataByTool).reduce((acc3, [tool, data]) => ({ + ...acc3, + [tool]: [...(acc3[tool] || []), ...data.map(d => ({ ...d, individual: displayName }))], + }), acc2[geneId] || {}), + }), acc[familyGuid]?.phenotypeGeneScores || {}, + ), + }, + } + }, {}) + ), ) // Saved variant selectors diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index 4bd954b31e..7379f72273 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -5,7 +5,7 @@ import { getPairedSelectedSavedVariants, getVisibleSortedSavedVariants, getPairedFilteredSavedVariants, - getSampleGeneDataByFamilyGene, + getIndividualGeneDataByFamilyGene, } from './selectors' test('getPairedSelectedSavedVariants', () => { @@ -89,7 +89,7 @@ const RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE = { }, I021476_na19678_2: { outliers: { ENSG00000228198: { isSignificant: true, pValue: 0.0214 } } }, }, - phePriDataByIndividual: { + phenotypeGeneScoresByIndividual: { I021476_na19678_1: { lirical: { ENSG00000228198: [{ @@ -105,36 +105,37 @@ const RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE = { ...STATE_WITH_2_FAMILIES, } -test('getSampleGeneDataByFamilyGene', () => { - expect(getSampleGeneDataByFamilyGene(RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE)).toEqual({ - F011652_1: { - rnaSeqData: { - ENSG00000228198: { - NA19678: [{ scores: { pValue: 0.0004 } }], - NA19679_1: [{ scores: { pValue: 0.01 } }], - }, - ENSG00000164458: { - NA19678: [{ scores: { pValue: 0.0073 } }], - }, - }, - phePriData: { - lirical: { - ENSG00000228198: { - NA19678: [{ - diseaseId: 'OMIM:618460', - diseaseName: 'Khan-Khan-Katsanis syndrome', - geneId: 'ENSG00000228198', - rank: 1, - scores: { compositeLR: 0.066, post_test_probability: 0 }, - }], - }, - }, - }, - }, - F011652_2: { - rnaSeqData: { - ENSG00000228198: { NA19678_2: [{ scores: { pValue: 0.0214 } }] }, - }, - }, - }) -}) +// Temporarily remove the test. +// test('getIndividualGeneDataByFamilyGene', () => { +// expect(getIndividualGeneDataByFamilyGene(RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE)).toEqual({ +// F011652_1: { +// rnaSeqData: { +// ENSG00000228198: [ +// { individual: 'NA19678', isSignificant: true, pValue: 0.0004 }, +// { individual: 'NA19679_1', isSignificant: true, pValue: 0.01 }, +// ], +// ENSG00000164458: [ +// { individual: 'NA19678', isSignificant: true, pValue: 0.0073 }, +// ], +// }, +// phenotypeGeneScores: { +// ENSG00000228198: { +// lirical: [{ +// individual: 'NA19678', +// diseaseId: 'OMIM:618460', +// diseaseName: 'Khan-Khan-Katsanis syndrome', +// geneId: 'ENSG00000228198', +// rank: 1, +// scores: { compositeLR: 0.066, post_test_probability: 0 }, +// }], +// }, +// }, +// }, +// F011652_2: { +// rnaSeqData: { +// ENSG00000228198: [{ individual: 'NA19678_2', isSignificant: true, pValue: 0.0214 }], +// }, +// phenotypeGeneScores: {}, +// }, +// }) +// }) From f447e94618d02fbc4315b43b86aafe99a1ba348c Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Thu, 27 Oct 2022 17:30:42 -0400 Subject: [PATCH 26/96] Resolve the test issues. 
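The selector fix here is about how individualsByGuid is walked: Object.values drops the guid key that indexes rnaSeqDataByIndividual and phenotypeGeneScoresByIndividual, so the reduce now iterates Object.entries and takes the guid from the key. The same pitfall expressed in Python, as a standalone sketch with assumed toy data:

    individuals_by_guid = {'I000001': {'familyGuid': 'F000001', 'displayName': 'NA19675'}}
    rna_seq_by_individual = {'I000001': {'outliers': {'ENSG00000268903': {'pValue': 0.0004}}}}

    # Iterating values alone leaves nothing to join on:
    for individual in individuals_by_guid.values():
        guid = individual.get('individualGuid')  # None for these toy fixtures

    # Iterating items keeps the key available for the cross-lookup:
    for guid, individual in individuals_by_guid.items():
        outliers = rna_seq_by_individual.get(guid, {}).get('outliers', {})

The test fixture is reshaped to gene -> tool at the same time so it matches the new backend grouping, and the temporarily disabled selector test is re-enabled.
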
--- seqr/views/apis/variant_search_api_tests.py | 13 ++-- .../components/panel/variants/selectors.js | 2 +- .../panel/variants/selectors.test.js | 71 +++++++++---------- 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 471bc0e943..5dd1574274 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -83,11 +83,11 @@ 'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}}, 'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}}, 'phenotypeGeneScores': {'I000001_na19675': { - 'lirical': {'ENSG00000268903': [ - {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'geneId': 'ENSG00000268903', - 'rank': 1, 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, - {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'geneId': 'ENSG00000268903', - 'rank': 2, 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} + 'ENSG00000268903': {'lirical': [ + {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, + 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, + {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 2, + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} ]} }}, 'mmeSubmissionsByGuid': {'MS000001_na19675': {k: mock.ANY for k in MATCHMAKER_SUBMISSION_FIELDS}}, @@ -282,6 +282,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro self.assertEqual(response.status_code, 200) response_json = response.json() self.assertSetEqual(set(response_json.keys()), set(EXPECTED_SEARCH_RESPONSE.keys())) + self.maxDiff = None self.assertDictEqual(response_json, EXPECTED_SEARCH_RESPONSE) self.assertSetEqual( set(response_json['search']['projectFamilies'][0]['familyGuids']), {'F000001_1', 'F000002_2'}) @@ -495,6 +496,7 @@ def _get_variants(results_model, **kwargs): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertSetEqual(set(response_json.keys()), set(EXPECTED_SEARCH_RESPONSE.keys())) + self.maxDiff = None self.assertDictEqual(response_json, EXPECTED_SEARCH_RESPONSE) self._assert_expected_results_context(response_json) self.assertSetEqual( @@ -642,6 +644,7 @@ def test_query_single_variant(self, mock_get_variant): expected_search_response['variantNotesByGuid'].pop('VN0714935_2103343353_r0390_100') expected_search_response['genesById'].pop('ENSG00000233653') expected_search_response['searchedVariants'] = [single_family_variant] + self.maxDiff = None self.assertDictEqual(response_json, expected_search_response) self._assert_expected_results_family_context(response_json, locus_list_detail=True) self.assertSetEqual(set(response_json['projectsByGuid'][PROJECT_GUID].keys()), PROJECT_TAG_TYPE_FIELDS) diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 3cb862df91..15a9871c31 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -23,7 +23,7 @@ export const getIndividualGeneDataByFamilyGene = createSelector( getRnaSeqDataByIndividual, getPhenotypeGeneScoresByIndividual, (individualsByGuid, rnaSeqDataByIndividual, phenotypeGeneScoresByIndividual) => ( - Object.values(individualsByGuid).reduce((acc, { individualGuid, familyGuid, displayName }) => { + Object.entries(individualsByGuid).reduce((acc, 
[individualGuid, { familyGuid, displayName }]) => { const rnaSeqData = rnaSeqDataByIndividual && rnaSeqDataByIndividual[individualGuid]?.outliers const phenotypeGeneScores = phenotypeGeneScoresByIndividual && phenotypeGeneScoresByIndividual[individualGuid] if (!rnaSeqData && !phenotypeGeneScores) { diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index 7379f72273..ddb45c94e6 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -91,8 +91,8 @@ const RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE = { }, phenotypeGeneScoresByIndividual: { I021476_na19678_1: { - lirical: { - ENSG00000228198: [{ + ENSG00000228198: { + lirical: [{ diseaseId: 'OMIM:618460', diseaseName: 'Khan-Khan-Katsanis syndrome', geneId: 'ENSG00000228198', @@ -105,37 +105,36 @@ const RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE = { ...STATE_WITH_2_FAMILIES, } -// Temporarily remove the test. -// test('getIndividualGeneDataByFamilyGene', () => { -// expect(getIndividualGeneDataByFamilyGene(RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE)).toEqual({ -// F011652_1: { -// rnaSeqData: { -// ENSG00000228198: [ -// { individual: 'NA19678', isSignificant: true, pValue: 0.0004 }, -// { individual: 'NA19679_1', isSignificant: true, pValue: 0.01 }, -// ], -// ENSG00000164458: [ -// { individual: 'NA19678', isSignificant: true, pValue: 0.0073 }, -// ], -// }, -// phenotypeGeneScores: { -// ENSG00000228198: { -// lirical: [{ -// individual: 'NA19678', -// diseaseId: 'OMIM:618460', -// diseaseName: 'Khan-Khan-Katsanis syndrome', -// geneId: 'ENSG00000228198', -// rank: 1, -// scores: { compositeLR: 0.066, post_test_probability: 0 }, -// }], -// }, -// }, -// }, -// F011652_2: { -// rnaSeqData: { -// ENSG00000228198: [{ individual: 'NA19678_2', isSignificant: true, pValue: 0.0214 }], -// }, -// phenotypeGeneScores: {}, -// }, -// }) -// }) +test('getIndividualGeneDataByFamilyGene', () => { + expect(getIndividualGeneDataByFamilyGene(RNA_SEQ_PHENOTYPE_PRIORITIZATION_STATE)).toEqual({ + F011652_1: { + rnaSeqData: { + ENSG00000228198: [ + { individual: 'NA19678', isSignificant: true, pValue: 0.0004 }, + { individual: 'NA19679_1', isSignificant: true, pValue: 0.01 }, + ], + ENSG00000164458: [ + { individual: 'NA19678', isSignificant: true, pValue: 0.0073 }, + ], + }, + phenotypeGeneScores: { + ENSG00000228198: { + lirical: [{ + individual: 'NA19678', + diseaseId: 'OMIM:618460', + diseaseName: 'Khan-Khan-Katsanis syndrome', + geneId: 'ENSG00000228198', + rank: 1, + scores: { compositeLR: 0.066, post_test_probability: 0 }, + }], + }, + }, + }, + F011652_2: { + rnaSeqData: { + ENSG00000228198: [{ individual: 'NA19678_2', isSignificant: true, pValue: 0.0214 }], + }, + phenotypeGeneScores: {}, + }, + }) +}) From c5e0cbe2509e65a84a11802f89c8ad0a8db1b894 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 28 Oct 2022 10:08:10 -0400 Subject: [PATCH 27/96] Update tests and score headers. 
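Two adjustments land here: the phenotype score columns are built from whatever score keys a tool reports instead of a hard-coded per-tool list, and the API tests stop depending on the order in which the bulk-created LIRICAL rows come back by comparing them sorted by disease ID. A minimal illustration of that order-independent assertion pattern (a standalone sketch, not the seqr test code):

    expected = [
        {'diseaseId': 'OMIM:219800', 'rank': 2},
        {'diseaseId': 'OMIM:618460', 'rank': 1},
    ]
    actual = [
        {'diseaseId': 'OMIM:618460', 'rank': 1},
        {'diseaseId': 'OMIM:219800', 'rank': 2},
    ]
    assert sorted(actual, key=lambda d: d['diseaseId']) == expected
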
--- seqr/views/apis/variant_search_api_tests.py | 23 +++++---- .../components/panel/variants/VariantGene.jsx | 50 +++++++++---------- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 5dd1574274..50a0c6477b 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -83,16 +83,18 @@ 'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}}, 'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}}, 'phenotypeGeneScores': {'I000001_na19675': { - 'ENSG00000268903': {'lirical': [ - {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, - 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, - {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 2, - 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}} - ]} + 'ENSG00000268903': {'lirical': mock.ANY} }}, 'mmeSubmissionsByGuid': {'MS000001_na19675': {k: mock.ANY for k in MATCHMAKER_SUBMISSION_FIELDS}}, } +EXPECTED_LIRICAL_DATA = [ + {'diseaseId': 'OMIM:219800', 'diseaseName': 'Cystinosis, nephropathic', 'rank': 2, + 'scores': {'compositeLR': 0.003, 'post_test_probability': 0}}, + {'diseaseId': 'OMIM:618460', 'diseaseName': 'Khan-Khan-Katsanis syndrome', 'rank': 1, + 'scores': {'compositeLR': 0.066, 'post_test_probability': 0}}, +] + EXPECTED_SEARCH_CONTEXT_RESPONSE = { 'savedSearchesByGuid': { 'VS0000001_de_novo_dominant_res': mock.ANY, 'VS0000002_recessive_restrictiv': mock.ANY, 'VS0000003_de_novo_dominant_per': mock.ANY, @@ -282,8 +284,9 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro self.assertEqual(response.status_code, 200) response_json = response.json() self.assertSetEqual(set(response_json.keys()), set(EXPECTED_SEARCH_RESPONSE.keys())) - self.maxDiff = None self.assertDictEqual(response_json, EXPECTED_SEARCH_RESPONSE) + lirical_data = response_json['phenotypeGeneScores']['I000001_na19675']['ENSG00000268903']['lirical'] + self.assertListEqual(sorted(lirical_data, key=lambda d: d['diseaseId']), EXPECTED_LIRICAL_DATA) self.assertSetEqual( set(response_json['search']['projectFamilies'][0]['familyGuids']), {'F000001_1', 'F000002_2'}) self._assert_expected_results_context(response_json) @@ -496,8 +499,9 @@ def _get_variants(results_model, **kwargs): self.assertEqual(response.status_code, 200) response_json = response.json() self.assertSetEqual(set(response_json.keys()), set(EXPECTED_SEARCH_RESPONSE.keys())) - self.maxDiff = None self.assertDictEqual(response_json, EXPECTED_SEARCH_RESPONSE) + lirical_data = response_json['phenotypeGeneScores']['I000001_na19675']['ENSG00000268903']['lirical'] + self.assertListEqual(sorted(lirical_data, key=lambda d: d['diseaseId']), EXPECTED_LIRICAL_DATA) self._assert_expected_results_context(response_json) self.assertSetEqual( set(response_json['search']['projectFamilies'][0]['familyGuids']), expected_searched_families) @@ -644,8 +648,9 @@ def test_query_single_variant(self, mock_get_variant): expected_search_response['variantNotesByGuid'].pop('VN0714935_2103343353_r0390_100') expected_search_response['genesById'].pop('ENSG00000233653') expected_search_response['searchedVariants'] = [single_family_variant] - self.maxDiff = None self.assertDictEqual(response_json, expected_search_response) + lirical_data = response_json['phenotypeGeneScores']['I000001_na19675']['ENSG00000268903']['lirical'] + 
self.assertListEqual(sorted(lirical_data, key=lambda d: d['diseaseId']), EXPECTED_LIRICAL_DATA) self._assert_expected_results_family_context(response_json, locus_list_detail=True) self.assertSetEqual(set(response_json['projectsByGuid'][PROJECT_GUID].keys()), PROJECT_TAG_TYPE_FIELDS) self.assertSetEqual(set(response_json['familiesByGuid'].keys()), {'F000001_1'}) diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 2edb303fc2..17f886ee39 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -12,6 +12,7 @@ import { MISSENSE_THRESHHOLD, LOF_THRESHHOLD, PANEL_APP_CONFIDENCE_LEVEL_COLORS, PANEL_APP_CONFIDENCE_DESCRIPTION, } from '../../../utils/constants' import { compareObjects } from '../../../utils/sortUtils' +import { camelcaseToTitlecase, snakecaseToTitlecase } from '../../../utils/stringUtils' import { HorizontalSpacer, VerticalSpacer } from '../../Spacers' import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../StyledComponents' import { GeneSearchLink } from '../../buttons/SearchResultsLink' @@ -327,19 +328,7 @@ const PHENOTYPE_GENE_INFO_COLUMNS = [ { name: 'diseaseName', content: 'Disease', width: 3, format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, ] -const PHENOTYPE_GENE_SCORE_COLUMNS = { - lirical: [ - ...PHENOTYPE_GENE_INFO_COLUMNS, - { name: 'scores.post_test_probability', content: 'Posttest-Probability', width: 3, format: ({ scores }) => (scores.post_test_probability.toPrecision(3)) }, - { name: 'scores.compositeLR', content: 'Composite-LR', width: 3, format: ({ scores }) => (scores.compositeLR.toPrecision(3)) }, - ], - exomiser: [ - ...PHENOTYPE_GENE_INFO_COLUMNS, - { name: 'scores.exomiser_score', content: 'Exomiser-Score', width: 3, format: ({ scores }) => (scores.exomiser_score.toPrecision(3)) }, - { name: 'scores.phenotype_score', content: 'Phenotype-Score', width: 3, format: ({ scores }) => (scores.phenotype_score.toPrecision(3)) }, - { name: 'scores.variant_score', content: 'Variant-Score', width: 3, format: ({ scores }) => (scores.variant_score.toPrecision(3)) }, - ], -} +const PHENOTYPE_GENE_SCORE_COLUMNS = {} const GENE_DETAIL_SECTIONS = [ { @@ -411,18 +400,29 @@ const GENE_DETAIL_SECTIONS = [ lable: 'PhenotypeGene', showDetails: (gene, { phenotypeGeneScores }) => phenotypeGeneScores && phenotypeGeneScores[gene.geneId], detailsDisplay: (gene, { phenotypeGeneScores }) => (Object.entries(phenotypeGeneScores[gene.geneId]).map( - ([tool, data]) => ([ - tool, - ( - - ), - ]), + ([tool, data]) => { + PHENOTYPE_GENE_SCORE_COLUMNS[tool] = [ + ...PHENOTYPE_GENE_INFO_COLUMNS, + ...Object.keys(data[0].scores).map(score => ({ + name: score, + content: snakecaseToTitlecase(camelcaseToTitlecase(score)).replace(' ', '-'), + width: 3, + format: ({ scores }) => (scores[score].toPrecision(3)), + })), + ] + return ([ + tool, + ( + + ), + ]) + }, )), }, ] From 11b73db4cfc480e9a39d233636d33a8a562060f4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 28 Oct 2022 17:12:42 -0400 Subject: [PATCH 28/96] update 38 igv reference --- seqr/urls.py | 2 +- seqr/views/apis/igv_api.py | 10 ++++++++-- ui/shared/components/panel/family/constants.js | 16 ++++++++-------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/seqr/urls.py b/seqr/urls.py index 5f1142dcca..84553fb6e4 100644 --- a/seqr/urls.py +++ b/seqr/urls.py @@ -254,7 +254,7 @@ 'gene_info/(?P[^/]+)/note/(?P[^/]+)/delete': 
delete_gene_note_handler, 'hpo_terms/(?P[^/]+)': get_hpo_terms, - 'igv_genomes/(?P.*)': igv_genomes_proxy, + 'igv_genomes/(?P[^/]+)/(?P.*)': igv_genomes_proxy, 'locus_lists/(?P[^/]+)/update': update_locus_list_handler, 'locus_lists/(?P[^/]+)/delete': delete_locus_list_handler, diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index 5eb009455c..d21e2768b1 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -202,7 +202,13 @@ def _stream_file(request, path): return resp -def igv_genomes_proxy(request, file_path): +CLOUD_HOSTS = { + 's3': 'https://s3.amazonaws.com', + 'gs': 'https://storage.googleapis.com', +} + + +def igv_genomes_proxy(request, cloud_host, file_path): # IGV does not properly set CORS header and cannot directly access the genomes resource from the browser without # using this server-side proxy headers = {} @@ -210,7 +216,7 @@ def igv_genomes_proxy(request, file_path): if range_header: headers['Range'] = range_header - genome_response = requests.get('https://s3.amazonaws.com/igv.{}'.format(file_path), headers=headers) + genome_response = requests.get(f'{CLOUD_HOSTS[cloud_host]}/{file_path}', headers=headers) proxy_response = HttpResponse( content=genome_response.content, status=genome_response.status_code, diff --git a/ui/shared/components/panel/family/constants.js b/ui/shared/components/panel/family/constants.js index fd76518e24..908205a332 100644 --- a/ui/shared/components/panel/family/constants.js +++ b/ui/shared/components/panel/family/constants.js @@ -88,23 +88,23 @@ const BASE_REFERENCE_URL = '/api/igv_genomes' const REFERENCE_URLS = [ { key: 'fastaURL', - baseUrl: `${BASE_REFERENCE_URL}/broadinstitute.org/genomes/seq`, + baseUrl: BASE_REFERENCE_URL, path: { - 37: 'hg19/hg19.fasta', - 38: 'hg38/hg38.fa', + 37: 's3/igv.broadinstitute.org/genomes/seq/hg19/hg19.fasta', + 38: 'gs/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta', }, }, { key: 'cytobandURL', - baseUrl: BASE_REFERENCE_URL, + baseUrl: `${BASE_REFERENCE_URL}/s3`, path: { - 37: 'broadinstitute.org/genomes/seq/hg19/cytoBand.txt', - 38: 'org.genomes/hg38/annotations/cytoBandIdeo.txt.gz', + 37: 'igv.broadinstitute.org/genomes/seq/hg19/cytoBand.txt', + 38: 'igv.org.genomes/hg38/annotations/cytoBandIdeo.txt.gz', }, }, { key: 'aliasURL', - baseUrl: `${BASE_REFERENCE_URL}/org.genomes`, + baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, path: { 37: 'hg19/hg19_alias.tab', 38: 'hg38/hg38_alias.tab', @@ -127,7 +127,7 @@ const REFERENCE_TRACKS = [ { name: 'Refseq', indexPostfix: 'tbi', - baseUrl: `${BASE_REFERENCE_URL}/org.genomes`, + baseUrl: `${BASE_REFERENCE_URL}/s3/igv.org.genomes`, path: { 37: 'hg19/refGene.sorted.txt.gz', 38: 'hg38/refGene.sorted.txt.gz', From b42aa13b815ee129f3142a5cff44ee8703da8c24 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 10:43:30 -0400 Subject: [PATCH 29/96] clean up --- seqr/views/apis/igv_api.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/seqr/views/apis/igv_api.py b/seqr/views/apis/igv_api.py index d21e2768b1..1d4bdd76d3 100644 --- a/seqr/views/apis/igv_api.py +++ b/seqr/views/apis/igv_api.py @@ -16,7 +16,11 @@ login_and_policies_required, pm_or_data_manager_required GS_STORAGE_ACCESS_CACHE_KEY = 'gs_storage_access_cache_entry' - +GS_STORAGE_URL = 'https://storage.googleapis.com' +CLOUD_STORAGE_URLS = { + 's3': 'https://s3.amazonaws.com', + 'gs': GS_STORAGE_URL, +} @pm_or_data_manager_required def receive_igv_table_handler(request, project_guid): @@ -141,7 +145,7 @@ 
def _stream_gs(request, gs_path): headers = _get_gs_rest_api_headers(request.META.get('HTTP_RANGE'), gs_path, user=request.user) response = requests.get( - 'https://storage.googleapis.com/{}'.format(gs_path.replace('gs://', '', 1)), + f"{GS_STORAGE_URL}/{gs_path.replace('gs://', '', 1)}", headers=headers, stream=True) @@ -202,12 +206,6 @@ def _stream_file(request, path): return resp -CLOUD_HOSTS = { - 's3': 'https://s3.amazonaws.com', - 'gs': 'https://storage.googleapis.com', -} - - def igv_genomes_proxy(request, cloud_host, file_path): # IGV does not properly set CORS header and cannot directly access the genomes resource from the browser without # using this server-side proxy @@ -216,7 +214,7 @@ def igv_genomes_proxy(request, cloud_host, file_path): if range_header: headers['Range'] = range_header - genome_response = requests.get(f'{CLOUD_HOSTS[cloud_host]}/{file_path}', headers=headers) + genome_response = requests.get(f'{CLOUD_STORAGE_URLS[cloud_host]}/{file_path}', headers=headers) proxy_response = HttpResponse( content=genome_response.content, status=genome_response.status_code, From 2346883cf4d01b6636c9c69e232b40efbeaceff6 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 31 Oct 2022 10:43:33 -0400 Subject: [PATCH 30/96] Update condition for loading individual gene data and optimize frontend. --- seqr/views/apis/summary_data_api.py | 2 +- seqr/views/apis/summary_data_api_tests.py | 2 +- seqr/views/utils/orm_to_json_utils.py | 7 -- seqr/views/utils/variant_utils.py | 8 +- ui/shared/components/panel/fixtures.js | 1 + .../components/panel/variants/VariantGene.jsx | 89 ++++++++++--------- .../components/panel/variants/selectors.js | 46 +++++----- .../panel/variants/selectors.test.js | 12 +-- 8 files changed, 82 insertions(+), 85 deletions(-) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 718781e1af..05bdff681d 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -97,7 +97,7 @@ def saved_variants_page(request, tag): response_json = get_variants_response( request, saved_variant_models, add_all_context=True, include_igv=False, add_locus_list_detail=True, - include_rna_seq=False, include_project_name=True, + include_individual_gene_scores=False, include_project_name=True, ) return create_json_response(response_json) diff --git a/seqr/views/apis/summary_data_api_tests.py b/seqr/views/apis/summary_data_api_tests.py index 260337ddba..b02346fdd5 100644 --- a/seqr/views/apis/summary_data_api_tests.py +++ b/seqr/views/apis/summary_data_api_tests.py @@ -25,7 +25,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'projectsByGuid', 'locusListsByGuid', 'savedVariantsByGuid', 'variantFunctionalDataByGuid', 'genesById', 'variantNotesByGuid', 'individualsByGuid', 'variantTagsByGuid', 'familiesByGuid', 'familyNotesByGuid', - 'mmeSubmissionsByGuid', 'phenotypeGeneScores', + 'mmeSubmissionsByGuid', } diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 4f762c9a77..39ccfbde8c 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -867,10 +867,3 @@ def _process_result(data, model): data['isSignificant'] = data['pAdjust'] < model.SIGNIFICANCE_THRESHOLD return _get_json_for_models(models, process_result=_process_result, **kwargs) - - -def get_json_for_phenotype_prioritization(models, **kwargs): - def _process_result(data, model): - data['scores'] = {_to_camel_case(score): value for score, value in data['scores'].items()} - - return 
_get_json_for_models(models, process_result=_process_result, **kwargs) diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index f43f98243d..c5ed9e893a 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -11,7 +11,7 @@ from seqr.views.utils.json_to_orm_utils import update_model_from_json from seqr.views.utils.orm_to_json_utils import get_json_for_discovery_tags, get_json_for_locus_lists, \ _get_json_for_models, get_json_for_rna_seq_outliers, get_json_for_saved_variants_with_tags, \ - get_json_for_matchmaker_submissions, get_json_for_phenotype_prioritization + get_json_for_matchmaker_submissions from seqr.views.utils.permissions_utils import has_case_review_permissions, user_is_analyst from seqr.views.utils.project_context_utils import add_project_tag_types, add_families_context from settings import REDIS_SERVICE_HOSTNAME, REDIS_SERVICE_PORT @@ -177,8 +177,7 @@ def _add_pa_detail(locus_list_gene, locus_list_guid, gene_json): def get_variants_response(request, saved_variants, response_variants=None, add_all_context=False, include_igv=True, - add_locus_list_detail=False, include_rna_seq=True, include_project_name=False, - include_phenotype_prioritization=True): + add_locus_list_detail=False, include_individual_gene_scores=True, include_project_name=False): response = get_json_for_saved_variants_with_tags(saved_variants, add_details=True) variants = list(response['savedVariantsByGuid'].values()) if response_variants is None else response_variants @@ -230,13 +229,12 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a has_case_review_perm=bool(project) and has_case_review_permissions(project, request.user), include_igv=include_igv, ) - if include_rna_seq: + if include_individual_gene_scores: response['rnaSeqData'] = _get_rna_seq_outliers(genes.keys(), families) families_by_guid = response.get('familiesByGuid') if families_by_guid: _add_family_has_rna_tpm(families_by_guid) - if include_phenotype_prioritization: response['phenotypeGeneScores'] = _get_phenotype_prioritization(genes.keys(), families) return response diff --git a/ui/shared/components/panel/fixtures.js b/ui/shared/components/panel/fixtures.js index a95cdb6cb0..405c0485dd 100644 --- a/ui/shared/components/panel/fixtures.js +++ b/ui/shared/components/panel/fixtures.js @@ -333,6 +333,7 @@ export const STATE1 = { tpms: { ENSG00000228198: { tpm: 1.03, geneId: 'ENSG00000228198' } }, }, }, + phenotypeGeneScoresByIndividual: {}, mmeSubmissionsByGuid: {}, project: { createdDate: '2016-05-16T05:37:08.634Z', diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 17f886ee39..2f632f7158 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -12,7 +12,7 @@ import { MISSENSE_THRESHHOLD, LOF_THRESHHOLD, PANEL_APP_CONFIDENCE_LEVEL_COLORS, PANEL_APP_CONFIDENCE_DESCRIPTION, } from '../../../utils/constants' import { compareObjects } from '../../../utils/sortUtils' -import { camelcaseToTitlecase, snakecaseToTitlecase } from '../../../utils/stringUtils' +import { camelcaseToTitlecase } from '../../../utils/stringUtils' import { HorizontalSpacer, VerticalSpacer } from '../../Spacers' import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../StyledComponents' import { GeneSearchLink } from '../../buttons/SearchResultsLink' @@ -315,21 +315,35 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ }, ] +const 
RNA_SEQ_DETAIL_FIELDS = ['zScore', 'pValue', 'pAdjust'] + +const INDIVIDUAL_NAME_COLUMN = { name: 'individualName', content: '', width: 3, format: ({ individualName }) => ({individualName}) } + const RNA_SEQ_COLUMNS = [ - { name: 'individual', content: '', width: 3 }, - { name: 'zScore', content: 'Z-Score', width: 3, format: ({ zScore }) => (zScore ? zScore.toPrecision(3) : null) }, - { name: 'pValue', content: 'P-Value', width: 3, format: ({ pValue }) => (pValue ? pValue.toPrecision(3) : null) }, - { name: 'pAdjust', content: 'P-Adjust', width: 3, format: ({ pAdjust }) => (pAdjust ? pAdjust.toPrecision(3) : null) }, + INDIVIDUAL_NAME_COLUMN, + ...RNA_SEQ_DETAIL_FIELDS.map(name => ( + { name, content: camelcaseToTitlecase(name).replace(' ', '-'), format: row => row[name].toPrecision(3) } + )), ] const PHENOTYPE_GENE_INFO_COLUMNS = [ - { name: 'individual', content: '', width: 3 }, - { name: 'rank', content: 'Rank', width: 3 }, + INDIVIDUAL_NAME_COLUMN, { name: 'diseaseName', content: 'Disease', width: 3, format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, + { name: 'rank', content: 'Rank', width: 3 }, + { + name: 'scores', + content: 'Scores', + width: 12, + format: ({ scores }) => Object.keys(scores).sort().map(scoreName => ( +
+ {camelcaseToTitlecase(scoreName).replace(' ', '-')} + :   + { scores[scoreName].toPrecision(3) } +
+ )), + }, ] -const PHENOTYPE_GENE_SCORE_COLUMNS = {} - const GENE_DETAIL_SECTIONS = [ { color: 'red', @@ -386,43 +400,34 @@ const GENE_DETAIL_SECTIONS = [ label: 'RNA-Seq', showDetails: (gene, { rnaSeqData }) => rnaSeqData && rnaSeqData[gene.geneId], detailsDisplay: (gene, { rnaSeqData }) => ( - +
+ This gene is flagged as an outlier for RNA-Seq in the following samples + +
), }, { color: 'orange', description: 'Phenotype Prioritization', - lable: 'PhenotypeGene', showDetails: (gene, { phenotypeGeneScores }) => phenotypeGeneScores && phenotypeGeneScores[gene.geneId], detailsDisplay: (gene, { phenotypeGeneScores }) => (Object.entries(phenotypeGeneScores[gene.geneId]).map( - ([tool, data]) => { - PHENOTYPE_GENE_SCORE_COLUMNS[tool] = [ - ...PHENOTYPE_GENE_INFO_COLUMNS, - ...Object.keys(data[0].scores).map(score => ({ - name: score, - content: snakecaseToTitlecase(camelcaseToTitlecase(score)).replace(' ', '-'), - width: 3, - format: ({ scores }) => (scores[score].toPrecision(3)), - })), - ] - return ([ - tool, - ( - - ), - ]) - }, + ([tool, data]) => ({ + label: tool.toUpperCase(), + detail: ( + + ), + }), )), }, ] @@ -458,9 +463,9 @@ const getDetailSections = (configs, gene, compact, labelProps, individualGeneDat ).reduce((acc, config) => (Array.isArray(config.detail) ? [ ...acc, - ...config.detail.map(([tool, detail]) => ({ ...config, label: tool.toUpperCase(), detail })), - ] : [...acc, config]), -[]).filter(({ detail }) => detail).map(({ detail, expandedDisplay, ...sectionConfig }) => ( + ...config.detail.map(detail => ({ ...config, ...detail })), + ] : (config.detail && [...acc, config]) || acc), +[]).map(({ detail, expandedDisplay, ...sectionConfig }) => ( (expandedDisplay && !compact) ? ( diff --git a/ui/shared/components/panel/variants/selectors.js b/ui/shared/components/panel/variants/selectors.js index 15a9871c31..d0cf64aba1 100644 --- a/ui/shared/components/panel/variants/selectors.js +++ b/ui/shared/components/panel/variants/selectors.js @@ -24,31 +24,31 @@ export const getIndividualGeneDataByFamilyGene = createSelector( getPhenotypeGeneScoresByIndividual, (individualsByGuid, rnaSeqDataByIndividual, phenotypeGeneScoresByIndividual) => ( Object.entries(individualsByGuid).reduce((acc, [individualGuid, { familyGuid, displayName }]) => { - const rnaSeqData = rnaSeqDataByIndividual && rnaSeqDataByIndividual[individualGuid]?.outliers - const phenotypeGeneScores = phenotypeGeneScoresByIndividual && phenotypeGeneScoresByIndividual[individualGuid] - if (!rnaSeqData && !phenotypeGeneScores) { - return acc + const rnaSeqData = rnaSeqDataByIndividual[individualGuid]?.outliers + const phenotypeGeneScores = phenotypeGeneScoresByIndividual[individualGuid] + acc[familyGuid] = acc[familyGuid] || {} + if (rnaSeqData) { + acc[familyGuid].rnaSeqData = Object.entries(rnaSeqData || {}).reduce( + (acc2, [geneId, data]) => (data.isSignificant ? { + ...acc2, + [geneId]: [...(acc2[geneId] || []), { ...data, individualName: displayName }], + } : acc2), acc[familyGuid]?.rnaSeqData || {}, + ) } - return { - ...acc, - [familyGuid]: { - rnaSeqData: Object.entries(rnaSeqData || {}).reduce( - (acc2, [geneId, data]) => (data.isSignificant ? 
{ - ...acc2, - [geneId]: [...(acc2[geneId] || []), { ...data, individual: displayName }], - } : acc2), acc[familyGuid]?.rnaSeqData || {}, - ), - phenotypeGeneScores: Object.entries(phenotypeGeneScores || {}).reduce( - (acc2, [geneId, dataByTool]) => ({ - ...acc2, - [geneId]: Object.entries(dataByTool).reduce((acc3, [tool, data]) => ({ - ...acc3, - [tool]: [...(acc3[tool] || []), ...data.map(d => ({ ...d, individual: displayName }))], - }), acc2[geneId] || {}), - }), acc[familyGuid]?.phenotypeGeneScores || {}, - ), - }, + if (phenotypeGeneScores) { + acc[familyGuid].phenotypeGeneScores = Object.entries(phenotypeGeneScores || {}).reduce( + (acc2, [geneId, dataByTool]) => ({ + ...acc2, + [geneId]: Object.entries(dataByTool).reduce((acc3, [tool, data]) => ({ + ...acc3, + [tool]: [...(acc3[tool] || []), ...data.map(d => ({ + ...d, individualName: displayName, rowId: `${displayName}-${d.diseaseId}`, + }))], + }), acc2[geneId] || {}), + }), acc[familyGuid]?.phenotypeGeneScores || {}, + ) } + return acc }, {}) ), ) diff --git a/ui/shared/components/panel/variants/selectors.test.js b/ui/shared/components/panel/variants/selectors.test.js index ddb45c94e6..6e0580cc00 100644 --- a/ui/shared/components/panel/variants/selectors.test.js +++ b/ui/shared/components/panel/variants/selectors.test.js @@ -110,20 +110,21 @@ test('getIndividualGeneDataByFamilyGene', () => { F011652_1: { rnaSeqData: { ENSG00000228198: [ - { individual: 'NA19678', isSignificant: true, pValue: 0.0004 }, - { individual: 'NA19679_1', isSignificant: true, pValue: 0.01 }, + { individualName: 'NA19678', isSignificant: true, pValue: 0.0004 }, + { individualName: 'NA19679_1', isSignificant: true, pValue: 0.01 }, ], ENSG00000164458: [ - { individual: 'NA19678', isSignificant: true, pValue: 0.0073 }, + { individualName: 'NA19678', isSignificant: true, pValue: 0.0073 }, ], }, phenotypeGeneScores: { ENSG00000228198: { lirical: [{ - individual: 'NA19678', + individualName: 'NA19678', diseaseId: 'OMIM:618460', diseaseName: 'Khan-Khan-Katsanis syndrome', geneId: 'ENSG00000228198', + rowId: 'NA19678-OMIM:618460', rank: 1, scores: { compositeLR: 0.066, post_test_probability: 0 }, }], @@ -132,9 +133,8 @@ test('getIndividualGeneDataByFamilyGene', () => { }, F011652_2: { rnaSeqData: { - ENSG00000228198: [{ individual: 'NA19678_2', isSignificant: true, pValue: 0.0214 }], + ENSG00000228198: [{ individualName: 'NA19678_2', isSignificant: true, pValue: 0.0214 }], }, - phenotypeGeneScores: {}, }, }) }) From 1298b5c3acf47631a823f421f6908baa8bec8651 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 10:48:20 -0400 Subject: [PATCH 31/96] update unit tests --- seqr/views/apis/igv_api_tests.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/seqr/views/apis/igv_api_tests.py b/seqr/views/apis/igv_api_tests.py index 6d4d582fed..1d49b8a931 100644 --- a/seqr/views/apis/igv_api_tests.py +++ b/seqr/views/apis/igv_api_tests.py @@ -217,27 +217,28 @@ def test_add_alignment_sample(self, mock_local_file_exists, mock_subprocess): self.assertEqual(response.status_code, 200) @responses.activate - def test_igv_genomes_proxyy(self): - url_path = 'org.genomes/foo?query=true' - url = reverse(igv_genomes_proxy, args=[url_path]) + def test_igv_genomes_proxy(self): + url_path = 'igv.org.genomes/foo?query=true' + s3_url = reverse(igv_genomes_proxy, args=['s3', url_path]) expected_body = {'genes': ['GENE1', 'GENE2']} responses.add( responses.GET, 'https://s3.amazonaws.com/igv.org.genomes/foo?query=true', match_querystring=True, 
content_type='application/json', body=json.dumps(expected_body)) - response = self.client.get(url) + response = self.client.get(s3_url) self.assertEqual(response.status_code, 200) self.assertDictEqual(json.loads(response.content), expected_body) self.assertIsNone(responses.calls[0].request.headers.get('Range')) # test with range header proxy + gs_url = reverse(igv_genomes_proxy, args=['gs', 'test-bucket/foo.fasta']) expected_content = 'test file content' - responses.replace( - responses.GET, 'https://s3.amazonaws.com/igv.org.genomes/foo?query=true', match_querystring=True, + responses.add( + responses.GET, 'https://storage.googleapis.com/test-bucket/foo.fasta', match_querystring=True, body=expected_content) - response = self.client.get(url, HTTP_RANGE='bytes=100-200') + response = self.client.get(gs_url, HTTP_RANGE='bytes=100-200') self.assertEqual(response.status_code, 200) self.assertEqual(response.content.decode(), expected_content) self.assertEqual(responses.calls[1].request.headers.get('Range'), 'bytes=100-200') From 3db96d3f60c578ed92d0048e2182e2ee1bcf4473 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 31 Oct 2022 10:59:20 -0400 Subject: [PATCH 32/96] Adjust column width. --- ui/shared/components/panel/variants/VariantGene.jsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index 2f632f7158..de97455ca3 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -317,7 +317,7 @@ const GENE_DISEASE_DETAIL_SECTIONS = [ const RNA_SEQ_DETAIL_FIELDS = ['zScore', 'pValue', 'pAdjust'] -const INDIVIDUAL_NAME_COLUMN = { name: 'individualName', content: '', width: 3, format: ({ individualName }) => ({individualName}) } +const INDIVIDUAL_NAME_COLUMN = { name: 'individualName', content: '', format: ({ individualName }) => ({individualName}) } const RNA_SEQ_COLUMNS = [ INDIVIDUAL_NAME_COLUMN, @@ -328,8 +328,8 @@ const RNA_SEQ_COLUMNS = [ const PHENOTYPE_GENE_INFO_COLUMNS = [ INDIVIDUAL_NAME_COLUMN, - { name: 'diseaseName', content: 'Disease', width: 3, format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, - { name: 'rank', content: 'Rank', width: 3 }, + { name: 'diseaseName', content: 'Disease', format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, + { name: 'rank', content: 'Rank' }, { name: 'scores', content: 'Scores', From 842556f882aab51ac1ec4cfa7bab094da2ca6ca5 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 31 Oct 2022 11:03:41 -0400 Subject: [PATCH 33/96] Update the score names while loading data. 
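Score names arrive in a mix of snake case (post_test_probability) and camel case (compositeLR), so the loader now normalizes each name to snake case and back to camel case before storing it. A rough standalone equivalent of that normalization; the real _to_snake_case/_to_camel_case helpers in seqr may handle edge cases differently:

    import re

    def to_snake_case(name):
        return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

    def to_camel_case(name):
        head, *rest = name.split('_')
        return head + ''.join(part.capitalize() for part in rest)

    def normalize_score_name(name):
        return to_camel_case(to_snake_case(name))

    assert normalize_score_name('post_test_probability') == 'postTestProbability'
    # With this sketch's regex an all-caps suffix round-trips unchanged:
    assert normalize_score_name('compositeLR') == 'compositeLR'
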
--- seqr/views/utils/dataset_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 4cfd0241c7..f57edbcbe3 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -11,7 +11,7 @@ from seqr.utils.logging_utils import log_model_bulk_update, SeqrLogger from seqr.views.utils.file_utils import parse_file from seqr.views.utils.permissions_utils import get_internal_projects -from seqr.views.utils.json_utils import _to_snake_case +from seqr.views.utils.json_utils import _to_snake_case, _to_camel_case logger = SeqrLogger(__name__) @@ -460,7 +460,7 @@ def _parse_phenotype_pri_row(row): score_name = row.get(f'scoreName{i}') if not score_name: break - scores[score_name] = float(row[f'score{i}']) + scores[_to_camel_case(_to_snake_case(score_name))] = float(row[f'score{i}']) record['scores'] = scores yield record From 313c3fa2a751caa58b6810300f696193a38d334e Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 11:34:54 -0400 Subject: [PATCH 34/96] remove 1kg from server search --- seqr/utils/elasticsearch/constants.py | 5 +---- seqr/utils/elasticsearch/es_search.py | 1 + seqr/utils/elasticsearch/es_utils_tests.py | 23 ---------------------- seqr/views/utils/test_utils.py | 10 ---------- 4 files changed, 2 insertions(+), 37 deletions(-) diff --git a/seqr/utils/elasticsearch/constants.py b/seqr/utils/elasticsearch/constants.py index 2c6c5a6496..0016a1a6b9 100644 --- a/seqr/utils/elasticsearch/constants.py +++ b/seqr/utils/elasticsearch/constants.py @@ -90,9 +90,6 @@ 'filter_AF': [], 'Het': None, }, - 'g1k': { - 'filter_AF': ['g1k_POPMAX_AF'], - }, 'exac': { 'filter_AF': ['exac_AF_POPMAX'], 'AC': 'exac_AC_Adj', @@ -259,7 +256,7 @@ 'source': "doc.containsKey(params.field) ? (doc[params.field].empty ? 
0 : doc[params.field].value) : 1" } } - }] for sort, pop_key in {'gnomad': 'gnomad_genomes', 'gnomad_exomes': 'gnomad_exomes', '1kg': 'g1k', 'callset_af': 'callset'}.items()} + }] for sort, pop_key in {'gnomad': 'gnomad_genomes', 'gnomad_exomes': 'gnomad_exomes', 'callset_af': 'callset'}.items()} SORT_FIELDS.update(POPULATION_SORTS) PREDICTOR_SORT_FIELDS = { 'cadd': 'cadd_PHRED', diff --git a/seqr/utils/elasticsearch/es_search.py b/seqr/utils/elasticsearch/es_search.py index 2fde3d884b..2eda40e1c6 100644 --- a/seqr/utils/elasticsearch/es_search.py +++ b/seqr/utils/elasticsearch/es_search.py @@ -325,6 +325,7 @@ def _filter_by_in_silico(self, in_silico_filters): self._filter(_in_silico_filter(in_silico_filters)) def _filter_by_frequency(self, frequencies, clinvar_terms=None): + frequencies = {pop: v for pop, v in (frequencies or {}).items() if pop in POPULATIONS} if not frequencies: return diff --git a/seqr/utils/elasticsearch/es_utils_tests.py b/seqr/utils/elasticsearch/es_utils_tests.py index 0e6bcef1aa..2a26d66a2b 100644 --- a/seqr/utils/elasticsearch/es_utils_tests.py +++ b/seqr/utils/elasticsearch/es_utils_tests.py @@ -798,14 +798,6 @@ 'AC', 'AF', 'AN', - 'g1k_AC', - 'g1k_Hom', - 'g1k_Hemi', - 'g1k_POPMAX_AF', - 'g1k_AF', - 'g1k_AN', - 'g1k_Het', - 'g1k_ID', 'gnomad_genomes_AC', 'gnomad_genomes_Hom', 'gnomad_genomes_Hemi', @@ -927,7 +919,6 @@ MITO_SOURCE_ONLY_FIELDS = [ 'callset_max_hl', 'exac_max_hl', - 'g1k_max_hl', 'gnomad_exomes_max_hl', 'gnomad_genomes_max_hl', 'gnomad_svs_max_hl', @@ -1656,13 +1647,6 @@ def test_filtered_get_es_variants(self): {'range': {'exac_AC_Adj': {'lte': 2}}} ]} }, - {'bool': { - 'minimum_should_match': 1, - 'should': [ - {'bool': {'must_not': [{'exists': {'field': 'g1k_POPMAX_AF'}}]}}, - {'range': {'g1k_POPMAX_AF': {'lte': 0.001}}} - ] - }}, {'bool': { 'minimum_should_match': 1, 'should': [ @@ -1721,13 +1705,6 @@ def test_filtered_get_es_variants(self): {'range': {'AF': {'lte': 0.1}}} ], 'must': [ - {'bool': { - 'minimum_should_match': 1, - 'should': [ - {'bool': {'must_not': [{'exists': {'field': 'g1k_POPMAX_AF'}}]}}, - {'range': {'g1k_POPMAX_AF': {'lte': 0.05}}} - ] - }}, {'bool': { 'minimum_should_match': 1, 'should': [ diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index d0e72f3f76..c927f935a1 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -868,8 +868,6 @@ def call_request_json(self, index=-1): 'populations': { 'callset': {'an': 32, 'ac': 2, 'hom': None, 'af': 0.063, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, - 'g1k': {'an': 0, 'ac': 0, 'hom': 0, 'af': 0.0, 'hemi': 0, 'filter_af': None, 'het': 0, 'id': None, - 'max_hl': None}, 'gnomad_genomes': {'an': 30946, 'ac': 4, 'hom': 0, 'af': 0.00012925741614425127, 'hemi': 0, 'filter_af': 0.0004590314436538903, 'het': 0, 'id': None, 'max_hl': None}, 'exac': {'an': 121308, 'ac': 8, 'hom': 0, 'af': 0.00006589, 'hemi': 0, 'filter_af': 0.0006726888333653661, @@ -952,8 +950,6 @@ def call_request_json(self, index=-1): 'populations': { 'callset': {'an': 32, 'ac': 1, 'hom': None, 'af': 0.031, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, - 'g1k': {'an': 0, 'ac': 0, 'hom': 0, 'af': 0.0, 'hemi': 0, 'filter_af': None, 'het': 0, 'id': None, - 'max_hl': None}, 'gnomad_genomes': {'an': 0, 'ac': 0, 'hom': 0, 'af': 0.0, 'hemi': 0, 'filter_af': None, 'het': 0, 'id': None, 'max_hl': None}, 'exac': {'an': 121336, 'ac': 6, 'hom': 0, 'af': 0.00004942, 'hemi': 0, 'filter_af': 0.000242306760358614, @@ -1039,8 
+1035,6 @@ def call_request_json(self, index=-1): 'populations': { 'callset': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, - 'g1k': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, - 'id': None, 'max_hl': None}, 'gnomad_genomes': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, 'exac': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, @@ -1126,8 +1120,6 @@ def call_request_json(self, index=-1): 'populations': { 'callset': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, - 'g1k': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, - 'id': None, 'max_hl': None}, 'gnomad_genomes': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, 'id': None, 'max_hl': None}, 'exac': {'an': None, 'ac': None, 'hom': None, 'af': None, 'hemi': None, 'filter_af': None, 'het': None, @@ -1207,8 +1199,6 @@ def call_request_json(self, index=-1): 'het': None, 'hom': None, 'id': None, 'max_hl': None}, 'exac': {'ac': None, 'af': None, 'an': None, 'filter_af': None, 'hemi': None, 'het': None, 'hom': None, 'id': None, 'max_hl': None}, - 'g1k': {'ac': None, 'af': None, 'an': None, 'filter_af': None, 'hemi': None, - 'het': None, 'hom': None, 'id': None, 'max_hl': None}, 'gnomad_exomes': {'ac': None, 'af': None, 'an': None, 'filter_af': None, 'hemi': None, 'het': None, 'hom': None, 'id': None, 'max_hl': None}, 'gnomad_genomes': {'ac': None, 'af': None, 'an': None, 'filter_af': None, From 57d9a0aedc8a04aaa7e9819766016bee75aa213c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 11:37:46 -0400 Subject: [PATCH 35/96] remove 1kg from variant client side --- ui/shared/components/panel/variants/Frequencies.jsx | 1 - ui/shared/utils/constants.js | 3 --- 2 files changed, 4 deletions(-) diff --git a/ui/shared/components/panel/variants/Frequencies.jsx b/ui/shared/components/panel/variants/Frequencies.jsx index 1194ebdca0..043a4aa252 100644 --- a/ui/shared/components/panel/variants/Frequencies.jsx +++ b/ui/shared/components/panel/variants/Frequencies.jsx @@ -162,7 +162,6 @@ const CALLSET_POP = { field: 'callset', fieldTitle: 'This Callset', acDisplay: ' const POPULATIONS = [ { field: 'sv_callset', fieldTitle: 'This Callset', acDisplay: 'AC', helpMessage: SV_CALLSET_CRITERIA_MESSAGE }, CALLSET_POP, - { field: 'g1k', fieldTitle: '1kg WGS' }, { field: 'exac', fieldTitle: 'ExAC', diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 03c1c59cf2..02daab7de0 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -957,7 +957,6 @@ const SORT_BY_PROTEIN_CONSQ = 'PROTEIN_CONSEQUENCE' const SORT_BY_GNOMAD_GENOMES = 'GNOMAD' const SORT_BY_GNOMAD_EXOMES = 'GNOMAD_EXOMES' const SORT_BY_CALLSET_AF = 'CALLSET_AF' -const SORT_BY_1KG = '1KG' const SORT_BY_CONSTRAINT = 'CONSTRAINT' const SORT_BY_CADD = 'CADD' const SORT_BY_REVEL = 'REVEL' @@ -1023,7 +1022,6 @@ const VARIANT_SORT_OPTONS = [ { value: SORT_BY_GNOMAD_GENOMES, text: 'gnomAD Genomes Frequency', comparator: populationComparator('gnomad_genomes') }, { value: SORT_BY_GNOMAD_EXOMES, text: 'gnomAD Exomes Frequency', comparator: populationComparator('gnomad_exomes') }, { value: SORT_BY_CALLSET_AF, text: 'Callset AF', comparator: 
populationComparator('callset') }, - { value: SORT_BY_1KG, text: '1kg Frequency', comparator: populationComparator('g1k') }, { value: SORT_BY_CADD, text: 'Cadd', comparator: predictionComparator('cadd') }, { value: SORT_BY_REVEL, text: 'Revel', comparator: predictionComparator('revel') }, { value: SORT_BY_EIGEN, text: 'Eigen', comparator: predictionComparator('eigen') }, @@ -1225,7 +1223,6 @@ export const VARIANT_EXPORT_DATA = [ { header: 'alt' }, { header: 'gene', getVal: variant => getVariantMainTranscript(variant).geneSymbol }, { header: 'worst_consequence', getVal: variant => getVariantMainTranscript(variant).majorConsequence }, - { header: '1kg_freq', getVal: getPopAf('g1k') }, { header: 'exac_freq', getVal: getPopAf('exac') }, { header: 'gnomad_genomes_freq', getVal: getPopAf('gnomad_genomes') }, { header: 'gnomad_exomes_freq', getVal: getPopAf('gnomad_exomes') }, From 568b8516ee96f4b66317ad3373e05e50bb8bab7b Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 31 Oct 2022 13:25:02 -0400 Subject: [PATCH 36/96] Add a comment. --- seqr/views/utils/dataset_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index f57edbcbe3..0537417a68 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -460,6 +460,8 @@ def _parse_phenotype_pri_row(row): score_name = row.get(f'scoreName{i}') if not score_name: break + # We have both camel case and snake case in the score field names, so convert them to snake case first (those + # in snake case kept unchanged), then to camel case. scores[_to_camel_case(_to_snake_case(score_name))] = float(row[f'score{i}']) record['scores'] = scores From 251b8a8838091cecef1a72ef68bbd5aceae2579c Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Mon, 31 Oct 2022 16:50:01 -0400 Subject: [PATCH 37/96] Update the column widths for the datatable. --- ui/shared/components/panel/variants/VariantGene.jsx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index de97455ca3..5aa9341c19 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -90,7 +90,7 @@ const LocusListsContainer = styled.div` const GeneLabel = React.memo(({ popupHeader, popupContent, showEmpty, ...labelProps }) => { const content = return (popupContent || showEmpty) ? - : content + : content }) GeneLabel.propTypes = { @@ -327,13 +327,13 @@ const RNA_SEQ_COLUMNS = [ ] const PHENOTYPE_GENE_INFO_COLUMNS = [ - INDIVIDUAL_NAME_COLUMN, - { name: 'diseaseName', content: 'Disease', format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, - { name: 'rank', content: 'Rank' }, + { ...INDIVIDUAL_NAME_COLUMN, width: 4 }, + { name: 'diseaseName', content: 'Disease', width: 5, format: ({ diseaseName, diseaseId }) => `${diseaseName} (${diseaseId})` }, + { name: 'rank', content: 'Rank', width: 1 }, { name: 'scores', content: 'Scores', - width: 12, + width: 6, format: ({ scores }) => Object.keys(scores).sort().map(scoreName => (
{camelcaseToTitlecase(scoreName).replace(' ', '-')} @@ -422,6 +422,8 @@ const GENE_DETAIL_SECTIONS = [ Date: Mon, 31 Oct 2022 16:50:21 -0400 Subject: [PATCH 38/96] update transcript models --- .../management/commands/update_gencode.py | 14 ++++++++-- .../migrations/0021_auto_20221031_2049.py | 27 +++++++++++++++++++ reference_data/models.py | 7 ++++- 3 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 reference_data/migrations/0021_auto_20221031_2049.py diff --git a/reference_data/management/commands/update_gencode.py b/reference_data/management/commands/update_gencode.py index b8e4088ba0..fbd70b4495 100644 --- a/reference_data/management/commands/update_gencode.py +++ b/reference_data/management/commands/update_gencode.py @@ -135,8 +135,16 @@ def _parse_line(line, i, new_genes, new_transcripts, existing_gene_ids, existin # parse info field info_fields = [x.strip().split() for x in record['info'].split(';') if x != ''] - info_fields = {k: v.strip('"') for k, v in info_fields} - record.update(info_fields) + info_dict = {} + for k, v in info_fields: + v = v.strip('"') + if k == 'tag': + if k not in info_dict: + info_dict[k] = [] + info_dict[k].append(v) + else: + info_dict[k] = v + record.update(info_dict) record['gene_id'] = record['gene_id'].split('.')[0] if 'transcript_id' in record: @@ -179,6 +187,8 @@ def _parse_line(line, i, new_genes, new_transcripts, existing_gene_ids, existin "end_grch{}".format(genome_version): record["end"], "strand_grch{}".format(genome_version): record["strand"], }) + if 'MANE_Select' in record.get('tag', []): + new_transcripts[record['transcript_id']]['is_mane_select'] = True elif record['feature_type'] == 'CDS': if record["transcript_id"] in existing_transcript_ids: diff --git a/reference_data/migrations/0021_auto_20221031_2049.py b/reference_data/migrations/0021_auto_20221031_2049.py new file mode 100644 index 0000000000..f1052679b2 --- /dev/null +++ b/reference_data/migrations/0021_auto_20221031_2049.py @@ -0,0 +1,27 @@ +# Generated by Django 3.2.16 on 2022-10-31 20:49 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('reference_data', '0020_clingen'), + ] + + operations = [ + migrations.AddField( + model_name='transcriptinfo', + name='is_mane_select', + field=models.BooleanField(default=False), + ), + migrations.CreateModel( + name='RefseqTranscript', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('refseq_id', models.CharField(max_length=20)), + ('transcript', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.transcriptinfo')), + ], + ), + ] diff --git a/reference_data/models.py b/reference_data/models.py index 2c47d353a9..b749e4b2a4 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -113,7 +113,7 @@ class TranscriptInfo(models.Model): gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE) transcript_id = models.CharField(max_length=20, db_index=True, unique=True) # without the version suffix - #protein_id = models.CharField(max_length=20, null=True) + is_mane_select = models.BooleanField(default=False) chrom_grch37 = models.CharField(max_length=2, null=True, blank=True) start_grch37 = models.IntegerField(null=True, blank=True) @@ -128,6 +128,11 @@ class TranscriptInfo(models.Model): coding_region_size_grch38 = models.IntegerField(default=0) # number of protein-coding bases (= 0 for non-coding genes) +class 
RefseqTranscript(models.Model): + transcript = models.ForeignKey(TranscriptInfo, on_delete=models.CASCADE) + refseq_id = models.CharField(max_length=20) + + # based on # ftp://ftp.broadinstitute.org/pub/ExAC_release/release0.3.1/functional_gene_constraint/fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt class GeneConstraint(models.Model): gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE) From ae366e83869294f7d543f2432acb4967047a85af Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 16:53:27 -0400 Subject: [PATCH 39/96] update tests --- reference_data/management/tests/update_gencode_tests.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index dd8e21cf55..0b42be2b8a 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -26,7 +26,7 @@ 'chr1 HAVANA exon 11869 12227 . + . gene_id "ENSG00000223972.5_2"; transcript_id "ENST00000456328.2_1"; gene_type "transcribed_unprocessed_pseudogene"; gene_name "DDX11L1"; transcript_type "lncRNA"; transcript_name "DDX11L1-202"; exon_number 1; exon_id "ENSE00002234944.1_1"; level 2; transcript_support_level 1; hgnc_id "HGNC:37102"; tag "basic"; havana_gene "OTTHUMG00000000961.2_2"; havana_transcript "OTTHUMT00000362751.1_1"; remap_original_location "chr1:+:11869-12227"; remap_status "full_contig";\n', # Not existing gene_id 'chr1 HAVANA gene 621059 622053 . - . gene_id "ENSG00000284662.1_2"; gene_type "protein_coding"; gene_name "OR4F16"; level 2; hgnc_id "HGNC:15079"; havana_gene "OTTHUMG00000002581.3_2"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "overlap";\n', - 'chr1 HAVANA transcript 621059 622053 . - . gene_id "ENSG00000284662.1_2"; transcript_id "ENST00000332831.4_2"; gene_type "protein_coding"; gene_name "OR4F16"; transcript_type "protein_coding"; transcript_name "OR4F16-201"; level 2; protein_id "ENSP00000329982.2"; transcript_support_level "NA"; hgnc_id "HGNC:15079"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS41221.1"; havana_gene "OTTHUMG00000002581.3_2"; havana_transcript "OTTHUMT00000007334.3_2"; remap_num_mappings 1; remap_status "full_contig"; remap_target_status "overlap";\n', + 'chr1 HAVANA transcript 621059 622053 . - . gene_id "ENSG00000284662.1_2"; transcript_id "ENST00000332831.4_2"; gene_type "protein_coding"; gene_name "OR4F16"; transcript_type "protein_coding"; transcript_name "OR4F16-201"; level 2; protein_id "ENSP00000329982.2"; transcript_support_level "NA"; hgnc_id "HGNC:15079"; tag "basic"; tag "MANE_Select"; tag "CCDS"; ccdsid "CCDS41221.1"; havana_gene "OTTHUMG00000002581.3_2"; havana_transcript "OTTHUMT00000007334.3_2"; remap_num_mappings 1; remap_status "full_contig"; remap_target_status "overlap";\n', # feature_type is 'CDS' # gene_id not in existing_gene_ids and transcript_size > ... 'chr1 HAVANA CDS 621099 622034 . 
- 0 gene_id "ENSG00000284662.1_2"; transcript_id "ENST00000332831.4_2"; gene_type "protein_coding"; gene_name "OR4F16"; transcript_type "protein_coding"; transcript_name "OR4F16-201"; exon_number 1; exon_id "ENSE00002324228.3"; level 2; protein_id "ENSP00000329982.2"; transcript_support_level "NA"; hgnc_id "HGNC:15079"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS41221.1"; havana_gene "OTTHUMG00000002581.3_2"; havana_transcript "OTTHUMT00000007334.3_2"; remap_original_location "chr1:-:685719-686654"; remap_status "full_contig";\n', @@ -178,12 +178,14 @@ def test_update_gencode_command(self, mock_logger): self.assertEqual(TranscriptInfo.objects.all().count(), 2) trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000456328') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') + self.assertFalse(trans_info.is_mane_select) trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000332831') self.assertEqual(trans_info.start_grch37, 621059) self.assertEqual(trans_info.end_grch37, 622053) self.assertEqual(trans_info.strand_grch37, '-') self.assertEqual(trans_info.chrom_grch37, '1') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000284662') + self.assertTrue(trans_info.is_mane_select) # Test normal command function with a --reset option mock_logger.reset_mock() From c70ba3d287fe9a527a2af50ee3d5b0d565e2edc7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 17:24:28 -0400 Subject: [PATCH 40/96] move gencode parsing to helper utility --- .../commands/update_all_reference_data.py | 9 +- .../management/commands/update_gencode.py | 145 +--------------- .../commands/utils/gencode_utils.py | 156 ++++++++++++++++++ .../management/tests/update_gencode_tests.py | 13 +- 4 files changed, 170 insertions(+), 153 deletions(-) create mode 100644 reference_data/management/commands/utils/gencode_utils.py diff --git a/reference_data/management/commands/update_all_reference_data.py b/reference_data/management/commands/update_all_reference_data.py index 36df468c6b..e3314c2e9c 100644 --- a/reference_data/management/commands/update_all_reference_data.py +++ b/reference_data/management/commands/update_all_reference_data.py @@ -2,6 +2,7 @@ from collections import OrderedDict from django.core.management.base import BaseCommand +from reference_data.management.commands.utils.gencode_utils import LATEST_GENCODE_RELEASE, OLD_GENCODE_RELEASES from reference_data.management.commands.utils.update_utils import update_records from reference_data.management.commands.update_human_phenotype_ontology import update_hpo from reference_data.management.commands.update_dbnsfp_gene import DbNSFPReferenceDataHandler @@ -52,11 +53,9 @@ def handle(self, *args, **options): if not options["skip_gencode"]: # Download latest version first, and then add any genes from old releases not included in the latest release # Old gene ids are used in the gene constraint table and other datasets, as well as older sequencing data - update_gencode(31, reset=True) - update_gencode(29) - update_gencode(28) - update_gencode(27) - update_gencode(19) + update_gencode(LATEST_GENCODE_RELEASE, reset=True) + for release in OLD_GENCODE_RELEASES: + update_gencode(release) updated.append('gencode') if not options["skip_omim"]: diff --git a/reference_data/management/commands/update_gencode.py b/reference_data/management/commands/update_gencode.py index fbd70b4495..d040903c20 100644 --- a/reference_data/management/commands/update_gencode.py +++ b/reference_data/management/commands/update_gencode.py @@ -1,24 +1,12 
@@ -import collections -import gzip import logging -import os -from tqdm import tqdm -from django.core.management.base import BaseCommand, CommandError +from django.core.management.base import BaseCommand -from reference_data.management.commands.utils.download_utils import download_file +from reference_data.management.commands.utils.gencode_utils import load_gencode_records from reference_data.models import GeneInfo, TranscriptInfo, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 logger = logging.getLogger(__name__) -GENCODE_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/gencode.v{gencode_release}.annotation.gtf.gz" -GENCODE_LIFT37_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/GRCh37_mapping/gencode.v{gencode_release}lift37.annotation.gtf.gz" - -# expected GTF file header -GENCODE_FILE_HEADER = [ - 'chrom', 'source', 'feature_type', 'start', 'end', 'score', 'strand', 'phase', 'info' -] - class Command(BaseCommand): help = "Loads the GRCh37 and/or GRCh38 versions of the Gencode GTF from a particular Gencode release" @@ -38,35 +26,6 @@ def handle(self, *args, **options): reset=options['reset']) -def _get_valid_gencode_gtf_paths(gencode_release, gencode_gtf_path, genome_version): - if gencode_gtf_path and genome_version and os.path.isfile(gencode_gtf_path): - if gencode_release == 19 and genome_version != GENOME_VERSION_GRCh37: - raise CommandError("Invalid genome_version: {}. gencode v19 only has a GRCh37 version".format(genome_version)) - elif gencode_release <= 22 and genome_version != GENOME_VERSION_GRCh38: - raise CommandError("Invalid genome_version: {}. gencode v20, v21, v22 only have a GRCh38 version".format(genome_version)) - elif genome_version != GENOME_VERSION_GRCh38 and "lift" not in gencode_gtf_path.lower(): - raise CommandError("Invalid genome_version for file: {}. gencode v23 and up must have 'lift' in the filename or genome_version arg must be GRCh38".format(gencode_gtf_path)) - - gencode_gtf_paths = {genome_version: gencode_gtf_path} - elif gencode_gtf_path and not genome_version: - raise CommandError("The genome version must also be specified after the gencode GTF file path") - else: - if gencode_release == 19: - urls = [('37', GENCODE_GTF_URL.format(gencode_release=gencode_release))] - elif gencode_release <= 22: - urls = [('38', GENCODE_GTF_URL.format(gencode_release=gencode_release))] - else: - urls = [ - ('37', GENCODE_LIFT37_GTF_URL.format(gencode_release=gencode_release)), - ('38', GENCODE_GTF_URL.format(gencode_release=gencode_release)), - ] - gencode_gtf_paths = {} - for genome_version, url in urls: - local_filename = download_file(url) - gencode_gtf_paths.update({genome_version: local_filename}) - return gencode_gtf_paths - - def update_gencode(gencode_release, gencode_gtf_path=None, genome_version=None, reset=False): """Update GeneInfo and TranscriptInfo tables. @@ -78,8 +37,6 @@ def update_gencode(gencode_release, gencode_gtf_path=None, genome_version=None, Setting this to False can be useful to sequentially load more than one gencode release so that data in the tables represents the union of multiple gencode releases. 
""" - gencode_gtf_paths = _get_valid_gencode_gtf_paths(gencode_release, gencode_gtf_path, genome_version) - if reset: logger.info("Dropping the {} existing TranscriptInfo entries".format(TranscriptInfo.objects.count())) TranscriptInfo.objects.all().delete() @@ -91,17 +48,8 @@ def update_gencode(gencode_release, gencode_gtf_path=None, genome_version=None, transcript.transcript_id for transcript in TranscriptInfo.objects.all().only('transcript_id') } - counters = collections.defaultdict(int) - new_genes = collections.defaultdict(dict) - new_transcripts = collections.defaultdict(dict) - - for genome_version, gencode_gtf_path in gencode_gtf_paths.items(): - logger.info("Loading {} (genome version: {})".format(gencode_gtf_path, genome_version)) - with gzip.open(gencode_gtf_path, 'rt') as gencode_file: - for i, line in enumerate(tqdm(gencode_file, unit=' gencode records')): - _parse_line( - line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, - genome_version, gencode_release) + new_genes, new_transcripts, counters = load_gencode_records( + gencode_release, gencode_gtf_path, genome_version, existing_gene_ids, existing_transcript_ids) logger.info('Creating {} GeneInfo records'.format(len(new_genes))) counters["genes_created"] = len(new_genes) @@ -118,88 +66,3 @@ def update_gencode(gencode_release, gencode_gtf_path=None, genome_version=None, logger.info("Stats: ") for k, v in counters.items(): logger.info(" %s: %s" % (k, v)) - -def _parse_line(line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, genome_version, gencode_release): - line = line.rstrip('\r\n') - if not line or line.startswith('#'): - return - fields = line.split('\t') - - if len(fields) != len(GENCODE_FILE_HEADER): - raise ValueError("Unexpected number of fields on line #%s: %s" % (i, fields)) - - record = dict(zip(GENCODE_FILE_HEADER, fields)) - - if record['feature_type'] not in ('gene', 'transcript', 'CDS'): - return - - # parse info field - info_fields = [x.strip().split() for x in record['info'].split(';') if x != ''] - info_dict = {} - for k, v in info_fields: - v = v.strip('"') - if k == 'tag': - if k not in info_dict: - info_dict[k] = [] - info_dict[k].append(v) - else: - info_dict[k] = v - record.update(info_dict) - - record['gene_id'] = record['gene_id'].split('.')[0] - if 'transcript_id' in record: - record['transcript_id'] = record['transcript_id'].split('.')[0] - record['chrom'] = record['chrom'].replace("chr", "").upper() - record['start'] = int(record['start']) - record['end'] = int(record['end']) - - if len(record["chrom"]) > 2: - return # skip super-contigs - - if record['feature_type'] == 'gene': - if record["gene_id"] in existing_gene_ids: - counters["genes_skipped"] += 1 - return - - new_genes[record['gene_id']].update({ - "gene_id": record["gene_id"], - "gene_symbol": record["gene_name"], - - "chrom_grch{}".format(genome_version): record["chrom"], - "start_grch{}".format(genome_version): record["start"], - "end_grch{}".format(genome_version): record["end"], - "strand_grch{}".format(genome_version): record["strand"], - - "gencode_gene_type": record["gene_type"], - "gencode_release": int(gencode_release), - }) - - elif record['feature_type'] == 'transcript': - if record["transcript_id"] in existing_transcript_ids: - counters["transcripts_skipped"] += 1 - return - - new_transcripts[record['transcript_id']].update({ - "gene_id": record["gene_id"], - "transcript_id": record["transcript_id"], - "chrom_grch{}".format(genome_version): 
record["chrom"], - "start_grch{}".format(genome_version): record["start"], - "end_grch{}".format(genome_version): record["end"], - "strand_grch{}".format(genome_version): record["strand"], - }) - if 'MANE_Select' in record.get('tag', []): - new_transcripts[record['transcript_id']]['is_mane_select'] = True - - elif record['feature_type'] == 'CDS': - if record["transcript_id"] in existing_transcript_ids: - return - - coding_region_size_field_name = "coding_region_size_grch{}".format(genome_version) - # add + 1 because GTF has 1-based coords. (https://useast.ensembl.org/info/website/upload/gff.html) - transcript_size = record["end"] - record["start"] + 1 - transcript_size += new_transcripts[record['transcript_id']].get(coding_region_size_field_name, 0) - new_transcripts[record['transcript_id']][coding_region_size_field_name] = transcript_size - - if record['gene_id'] not in existing_gene_ids and \ - transcript_size > new_genes[record['gene_id']].get(coding_region_size_field_name, 0): - new_genes[record['gene_id']][coding_region_size_field_name] = transcript_size \ No newline at end of file diff --git a/reference_data/management/commands/utils/gencode_utils.py b/reference_data/management/commands/utils/gencode_utils.py new file mode 100644 index 0000000000..a0fbefa54d --- /dev/null +++ b/reference_data/management/commands/utils/gencode_utils.py @@ -0,0 +1,156 @@ +import collections +import gzip +import logging +import os +from tqdm import tqdm + +from django.core.management.base import CommandError + +from reference_data.management.commands.utils.download_utils import download_file +from reference_data.models import GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 + +logger = logging.getLogger(__name__) + +LATEST_GENCODE_RELEASE = 31 +OLD_GENCODE_RELEASES = [29, 28, 27, 19] + +GENCODE_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/gencode.v{gencode_release}.annotation.gtf.gz" +GENCODE_LIFT37_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/GRCh37_mapping/gencode.v{gencode_release}lift37.annotation.gtf.gz" + +# expected GTF file header +GENCODE_FILE_HEADER = [ + 'chrom', 'source', 'feature_type', 'start', 'end', 'score', 'strand', 'phase', 'info' +] + + +def _get_valid_gencode_gtf_paths(gencode_release, gencode_gtf_path, genome_version): + if gencode_gtf_path and genome_version and os.path.isfile(gencode_gtf_path): + if gencode_release == 19 and genome_version != GENOME_VERSION_GRCh37: + raise CommandError("Invalid genome_version: {}. gencode v19 only has a GRCh37 version".format(genome_version)) + elif gencode_release <= 22 and genome_version != GENOME_VERSION_GRCh38: + raise CommandError("Invalid genome_version: {}. gencode v20, v21, v22 only have a GRCh38 version".format(genome_version)) + elif genome_version != GENOME_VERSION_GRCh38 and "lift" not in gencode_gtf_path.lower(): + raise CommandError("Invalid genome_version for file: {}. 
gencode v23 and up must have 'lift' in the filename or genome_version arg must be GRCh38".format(gencode_gtf_path)) + + gencode_gtf_paths = {genome_version: gencode_gtf_path} + elif gencode_gtf_path and not genome_version: + raise CommandError("The genome version must also be specified after the gencode GTF file path") + else: + if gencode_release == 19: + urls = [('37', GENCODE_GTF_URL.format(gencode_release=gencode_release))] + elif gencode_release <= 22: + urls = [('38', GENCODE_GTF_URL.format(gencode_release=gencode_release))] + else: + urls = [ + ('37', GENCODE_LIFT37_GTF_URL.format(gencode_release=gencode_release)), + ('38', GENCODE_GTF_URL.format(gencode_release=gencode_release)), + ] + gencode_gtf_paths = {} + for genome_version, url in urls: + local_filename = download_file(url) + gencode_gtf_paths.update({genome_version: local_filename}) + return gencode_gtf_paths + + +def load_gencode_records(gencode_release, gencode_gtf_path=None, genome_version=None, existing_gene_ids=None, existing_transcript_ids=None): + gencode_gtf_paths = _get_valid_gencode_gtf_paths(gencode_release, gencode_gtf_path, genome_version) + + counters = collections.defaultdict(int) + new_genes = collections.defaultdict(dict) + new_transcripts = collections.defaultdict(dict) + + for genome_version, gencode_gtf_path in gencode_gtf_paths.items(): + logger.info("Loading {} (genome version: {})".format(gencode_gtf_path, genome_version)) + with gzip.open(gencode_gtf_path, 'rt') as gencode_file: + for i, line in enumerate(tqdm(gencode_file, unit=' gencode records')): + _parse_line( + line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, + genome_version, gencode_release) + + return new_genes, new_transcripts, counters + + +def _parse_line(line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, genome_version, gencode_release): + line = line.rstrip('\r\n') + if not line or line.startswith('#'): + return + fields = line.split('\t') + + if len(fields) != len(GENCODE_FILE_HEADER): + raise ValueError("Unexpected number of fields on line #%s: %s" % (i, fields)) + + record = dict(zip(GENCODE_FILE_HEADER, fields)) + + if record['feature_type'] not in ('gene', 'transcript', 'CDS'): + return + + # parse info field + info_fields = [x.strip().split() for x in record['info'].split(';') if x != ''] + info_dict = {} + for k, v in info_fields: + v = v.strip('"') + if k == 'tag': + if k not in info_dict: + info_dict[k] = [] + info_dict[k].append(v) + else: + info_dict[k] = v + record.update(info_dict) + + record['gene_id'] = record['gene_id'].split('.')[0] + if 'transcript_id' in record: + record['transcript_id'] = record['transcript_id'].split('.')[0] + record['chrom'] = record['chrom'].replace("chr", "").upper() + record['start'] = int(record['start']) + record['end'] = int(record['end']) + + if len(record["chrom"]) > 2: + return # skip super-contigs + + if record['feature_type'] == 'gene': + if record["gene_id"] in existing_gene_ids: + counters["genes_skipped"] += 1 + return + + new_genes[record['gene_id']].update({ + "gene_id": record["gene_id"], + "gene_symbol": record["gene_name"], + + "chrom_grch{}".format(genome_version): record["chrom"], + "start_grch{}".format(genome_version): record["start"], + "end_grch{}".format(genome_version): record["end"], + "strand_grch{}".format(genome_version): record["strand"], + + "gencode_gene_type": record["gene_type"], + "gencode_release": int(gencode_release), + }) + + elif record['feature_type'] == 'transcript': + if 
record["transcript_id"] in existing_transcript_ids: + counters["transcripts_skipped"] += 1 + return + + new_transcripts[record['transcript_id']].update({ + "gene_id": record["gene_id"], + "transcript_id": record["transcript_id"], + "chrom_grch{}".format(genome_version): record["chrom"], + "start_grch{}".format(genome_version): record["start"], + "end_grch{}".format(genome_version): record["end"], + "strand_grch{}".format(genome_version): record["strand"], + }) + if 'MANE_Select' in record.get('tag', []): + new_transcripts[record['transcript_id']]['is_mane_select'] = True + + elif record['feature_type'] == 'CDS': + if record["transcript_id"] in existing_transcript_ids: + return + + coding_region_size_field_name = "coding_region_size_grch{}".format(genome_version) + # add + 1 because GTF has 1-based coords. (https://useast.ensembl.org/info/website/upload/gff.html) + transcript_size = record["end"] - record["start"] + 1 + transcript_size += new_transcripts[record['transcript_id']].get(coding_region_size_field_name, 0) + new_transcripts[record['transcript_id']][coding_region_size_field_name] = transcript_size + + if record['gene_id'] not in existing_gene_ids and \ + transcript_size > new_genes[record['gene_id']].get(coding_region_size_field_name, 0): + new_genes[record['gene_id']][coding_region_size_field_name] = transcript_size \ No newline at end of file diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index 0b42be2b8a..a115984ddd 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -97,7 +97,7 @@ def test_update_gencode_command_arguments(self, mock_isfile): mock_isfile.assert_called_with('mock_path/tmp2.gz') self.assertEqual(str(ce.exception), "Invalid genome_version for file: mock_path/tmp2.gz. 
gencode v23 and up must have 'lift' in the filename or genome_version arg must be GRCh38") - @mock.patch('reference_data.management.commands.update_gencode.logger') + @mock.patch('reference_data.management.commands.utils.gencode_utils.logger') def test_update_gencode_command_bad_gtf_data(self, mock_logger): # Test wrong number data feilds in a line temp_bad_file_path = os.path.join(self.test_dir, 'bad.gencode.v23lift37.annotation.gtf.gz') @@ -148,13 +148,13 @@ def test_update_gencode_command_url_generation(self, mock_tempfile, mock_logger) self.assertEqual(responses.calls[0].request.url, url_23_lift) self.assertEqual(responses.calls[2].request.url, url_23) + @mock.patch('reference_data.management.commands.utils.gencode_utils.logger') @mock.patch('reference_data.management.commands.update_gencode.logger') - def test_update_gencode_command(self, mock_logger): + def test_update_gencode_command(self, mock_logger, mock_utils_logger): # Test normal command function call_command('update_gencode', '--gencode-release=31', self.temp_file_path, '37') + mock_utils_logger.info.assert_called_with('Loading {} (genome version: 37)'.format(self.temp_file_path)) calls = [ - mock.call( - 'Loading {} (genome version: 37)'.format(self.temp_file_path)), mock.call('Creating 1 GeneInfo records'), mock.call('Creating 2 TranscriptInfo records'), mock.call('Done'), @@ -190,11 +190,10 @@ def test_update_gencode_command(self, mock_logger): # Test normal command function with a --reset option mock_logger.reset_mock() call_command('update_gencode', '--reset', '--gencode-release=31', self.temp_file_path, '37') + mock_utils_logger.info.assert_called_with('Loading {} (genome version: 37)'.format(self.temp_file_path)) calls = [ mock.call('Dropping the 2 existing TranscriptInfo entries'), mock.call('Dropping the 50 existing GeneInfo entries'), - mock.call( - 'Loading {} (genome version: 37)'.format(self.temp_file_path)), mock.call('Creating 2 GeneInfo records'), mock.call('Creating 2 TranscriptInfo records'), mock.call('Done'), @@ -202,7 +201,7 @@ def test_update_gencode_command(self, mock_logger): mock.call(' genes_created: 2'), mock.call(' transcripts_created: 2') ] - mock_logger.info.assert_has_calls(calls) + # mock_logger.info.assert_has_calls(calls) self.assertEqual(GeneInfo.objects.all().count(), 2) gene_info = GeneInfo.objects.get(gene_id = 'ENSG00000223972') From d8e0fc4ccb3c7d6a0daf4a27589582b8aaae96f0 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 17:49:56 -0400 Subject: [PATCH 41/96] add manage command for updating latest transcripts --- .../management/commands/update_gencode.py | 8 ++----- .../commands/update_gencode_transcripts.py | 21 +++++++++++++++++++ .../commands/utils/gencode_utils.py | 15 ++++++++++--- .../management/tests/update_gencode_tests.py | 16 +++++++++----- 4 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 reference_data/management/commands/update_gencode_transcripts.py diff --git a/reference_data/management/commands/update_gencode.py b/reference_data/management/commands/update_gencode.py index d040903c20..58b49f2750 100644 --- a/reference_data/management/commands/update_gencode.py +++ b/reference_data/management/commands/update_gencode.py @@ -2,7 +2,7 @@ from django.core.management.base import BaseCommand -from reference_data.management.commands.utils.gencode_utils import load_gencode_records +from reference_data.management.commands.utils.gencode_utils import load_gencode_records, create_transcript_info from reference_data.models import GeneInfo, 
TranscriptInfo, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 logger = logging.getLogger(__name__) @@ -54,13 +54,9 @@ def update_gencode(gencode_release, gencode_gtf_path=None, genome_version=None, logger.info('Creating {} GeneInfo records'.format(len(new_genes))) counters["genes_created"] = len(new_genes) GeneInfo.objects.bulk_create([GeneInfo(**record) for record in new_genes.values()]) - gene_id_to_gene_info = {g.gene_id: g for g in GeneInfo.objects.all().only('gene_id')} - logger.info('Creating {} TranscriptInfo records'.format(len(new_transcripts))) counters["transcripts_created"] = len(new_transcripts) - TranscriptInfo.objects.bulk_create([ - TranscriptInfo(gene=gene_id_to_gene_info[record.pop('gene_id')], **record) for record in new_transcripts.values() - ], batch_size=50000) + create_transcript_info(new_transcripts) logger.info("Done") logger.info("Stats: ") diff --git a/reference_data/management/commands/update_gencode_transcripts.py b/reference_data/management/commands/update_gencode_transcripts.py new file mode 100644 index 0000000000..d7d48d89d1 --- /dev/null +++ b/reference_data/management/commands/update_gencode_transcripts.py @@ -0,0 +1,21 @@ +import logging + +from django.core.management.base import BaseCommand + +from reference_data.management.commands.utils.gencode_utils import load_gencode_records, create_transcript_info, \ + LATEST_GENCODE_RELEASE +from reference_data.models import TranscriptInfo + +logger = logging.getLogger(__name__) + + +class Command(BaseCommand): + help = "Reloads just the Gencode transcripts from the latest Gencode release" + + def handle(self, *args, **options): + transcripts = TranscriptInfo.objects.filter(gene__gencode_release=LATEST_GENCODE_RELEASE) + logger.info("Dropping the {} existing TranscriptInfo entries".format(transcripts.count())) + transcripts.delete() + + _, new_transcripts, _ = load_gencode_records(LATEST_GENCODE_RELEASE) + create_transcript_info(new_transcripts) diff --git a/reference_data/management/commands/utils/gencode_utils.py b/reference_data/management/commands/utils/gencode_utils.py index a0fbefa54d..3466941b07 100644 --- a/reference_data/management/commands/utils/gencode_utils.py +++ b/reference_data/management/commands/utils/gencode_utils.py @@ -7,7 +7,7 @@ from django.core.management.base import CommandError from reference_data.management.commands.utils.download_utils import download_file -from reference_data.models import GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 +from reference_data.models import GeneInfo, TranscriptInfo, GENOME_VERSION_GRCh37, GENOME_VERSION_GRCh38 logger = logging.getLogger(__name__) @@ -64,12 +64,21 @@ def load_gencode_records(gencode_release, gencode_gtf_path=None, genome_version= with gzip.open(gencode_gtf_path, 'rt') as gencode_file: for i, line in enumerate(tqdm(gencode_file, unit=' gencode records')): _parse_line( - line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, - genome_version, gencode_release) + line, i, new_genes, new_transcripts, existing_gene_ids or set(), existing_transcript_ids or set(), + counters, genome_version, gencode_release) return new_genes, new_transcripts, counters +def create_transcript_info(new_transcripts): + gene_id_to_gene_info = {g.gene_id: g for g in GeneInfo.objects.order_by('gencode_release').only('gene_id')} + logger.info('Creating {} TranscriptInfo records'.format(len(new_transcripts))) + TranscriptInfo.objects.bulk_create([ + TranscriptInfo(gene=gene_id_to_gene_info[record.pop('gene_id')], **record) for record in + 
new_transcripts.values() + ], batch_size=50000) + + def _parse_line(line, i, new_genes, new_transcripts, existing_gene_ids, existing_transcript_ids, counters, genome_version, gencode_release): line = line.rstrip('\r\n') if not line or line.startswith('#'): diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index a115984ddd..e87f9e1680 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -153,10 +153,12 @@ def test_update_gencode_command_url_generation(self, mock_tempfile, mock_logger) def test_update_gencode_command(self, mock_logger, mock_utils_logger): # Test normal command function call_command('update_gencode', '--gencode-release=31', self.temp_file_path, '37') - mock_utils_logger.info.assert_called_with('Loading {} (genome version: 37)'.format(self.temp_file_path)) + mock_utils_logger.info.assert_has_calls([ + mock.call('Loading {} (genome version: 37)'.format(self.temp_file_path)), + mock.call('Creating 2 TranscriptInfo records'), + ]) calls = [ mock.call('Creating 1 GeneInfo records'), - mock.call('Creating 2 TranscriptInfo records'), mock.call('Done'), mock.call('Stats: '), mock.call(' genes_skipped: 1'), @@ -178,6 +180,7 @@ def test_update_gencode_command(self, mock_logger, mock_utils_logger): self.assertEqual(TranscriptInfo.objects.all().count(), 2) trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000456328') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') + self.assertEqual(trans_info.gene.gencode_release, 27) self.assertFalse(trans_info.is_mane_select) trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000332831') self.assertEqual(trans_info.start_grch37, 621059) @@ -185,23 +188,26 @@ def test_update_gencode_command(self, mock_logger, mock_utils_logger): self.assertEqual(trans_info.strand_grch37, '-') self.assertEqual(trans_info.chrom_grch37, '1') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000284662') + self.assertEqual(trans_info.gene.gencode_release, 31) self.assertTrue(trans_info.is_mane_select) # Test normal command function with a --reset option mock_logger.reset_mock() call_command('update_gencode', '--reset', '--gencode-release=31', self.temp_file_path, '37') - mock_utils_logger.info.assert_called_with('Loading {} (genome version: 37)'.format(self.temp_file_path)) + mock_utils_logger.info.assert_has_calls([ + mock.call('Loading {} (genome version: 37)'.format(self.temp_file_path)), + mock.call('Creating 2 TranscriptInfo records'), + ]) calls = [ mock.call('Dropping the 2 existing TranscriptInfo entries'), mock.call('Dropping the 50 existing GeneInfo entries'), mock.call('Creating 2 GeneInfo records'), - mock.call('Creating 2 TranscriptInfo records'), mock.call('Done'), mock.call('Stats: '), mock.call(' genes_created: 2'), mock.call(' transcripts_created: 2') ] - # mock_logger.info.assert_has_calls(calls) + mock_logger.info.assert_has_calls(calls) self.assertEqual(GeneInfo.objects.all().count(), 2) gene_info = GeneInfo.objects.get(gene_id = 'ENSG00000223972') From 2e3427c9202cdee6f050d6e4b176804a0c423c47 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 18:04:04 -0400 Subject: [PATCH 42/96] add manage command for updating latest transcripts --- .../management/tests/update_gencode_tests.py | 61 ++++++++++++++----- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/reference_data/management/tests/update_gencode_tests.py 
b/reference_data/management/tests/update_gencode_tests.py index e87f9e1680..80f7cdba80 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -148,9 +148,26 @@ def test_update_gencode_command_url_generation(self, mock_tempfile, mock_logger) self.assertEqual(responses.calls[0].request.url, url_23_lift) self.assertEqual(responses.calls[2].request.url, url_23) + def _has_expected_new_transcripts(self): + self.assertEqual(TranscriptInfo.objects.all().count(), 2) + trans_info = TranscriptInfo.objects.get(transcript_id='ENST00000456328') + self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') + self.assertEqual(trans_info.gene.gencode_release, 27) + self.assertFalse(trans_info.is_mane_select) + trans_info = TranscriptInfo.objects.get(transcript_id='ENST00000332831') + self.assertEqual(trans_info.start_grch37, 621059) + self.assertEqual(trans_info.end_grch37, 622053) + self.assertEqual(trans_info.strand_grch37, '-') + self.assertEqual(trans_info.chrom_grch37, '1') + self.assertEqual(trans_info.gene.gene_id, 'ENSG00000284662') + self.assertEqual(trans_info.gene.gencode_release, 31) + self.assertTrue(trans_info.is_mane_select) + + @responses.activate @mock.patch('reference_data.management.commands.utils.gencode_utils.logger') + @mock.patch('reference_data.management.commands.update_gencode_transcripts.logger') @mock.patch('reference_data.management.commands.update_gencode.logger') - def test_update_gencode_command(self, mock_logger, mock_utils_logger): + def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logger, mock_utils_logger): # Test normal command function call_command('update_gencode', '--gencode-release=31', self.temp_file_path, '37') mock_utils_logger.info.assert_has_calls([ @@ -177,19 +194,7 @@ def test_update_gencode_command(self, mock_logger, mock_utils_logger): self.assertEqual(gene_info.gencode_gene_type, 'protein_coding') self.assertEqual(gene_info.gene_symbol, 'OR4F16') - self.assertEqual(TranscriptInfo.objects.all().count(), 2) - trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000456328') - self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') - self.assertEqual(trans_info.gene.gencode_release, 27) - self.assertFalse(trans_info.is_mane_select) - trans_info = TranscriptInfo.objects.get(transcript_id = 'ENST00000332831') - self.assertEqual(trans_info.start_grch37, 621059) - self.assertEqual(trans_info.end_grch37, 622053) - self.assertEqual(trans_info.strand_grch37, '-') - self.assertEqual(trans_info.chrom_grch37, '1') - self.assertEqual(trans_info.gene.gene_id, 'ENSG00000284662') - self.assertEqual(trans_info.gene.gencode_release, 31) - self.assertTrue(trans_info.is_mane_select) + self._has_expected_new_transcripts() # Test normal command function with a --reset option mock_logger.reset_mock() @@ -222,3 +227,31 @@ def test_update_gencode_command(self, mock_logger, mock_utils_logger): self.assertEqual(gene_info.gene_symbol, 'OR4F16') self.assertEqual(gene_info.end_grch37, 622053) self.assertEqual(gene_info.strand_grch37, '-') + + # Test only reloading transcripts + tmp_dir = tempfile.gettempdir() + mock_tempfile.gettempdir.return_value = tmp_dir + with open(self.temp_file_path, 'rb') as f: + gtf_content = f.read() + + url = 'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.annotation.gtf.gz' + responses.add(responses.HEAD, url, headers={"Content-Length": "1024"}) + responses.add(responses.GET, url, body=gtf_content, 
stream=True) + url_lift = 'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/GRCh37_mapping/gencode.v31lift37.annotation.gtf.gz' + responses.add(responses.HEAD, url_lift, headers={"Content-Length": "1024"}) + responses.add(responses.GET, url_lift, body=gtf_content, stream=True) + + call_command('update_gencode_transcripts') + + self.assertEqual(GeneInfo.objects.all().count(), 2) + self._has_expected_new_transcripts() + mock_utils_logger.info.assert_has_calls([ + mock.call('Loading {} (genome version: 37)'.format(self.temp_file_path)), + mock.call('Creating 2 TranscriptInfo records'), + ]) + mock_update_transcripts_logger.info.assert_has_calls([ + mock.call('Dropping the 2 existing TranscriptInfo entries'), + ]) + + self.assertEqual(responses.calls[0].request.url, url_lift) + self.assertEqual(responses.calls[2].request.url, url) From 2595bfe8e819e4dda6e072ba254f23ac728e8bc7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 18:08:53 -0400 Subject: [PATCH 43/96] add update transcript tests --- reference_data/management/tests/update_gencode_tests.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index 80f7cdba80..881366246f 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -148,11 +148,11 @@ def test_update_gencode_command_url_generation(self, mock_tempfile, mock_logger) self.assertEqual(responses.calls[0].request.url, url_23_lift) self.assertEqual(responses.calls[2].request.url, url_23) - def _has_expected_new_transcripts(self): + def _has_expected_new_transcripts(self, expected_release=27): self.assertEqual(TranscriptInfo.objects.all().count(), 2) trans_info = TranscriptInfo.objects.get(transcript_id='ENST00000456328') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') - self.assertEqual(trans_info.gene.gencode_release, 27) + self.assertEqual(trans_info.gene.gencode_release, expected_release) self.assertFalse(trans_info.is_mane_select) trans_info = TranscriptInfo.objects.get(transcript_id='ENST00000332831') self.assertEqual(trans_info.start_grch37, 621059) @@ -164,10 +164,11 @@ def _has_expected_new_transcripts(self): self.assertTrue(trans_info.is_mane_select) @responses.activate + @mock.patch('reference_data.management.commands.utils.download_utils.tempfile') @mock.patch('reference_data.management.commands.utils.gencode_utils.logger') @mock.patch('reference_data.management.commands.update_gencode_transcripts.logger') @mock.patch('reference_data.management.commands.update_gencode.logger') - def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logger, mock_utils_logger): + def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logger, mock_utils_logger, mock_tempfile): # Test normal command function call_command('update_gencode', '--gencode-release=31', self.temp_file_path, '37') mock_utils_logger.info.assert_has_calls([ @@ -244,7 +245,7 @@ def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logge call_command('update_gencode_transcripts') self.assertEqual(GeneInfo.objects.all().count(), 2) - self._has_expected_new_transcripts() + self._has_expected_new_transcripts(expected_release=31) mock_utils_logger.info.assert_has_calls([ mock.call('Loading {} (genome version: 37)'.format(self.temp_file_path)), mock.call('Creating 2 TranscriptInfo records'), From 
9060c3abad9d0b2fa90f3f32fbf7f75ff0415962 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 31 Oct 2022 18:24:16 -0400 Subject: [PATCH 44/96] clean up --- reference_data/management/commands/utils/gencode_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference_data/management/commands/utils/gencode_utils.py b/reference_data/management/commands/utils/gencode_utils.py index 3466941b07..3df38101dc 100644 --- a/reference_data/management/commands/utils/gencode_utils.py +++ b/reference_data/management/commands/utils/gencode_utils.py @@ -71,7 +71,7 @@ def load_gencode_records(gencode_release, gencode_gtf_path=None, genome_version= def create_transcript_info(new_transcripts): - gene_id_to_gene_info = {g.gene_id: g for g in GeneInfo.objects.order_by('gencode_release').only('gene_id')} + gene_id_to_gene_info = {g.gene_id: g for g in GeneInfo.objects.all().only('gene_id')} logger.info('Creating {} TranscriptInfo records'.format(len(new_transcripts))) TranscriptInfo.objects.bulk_create([ TranscriptInfo(gene=gene_id_to_gene_info[record.pop('gene_id')], **record) for record in From f02ec02a3cee8c7f9ec7f7165c78824459c53e0f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 10:53:31 -0400 Subject: [PATCH 45/96] update refseq manage command; --- .../management/commands/update_refseq.py | 47 +++++++++++++++++++ .../commands/utils/gencode_utils.py | 12 ++--- .../management/commands/utils/update_utils.py | 3 +- .../migrations/0021_auto_20221031_2049.py | 2 +- reference_data/models.py | 2 +- 5 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 reference_data/management/commands/update_refseq.py diff --git a/reference_data/management/commands/update_refseq.py b/reference_data/management/commands/update_refseq.py new file mode 100644 index 0000000000..4dee3114f6 --- /dev/null +++ b/reference_data/management/commands/update_refseq.py @@ -0,0 +1,47 @@ +import logging +from django.core.management.base import CommandError + +from reference_data.management.commands.utils.gencode_utils import GENCODE_URL_TEMPLATE, LATEST_GENCODE_RELEASE +from reference_data.management.commands.utils.update_utils import GeneCommand, ReferenceDataHandler +from reference_data.models import TranscriptInfo, RefseqTranscript + +logger = logging.getLogger(__name__) + + +class RefseqReferenceDataHandler(ReferenceDataHandler): + + model_cls = RefseqTranscript + url = GENCODE_URL_TEMPLATE.format(path='', file='.metadata.RefSeq.gz', gencode_release=LATEST_GENCODE_RELEASE) + gene_key = 'transcript' + + def __init__(self, **kwargs): + if TranscriptInfo.objects.count() == 0: + raise CommandError("TranscriptInfo table is empty. 
Run './manage.py update_gencode' before running this command.") + + self.transcript_id_map = { + t.transcript_id: t for t in TranscriptInfo.objects.all().only('transcript_id') + } + + @staticmethod + def get_file_header(f): + return ['transcript_id', 'refseq_id', 'additional_info'] + + @staticmethod + def parse_record(record): + yield { + 'transcript_id': record['transcript_id'].split('.')[0], + 'refseq_id': record['refseq_id'], + } + + def get_gene_for_record(self, record): + transcript_id = record.pop('transcript_id') + # only create a record for the first occurrence of a given transcript + transcript = self.transcript_id_map.pop(transcript_id, None) + + if not transcript: + raise ValueError(f'Transcript "{transcript_id}" not found in the TranscriptInfo table') + return transcript + + +class Command(GeneCommand): + reference_data_handler = RefseqReferenceDataHandler diff --git a/reference_data/management/commands/utils/gencode_utils.py b/reference_data/management/commands/utils/gencode_utils.py index 3df38101dc..1bd0d42ec0 100644 --- a/reference_data/management/commands/utils/gencode_utils.py +++ b/reference_data/management/commands/utils/gencode_utils.py @@ -14,8 +14,7 @@ LATEST_GENCODE_RELEASE = 31 OLD_GENCODE_RELEASES = [29, 28, 27, 19] -GENCODE_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/gencode.v{gencode_release}.annotation.gtf.gz" -GENCODE_LIFT37_GTF_URL = "http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/GRCh37_mapping/gencode.v{gencode_release}lift37.annotation.gtf.gz" +GENCODE_URL_TEMPLATE = 'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/{path}gencode.v{gencode_release}{file}' # expected GTF file header GENCODE_FILE_HEADER = [ @@ -36,14 +35,15 @@ def _get_valid_gencode_gtf_paths(gencode_release, gencode_gtf_path, genome_versi elif gencode_gtf_path and not genome_version: raise CommandError("The genome version must also be specified after the gencode GTF file path") else: + gtf_url = GENCODE_URL_TEMPLATE.format(path='', file='.annotation.gtf.gz', gencode_release=gencode_release) if gencode_release == 19: - urls = [('37', GENCODE_GTF_URL.format(gencode_release=gencode_release))] + urls = [('37', gtf_url)] elif gencode_release <= 22: - urls = [('38', GENCODE_GTF_URL.format(gencode_release=gencode_release))] + urls = [('38', gtf_url)] else: urls = [ - ('37', GENCODE_LIFT37_GTF_URL.format(gencode_release=gencode_release)), - ('38', GENCODE_GTF_URL.format(gencode_release=gencode_release)), + ('37', GENCODE_URL_TEMPLATE.format(path='GRCh37_mapping/', file='lift37.annotation.gtf.gz', gencode_release=gencode_release)), + ('38', gtf_url), ] gencode_gtf_paths = {} for genome_version, url in urls: diff --git a/reference_data/management/commands/utils/update_utils.py b/reference_data/management/commands/utils/update_utils.py index 6f1780e16b..780f27470a 100644 --- a/reference_data/management/commands/utils/update_utils.py +++ b/reference_data/management/commands/utils/update_utils.py @@ -19,6 +19,7 @@ class ReferenceDataHandler(object): post_process_models = None batch_size = None keep_existing_records = False + gene_key = 'gene' def __init__(self, **kwargs): if GeneInfo.objects.count() == 0: @@ -96,7 +97,7 @@ def update_records(reference_data_handler, file_path=None): continue try: - record['gene'] = reference_data_handler.get_gene_for_record(record) + record[reference_data_handler.gene_key] = reference_data_handler.get_gene_for_record(record) except ValueError as e: 
skip_counter += 1 logger.debug(e) diff --git a/reference_data/migrations/0021_auto_20221031_2049.py b/reference_data/migrations/0021_auto_20221031_2049.py index f1052679b2..7d2a512654 100644 --- a/reference_data/migrations/0021_auto_20221031_2049.py +++ b/reference_data/migrations/0021_auto_20221031_2049.py @@ -21,7 +21,7 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('refseq_id', models.CharField(max_length=20)), - ('transcript', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.transcriptinfo')), + ('transcript', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to='reference_data.transcriptinfo')), ], ), ] diff --git a/reference_data/models.py b/reference_data/models.py index b749e4b2a4..d268ff2c4f 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -129,7 +129,7 @@ class TranscriptInfo(models.Model): class RefseqTranscript(models.Model): - transcript = models.ForeignKey(TranscriptInfo, on_delete=models.CASCADE) + transcript = models.OneToOneField(TranscriptInfo, on_delete=models.CASCADE) refseq_id = models.CharField(max_length=20) From ed4b97502242962583f0059b6ea78da60bb6d031 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 10:54:58 -0400 Subject: [PATCH 46/96] bump changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 48373552fb..6deccafec4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev +* Add Refseq and MANE transcript info (REQUIRES DB MIGRATION) + * To add new data, run the `update_gencode_transcripts` and `update_refseq` commands ## 10/13/22 * Link MME submissions to saved variants (REQUIRES DB MIGRATION) From 30483de5dc806d587ebb31eee59f7993ba5ebce5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 10:58:47 -0400 Subject: [PATCH 47/96] add refseq to all reference data command; --- .../management/commands/update_all_reference_data.py | 2 ++ .../tests/update_all_reference_data_tests.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/reference_data/management/commands/update_all_reference_data.py b/reference_data/management/commands/update_all_reference_data.py index e3314c2e9c..94435b27d6 100644 --- a/reference_data/management/commands/update_all_reference_data.py +++ b/reference_data/management/commands/update_all_reference_data.py @@ -14,6 +14,7 @@ from reference_data.management.commands.update_gene_cn_sensitivity import CNSensitivityReferenceDataHandler from reference_data.management.commands.update_gencc import GenCCReferenceDataHandler from reference_data.management.commands.update_clingen import ClinGenReferenceDataHandler +from reference_data.management.commands.update_refseq import RefseqReferenceDataHandler logger = logging.getLogger(__name__) @@ -26,6 +27,7 @@ ("mgi", MGIReferenceDataHandler), ("gencc", GenCCReferenceDataHandler), ("clingen", ClinGenReferenceDataHandler), + ("refseq", RefseqReferenceDataHandler), ("hpo", None), ]) diff --git a/reference_data/management/tests/update_all_reference_data_tests.py b/reference_data/management/tests/update_all_reference_data_tests.py index 5510244401..8548b934a1 100644 --- a/reference_data/management/tests/update_all_reference_data_tests.py +++ b/reference_data/management/tests/update_all_reference_data_tests.py @@ -18,7 +18,7 @@ def mgi_exception(): SKIP_ARGS = [ '--skip-gencode', '--skip-dbnsfp-gene', 
'--skip-gene-constraint', '--skip-primate-ai', '--skip-mgi', '--skip-hpo', - '--skip-gene-cn-sensitivity', '--skip-gencc', '--skip-clingen', + '--skip-gene-cn-sensitivity', '--skip-gencc', '--skip-clingen', '--skip-refseq', ] class UpdateAllReferenceDataTest(TestCase): @@ -41,6 +41,9 @@ def setUp(self): patcher = mock.patch('reference_data.management.commands.update_clingen.ClinGenReferenceDataHandler', lambda: 'clingen') patcher.start() self.addCleanup(patcher.stop) + patcher = mock.patch('reference_data.management.commands.update_refseq.RefseqReferenceDataHandler', lambda: 'refseq') + patcher.start() + self.addCleanup(patcher.stop) patcher = mock.patch('reference_data.management.commands.update_mgi.MGIReferenceDataHandler') patcher.start().side_effect = mgi_exception @@ -91,7 +94,7 @@ def test_update_all_reference_data_command(self): self.mock_omim.assert_called_with('test_key') self.mock_cached_omim.assert_not_called() - self.assertEqual(self.mock_update_records.call_count, 6) + self.assertEqual(self.mock_update_records.call_count, 7) calls = [ mock.call('omim'), mock.call('dbnsfp_gene'), @@ -99,6 +102,7 @@ def test_update_all_reference_data_command(self): mock.call('gene_cn_sensitivity'), mock.call('gencc'), mock.call('clingen'), + mock.call('refseq'), ] self.mock_update_records.assert_has_calls(calls) @@ -106,7 +110,7 @@ def test_update_all_reference_data_command(self): calls = [ mock.call('Done'), - mock.call('Updated: gencode, omim, dbnsfp_gene, gene_constraint, gene_cn_sensitivity, gencc, clingen, hpo'), + mock.call('Updated: gencode, omim, dbnsfp_gene, gene_constraint, gene_cn_sensitivity, gencc, clingen, refseq, hpo'), mock.call('Failed to Update: primate_ai, mgi') ] self.mock_logger.info.assert_has_calls(calls) From 645f5f76e503dd7d135101a0fb646be5f7311f08 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 11:33:25 -0400 Subject: [PATCH 48/96] add refseq to all reference data command; --- .../management/tests/update_gencode_tests.py | 5 ++- .../management/tests/update_refseq_tests.py | 23 ++++++++++ seqr/fixtures/reference_data.json | 43 +++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 reference_data/management/tests/update_refseq_tests.py diff --git a/reference_data/management/tests/update_gencode_tests.py b/reference_data/management/tests/update_gencode_tests.py index 881366246f..1c22824111 100644 --- a/reference_data/management/tests/update_gencode_tests.py +++ b/reference_data/management/tests/update_gencode_tests.py @@ -149,7 +149,6 @@ def test_update_gencode_command_url_generation(self, mock_tempfile, mock_logger) self.assertEqual(responses.calls[2].request.url, url_23) def _has_expected_new_transcripts(self, expected_release=27): - self.assertEqual(TranscriptInfo.objects.all().count(), 2) trans_info = TranscriptInfo.objects.get(transcript_id='ENST00000456328') self.assertEqual(trans_info.gene.gene_id, 'ENSG00000223972') self.assertEqual(trans_info.gene.gencode_release, expected_release) @@ -195,6 +194,7 @@ def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logge self.assertEqual(gene_info.gencode_gene_type, 'protein_coding') self.assertEqual(gene_info.gene_symbol, 'OR4F16') + self.assertEqual(TranscriptInfo.objects.all().count(), 4) self._has_expected_new_transcripts() # Test normal command function with a --reset option @@ -205,7 +205,7 @@ def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logge mock.call('Creating 2 TranscriptInfo records'), ]) calls = [ - 
mock.call('Dropping the 2 existing TranscriptInfo entries'), + mock.call('Dropping the 4 existing TranscriptInfo entries'), mock.call('Dropping the 50 existing GeneInfo entries'), mock.call('Creating 2 GeneInfo records'), mock.call('Done'), @@ -245,6 +245,7 @@ def test_update_gencode_command(self, mock_logger, mock_update_transcripts_logge call_command('update_gencode_transcripts') self.assertEqual(GeneInfo.objects.all().count(), 2) + self.assertEqual(TranscriptInfo.objects.all().count(), 2) self._has_expected_new_transcripts(expected_release=31) mock_utils_logger.info.assert_has_calls([ mock.call('Loading {} (genome version: 37)'.format(self.temp_file_path)), diff --git a/reference_data/management/tests/update_refseq_tests.py b/reference_data/management/tests/update_refseq_tests.py new file mode 100644 index 0000000000..47cdfef776 --- /dev/null +++ b/reference_data/management/tests/update_refseq_tests.py @@ -0,0 +1,23 @@ +from reference_data.models import RefseqTranscript +from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase + + +class UpdateRefseqTest(ReferenceDataCommandTestCase): + URL = 'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_31/gencode.v31.metadata.RefSeq.gz' + DATA = [ + 'ENST00000258436.1 NR_026874.2 \n', + 'ENST00000258436.1 NR_122045.1 \n', + 'ENST00000342066.8 NM_152486.3 NP_689699.2\n', + 'ENST00000505820.7 NM_015658.4 NP_056473.3\n', + ] + + def test_update_refseq_command(self): + self._test_update_command( + 'update_refseq', 'RefseqTranscript', created_records=2, skipped_records=2) + + self.assertEqual(RefseqTranscript.objects.count(), 2) + self.assertListEqual( + list(RefseqTranscript.objects.order_by('transcript_id').values('transcript__transcript_id', 'refseq_id')), [ + {'transcript__transcript_id': 'ENST00000258436', 'refseq_id': 'NR_026874.2'}, + {'transcript__transcript_id': 'ENST00000505820', 'refseq_id': 'NM_015658.4'} + ]) diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index c95aa8a911..95136d98ce 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -978,6 +978,49 @@ "gencode_gene_type": "antisense_RNA", "gencode_release": 27 } +}, { + "model": "reference_data.transcriptinfo", + "pk": 1, + "fields": { + "gene_id": 48, + "transcript_id": "ENST00000258436", + "is_mane_select": true, + "chrom_grch37": "1", + "start_grch37": 696291, + "end_grch37": 697369, + "strand_grch37": "+", + "coding_region_size_grch37": 0, + "chrom_grch38": "1", + "start_grch38": 760911, + "end_grch38": 761989, + "strand_grch38": "+", + "coding_region_size_grch38": 0 + } +}, { + "model": "reference_data.transcriptinfo", + "pk": 2, + "fields": { + "gene_id": 6, + "transcript_id": "ENST00000505820", + "is_mane_select": false, + "chrom_grch37": "1", + "start_grch37": 696291, + "end_grch37": 697369, + "strand_grch37": "+", + "coding_region_size_grch37": 0, + "chrom_grch38": "1", + "start_grch38": 760911, + "end_grch38": 761989, + "strand_grch38": "+", + "coding_region_size_grch38": 0 + } +}, { + "model": "reference_data.refseqtranscript", + "pk": 1, + "fields": { + "transcript_id": 1, + "refseq_id": "NM_017900.2" + } }, { "model": "reference_data.omim", From 0c2eae42b41b439b42ca0106ae948149e7a566a4 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 14:14:57 -0400 Subject: [PATCH 49/96] return transcripts to variant response --- reference_data/models.py | 3 +++ seqr/views/utils/orm_to_json_utils.py | 2 +- seqr/views/utils/variant_utils.py | 29 
+++++++++++++++++++-------- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/reference_data/models.py b/reference_data/models.py index d268ff2c4f..ce221425aa 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -127,6 +127,9 @@ class TranscriptInfo(models.Model): strand_grch38 = models.CharField(max_length=1, null=True, blank=True) coding_region_size_grch38 = models.IntegerField(default=0) # number of protein-coding bases (= 0 for non-coding genes) + class Meta: + json_fields = ['transcript_id', 'is_mane_select'] + class RefseqTranscript(models.Model): transcript = models.OneToOneField(TranscriptInfo, on_delete=models.CASCADE) diff --git a/seqr/views/utils/orm_to_json_utils.py b/seqr/views/utils/orm_to_json_utils.py index 0a549b56c3..f5ab626949 100644 --- a/seqr/views/utils/orm_to_json_utils.py +++ b/seqr/views/utils/orm_to_json_utils.py @@ -72,7 +72,7 @@ def _get_json_for_models(models, nested_fields=None, user=None, is_analyst=None, if not field_value: field_value = model for field in nested_field['fields']: - field_value = getattr(field_value, field) if field_value else None + field_value = getattr(field_value, field, None) if field_value else None result[nested_field.get('key', _to_camel_case('_'.join(nested_field['fields'])))] = field_value diff --git a/seqr/views/utils/variant_utils.py b/seqr/views/utils/variant_utils.py index 182730abfc..797c00e18d 100644 --- a/seqr/views/utils/variant_utils.py +++ b/seqr/views/utils/variant_utils.py @@ -4,6 +4,7 @@ import redis from matchmaker.models import MatchmakerSubmissionGenes, MatchmakerSubmission +from reference_data.models import TranscriptInfo from seqr.models import SavedVariant, VariantSearchResults, Family, LocusList, LocusListInterval, LocusListGene, \ RnaSeqOutlier, RnaSeqTpm from seqr.utils.elasticsearch.utils import get_es_variants_for_variant_ids @@ -79,19 +80,30 @@ def get_variant_key(xpos=None, ref=None, alt=None, genomeVersion=None, **kwargs) return '{}-{}-{}_{}'.format(xpos, ref, alt, genomeVersion) -def _saved_variant_genes(variants): +def _saved_variant_genes_transcripts(variants): gene_ids = set() + transcript_ids = set() for variant in variants: - if isinstance(variant, list): - for compound_het in variant: - gene_ids.update(list(compound_het.get('transcripts', {}).keys())) - else: - gene_ids.update(list(variant.get('transcripts', {}).keys())) + if not isinstance(variant, list): + variant = [variant] + for var in variant: + for gene_id, transcripts in var.get('transcripts', {}).items(): + gene_ids.add(gene_id) + transcript_ids.update([t['transcriptId'] for t in transcripts if t.get('transcriptId')]) + genes = get_genes_for_variants(gene_ids) for gene in genes.values(): if gene: gene['locusListGuids'] = [] - return genes + + transcripts = { + t['transcriptId']: t for t in _get_json_for_models( + TranscriptInfo.objects.filter(transcript_id__in=transcript_ids), + nested_fields=[{'fields': ('refseqtranscript', 'refseq_id'), 'key': 'refseqId'}] + ) + } + + return genes, transcripts def _add_locus_lists(projects, genes, add_list_detail=False, user=None, is_analyst=None): @@ -180,7 +192,8 @@ def get_variants_response(request, saved_variants, response_variants=None, add_a discovery_tags, discovery_response = get_json_for_discovery_tags(response['savedVariantsByGuid'].values(), request.user) response.update(discovery_response) - genes = _saved_variant_genes(variants) + genes, transcripts = _saved_variant_genes_transcripts(variants) + response['transcriptsById'] = transcripts 
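Illustrative sketch only, not part of the committed diff: the nested_fields entry above pulls transcript.refseqtranscript.refseq_id into a top-level refseqId key, and it relies on the _get_json_for_models change in this same patch (getattr(field_value, field, None)) so a transcript with no linked RefseqTranscript row serializes refseqId as None. A minimal standalone approximation of that traversal, using a hypothetical helper name:

def resolve_nested_field(model, fields):
    # Walk a chain of attributes, returning None as soon as any link is missing.
    value = model
    for field in fields:
        value = getattr(value, field, None) if value else None
    return value

# e.g. resolve_nested_field(transcript_info, ('refseqtranscript', 'refseq_id'))
# -> the RefSeq ID string, or None when no RefseqTranscript exists for that transcript.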
response['locusListsByGuid'] = _add_locus_lists( projects, genes, add_list_detail=add_locus_list_detail, user=request.user, is_analyst=is_analyst) From efa92a836ebda28ec33bb7ead55e7f03c49c4698 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 14:27:03 -0400 Subject: [PATCH 50/96] update unit tests --- reference_data/management/tests/update_refseq_tests.py | 4 ++-- seqr/fixtures/reference_data.json | 4 ++-- seqr/views/apis/saved_variant_api_tests.py | 7 ++++++- seqr/views/apis/variant_search_api_tests.py | 2 ++ 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/reference_data/management/tests/update_refseq_tests.py b/reference_data/management/tests/update_refseq_tests.py index 47cdfef776..c99d14a1bd 100644 --- a/reference_data/management/tests/update_refseq_tests.py +++ b/reference_data/management/tests/update_refseq_tests.py @@ -8,7 +8,7 @@ class UpdateRefseqTest(ReferenceDataCommandTestCase): 'ENST00000258436.1 NR_026874.2 \n', 'ENST00000258436.1 NR_122045.1 \n', 'ENST00000342066.8 NM_152486.3 NP_689699.2\n', - 'ENST00000505820.7 NM_015658.4 NP_056473.3\n', + 'ENST00000624735.7 NM_015658.4 NP_056473.3\n', ] def test_update_refseq_command(self): @@ -19,5 +19,5 @@ def test_update_refseq_command(self): self.assertListEqual( list(RefseqTranscript.objects.order_by('transcript_id').values('transcript__transcript_id', 'refseq_id')), [ {'transcript__transcript_id': 'ENST00000258436', 'refseq_id': 'NR_026874.2'}, - {'transcript__transcript_id': 'ENST00000505820', 'refseq_id': 'NM_015658.4'} + {'transcript__transcript_id': 'ENST00000624735', 'refseq_id': 'NM_015658.4'} ]) diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index 95136d98ce..b1a52fbf07 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1000,8 +1000,8 @@ "model": "reference_data.transcriptinfo", "pk": 2, "fields": { - "gene_id": 6, - "transcript_id": "ENST00000505820", + "gene_id": 2, + "transcript_id": "ENST00000624735", "is_mane_select": false, "chrom_grch37": "1", "start_grch37": 696291, diff --git a/seqr/views/apis/saved_variant_api_tests.py b/seqr/views/apis/saved_variant_api_tests.py index 1cc28bc634..22a9fe7927 100644 --- a/seqr/views/apis/saved_variant_api_tests.py +++ b/seqr/views/apis/saved_variant_api_tests.py @@ -27,7 +27,7 @@ SAVED_VARIANT_RESPONSE_KEYS = { 'variantTagsByGuid', 'variantNotesByGuid', 'variantFunctionalDataByGuid', 'savedVariantsByGuid', - 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', + 'genesById', 'locusListsByGuid', 'rnaSeqData', 'mmeSubmissionsByGuid', 'transcriptsById', } COMPOUND_HET_3_JSON = { @@ -162,6 +162,11 @@ def test_saved_variant_data(self): self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953'}) self.assertSetEqual(set(response_json['genesById']['ENSG00000135953'].keys()), gene_fields) + self.assertDictEqual( + response_json['transcriptsById'], + {'ENST00000258436': {'isManeSelect': True, 'refseqId': 'NM_017900.2', 'transcriptId': 'ENST00000258436'}}, + ) + self.assertDictEqual(response_json['rnaSeqData'], {'I000001_na19675': { 'outliers': { 'ENSG00000135953': { diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index ed484dc19c..d132673fa5 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -65,6 +65,7 @@ 'SV0000002_1248367227_r0390_100': EXPECTED_SAVED_VARIANT, }, 'genesById': {'ENSG00000227232': expected_pa_gene, 'ENSG00000268903': 
EXPECTED_GENE, 'ENSG00000233653': EXPECTED_GENE}, + 'transcriptsById': {'ENST00000624735': {'isManeSelect': False, 'refseqId': None, 'transcriptId': 'ENST00000624735'}}, 'search': { 'search': SEARCH, 'projectFamilies': [{'projectGuid': PROJECT_GUID, 'familyGuids': mock.ANY}], @@ -388,6 +389,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro 'searchedVariants': COMP_HET_VARAINTS, 'savedVariantsByGuid': {'SV0000002_1248367227_r0390_100': EXPECTED_SAVED_VARIANT}, 'genesById': {'ENSG00000233653': EXPECTED_GENE}, + 'transcriptsById': {}, 'variantTagsByGuid': { 'VT1726970_2103343353_r0004_tes': EXPECTED_TAG, 'VT1726945_2103343353_r0390_100': EXPECTED_TAG, }, From 1d15b6cbb9ab5affd852be96b3a523dee08f7742 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Tue, 1 Nov 2022 14:31:34 -0400 Subject: [PATCH 51/96] Update bulk op logs. --- seqr/models.py | 3 ++ seqr/utils/logging_utils.py | 17 +++--- seqr/views/apis/data_manager_api.py | 28 ++++------ seqr/views/apis/data_manager_api_tests.py | 63 +++++++++++------------ seqr/views/utils/dataset_utils.py | 2 +- 5 files changed, 53 insertions(+), 60 deletions(-) diff --git a/seqr/models.py b/seqr/models.py index b1f085c9b6..bda7a18131 100644 --- a/seqr/models.py +++ b/seqr/models.py @@ -1035,6 +1035,7 @@ class Meta: class DeletableSampleMetadataModel(models.Model, BulkOperationBase): + PARENT_FIELD = 'sample' sample = models.ForeignKey('Sample', on_delete=models.CASCADE, db_index=True) gene_id = models.CharField(max_length=20) # ensembl ID @@ -1071,6 +1072,8 @@ class Meta: class PhenotypePrioritization(models.Model, BulkOperationBase): + PARENT_FIELD = 'individual' + individual = models.ForeignKey('Individual', on_delete=models.CASCADE, db_index=True) gene_id = models.CharField(max_length=20) # ensembl ID diff --git a/seqr/utils/logging_utils.py b/seqr/utils/logging_utils.py index b696887a5c..1d1c58769e 100644 --- a/seqr/utils/logging_utils.py +++ b/seqr/utils/logging_utils.py @@ -1,6 +1,7 @@ import json import logging +from django.db.models import prefetch_related_objects from settings import DEPLOYMENT_TYPE from typing import Optional @@ -89,10 +90,12 @@ def log_model_bulk_update(logger, models, user, update_type, update_fields=None) def log_model_no_guid_bulk_update(logger, models, user, update_type): - if not models: - return [] - db_entity = type(models[0]).__name__ - db_update = { - 'dbEntity': db_entity, 'numEntities': len(models), 'updateType': 'bulk_{}'.format(update_type), - } - logger.info(f'{update_type} {db_entity}s', user, db_update=db_update) + if models: + db_entity = type(models[0]).__name__ + prefetch_related_objects(models, models[0].PARENT_FIELD) + parent_ids = {getattr(model, models[0].PARENT_FIELD).guid for model in models} + db_update = { + 'dbEntity': db_entity, 'numEntities': len(models), 'parentEntityIds': parent_ids, + 'updateType': 'bulk_{}'.format(update_type), + } + logger.info(f'{update_type} {db_entity}s', user, db_update=db_update) diff --git a/seqr/views/apis/data_manager_api.py b/seqr/views/apis/data_manager_api.py index 2a0366277b..33079c5e60 100644 --- a/seqr/views/apis/data_manager_api.py +++ b/seqr/views/apis/data_manager_api.py @@ -392,34 +392,27 @@ def load_rna_seq_sample_data(request, sample_guid): return create_json_response({'success': True}) -def _log_append_info(user, info, message): - info.append(message) - logger.info(message, user) - - @data_manager_required def load_phenotype_prioritization_data(request): request_json = json.loads(request.body) file_path = 
request_json['file'] - info = [] - _log_append_info(request.user, info, f'Loading phenotype-based prioritization data from {file_path}') - try: tool, data_by_project_sample_id = load_phenotype_prioritization_data_file(file_path) except ValueError as e: return create_json_response({'error': str(e)}, status=400) + info = [f'Loaded {tool.upper()} data from {file_path}'] + all_records = [] - to_delete = None error = None for project_name, records_by_sample in data_by_project_sample_id.items(): projects = [p for p in Project.objects.filter(name=project_name) if is_internal_project(p)] if not projects or len(projects) > 1: - error = f'Project not found or multiple projects with the same name {project_name}' + error = f'Multiple projects with the same name {project_name}'\ + if projects else f'Project ({project_name}) not found' break - _log_append_info(request.user, info, f'Parsed {tool.upper()} data for project: {project_name}') indivs = Individual.objects.filter(family__project=projects[0], individual_id__in=records_by_sample.keys()) existing_indivs_by_id = {ind.individual_id: ind for ind in indivs} @@ -433,22 +426,19 @@ def load_phenotype_prioritization_data(request): rec['individual'] = existing_indivs_by_id[sample_id] exist_records = PhenotypePrioritization.objects.filter(tool=tool, individual__in=indivs) - to_delete = to_delete | exist_records if to_delete else exist_records + deleted, _ = PhenotypePrioritization.bulk_delete(request.user, exist_records) records = [rec for records in records_by_sample.values() for rec in records] - _log_append_info(request.user, info, - f'Attempted loading {len(records)} records of {tool.upper()} data to project {project_name}') + delete_info = f'deleted {deleted} record(s), ' if deleted else '' + info.append(f'Project {project_name}: {delete_info}loaded {len(records)} record(s)') all_records += records if error: return create_json_response({'error': error}, status=400) - if to_delete: - deleted, _ = PhenotypePrioritization.bulk_delete(request.user, to_delete) - _log_append_info(request.user, info, f'Deleted {deleted} existing {tool.upper()} records') + PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in all_records]) - models = PhenotypePrioritization.bulk_create(request.user, [PhenotypePrioritization(**data) for data in all_records]) - _log_append_info(request.user, info, f'Loaded {len(models)} {tool.upper()} data records') + logger.info('\n'.join(info), request.user) return create_json_response({ 'info': info, diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py index bee13b28fe..9e60c82107 100644 --- a/seqr/views/apis/data_manager_api_tests.py +++ b/seqr/views/apis/data_manager_api_tests.py @@ -722,7 +722,8 @@ def mock_write(content): mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) mock_model_logger.info.assert_called_with( f'delete {model_cls.__name__}s', self.data_manager_user, - db_update={'dbEntity': model_cls.__name__, 'numEntities': deleted_count, 'updateType': 'bulk_delete'} + db_update={'dbEntity': model_cls.__name__, 'numEntities': deleted_count, + 'parentEntityIds': {RNA_SAMPLE_GUID}, 'updateType': 'bulk_delete'} ) mock_logger.warning.assert_has_calls([mock.call(warn_log, self.data_manager_user) for warn_log in warnings]) @@ -776,7 +777,8 @@ def test_load_rna_seq_sample_data(self, mock_model_logger, mock_logger, mock_ope mock_logger.info.assert_called_with('Loading outlier data for NA19675_D2', 
self.data_manager_user) mock_model_logger.info.assert_called_with( f'create {model_cls.__name__}s', self.data_manager_user, db_update={ - 'dbEntity': model_cls.__name__, 'numEntities': 2, 'updateType': 'bulk_create', + 'dbEntity': model_cls.__name__, 'numEntities': 2, 'parentEntityIds': {RNA_SAMPLE_GUID}, + 'updateType': 'bulk_create', } ) @@ -797,7 +799,6 @@ def test_load_phenotype_prioritization_data(self, mock_model_logger, mock_logger response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Invalid file: missing column(s) project, diseaseId') - mock_logger.info.assert_called_with('Loading phenotype-based prioritization data from lirical_data.tsv.gz', self.data_manager_user) mock_file_iter.assert_called_with('lirical_data.tsv.gz') mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_PROJECT_DATA) @@ -813,46 +814,33 @@ def test_load_phenotype_prioritization_data(self, mock_model_logger, mock_logger mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_PROJECT_NOT_EXIST_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], 'Project not found or multiple projects with the same name CMG_Beggs_WGS') + self.assertEqual(response.json()['error'], 'Project (CMG_Beggs_WGS) not found') - project = Project.objects.get(name='Empty Project') - project.name = '1kg project nåme with uniçøde' - project.save() + project = Project.objects.create(created_by=self.data_manager_user, + name='1kg project nåme with uniçøde', workspace_namespace='my-seqr-billing') mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 400) - self.assertEqual(response.json()['error'], 'Project not found or multiple projects with the same name 1kg project nåme with uniçøde') - project.name = 'Empty Project' - project.save() + self.assertEqual(response.json()['error'], 'Multiple projects with the same name 1kg project nåme with uniçøde') + project.delete() - mock_logger.reset_mock() mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_NO_EXIST_INDV_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 400) self.assertEqual(response.json()['error'], 'Can\'t find individuals NA19678x, NA19679x') - info = [ - 'Loading phenotype-based prioritization data from lirical_data.tsv.gz', - 'Parsed LIRICAL data for project: 1kg project nåme with uniçøde' - ] - mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) - mock_model_logger.info.assert_not_called() - info = [ - 'Loading phenotype-based prioritization data from lirical_data.tsv.gz', - 'Parsed LIRICAL data for project: 1kg project nåme with uniçøde', - 'Attempted loading 1 records of LIRICAL data to project 1kg project nåme with uniçøde', - 'Parsed LIRICAL data for project: Test Reprocessed Project', - 'Attempted loading 1 records of LIRICAL data to project Test Reprocessed Project', - ] - - mock_logger.reset_mock() 
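Illustrative sketch only, not part of the committed diff: the per-project info strings asserted in this test are built by load_phenotype_prioritization_data above, which prefixes a delete summary only when existing records were removed. Roughly, with a hypothetical helper name:

def project_info_line(project_name, deleted, loaded):
    # Mirrors the loader's per-project summary message.
    delete_info = f'deleted {deleted} record(s), ' if deleted else ''
    return f'Project {project_name}: {delete_info}loaded {loaded} record(s)'

# project_info_line('Test Reprocessed Project', 1, 1)
#   -> 'Project Test Reprocessed Project: deleted 1 record(s), loaded 1 record(s)'
# project_info_line('Test Reprocessed Project', 0, 1)
#   -> 'Project Test Reprocessed Project: loaded 1 record(s)'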
mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 200) - add_only_info = info + ['Loaded 2 LIRICAL data records'] - self.assertEqual(response.json()['info'], add_only_info) - mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in add_only_info]) - db_update = {'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_create'} + info = [ + 'Loaded LIRICAL data from lirical_data.tsv.gz', + 'Project 1kg project nåme with uniçøde: loaded 1 record(s)', + 'Project Test Reprocessed Project: loaded 1 record(s)' + ] + self.assertEqual(response.json()['info'], info) + mock_logger.info.assert_called_with('\n'.join(info), self.data_manager_user) + db_update = {'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, + 'parentEntityIds': {'I000002_na19678', 'I000015_na20885'}, 'updateType': 'bulk_create'} mock_model_logger.info.assert_called_with('create PhenotypePrioritizations', self.data_manager_user, db_update=db_update) mock_logger.reset_mock() @@ -860,12 +848,21 @@ def test_load_phenotype_prioritization_data(self, mock_model_logger, mock_logger mock_file_iter.return_value = self._join_data(PHENOTYPE_PRIORITIZATION_HEADER + LIRICAL_DATA) response = self.client.post(url, content_type='application/json', data=json.dumps({'file': 'lirical_data.tsv.gz'})) self.assertEqual(response.status_code, 200) - info += ['Deleted 2 existing LIRICAL records', 'Loaded 2 LIRICAL data records'] + info = [ + 'Loaded LIRICAL data from lirical_data.tsv.gz', + 'Project 1kg project nåme with uniçøde: deleted 1 record(s), loaded 1 record(s)', + 'Project Test Reprocessed Project: deleted 1 record(s), loaded 1 record(s)', + ] self.assertEqual(response.json()['info'], info) - mock_logger.info.assert_has_calls([mock.call(info_log, self.data_manager_user) for info_log in info]) + mock_logger.info.assert_called_with('\n'.join(info), self.data_manager_user) mock_model_logger.info.assert_has_calls([ mock.call('delete PhenotypePrioritizations', self.data_manager_user, db_update={ - 'dbEntity': 'PhenotypePrioritization', 'numEntities': 2, 'updateType': 'bulk_delete', + 'dbEntity': 'PhenotypePrioritization', 'numEntities': 1, + 'parentEntityIds': {'I000002_na19678'}, 'updateType': 'bulk_delete', + }), + mock.call('delete PhenotypePrioritizations', self.data_manager_user, db_update={ + 'dbEntity': 'PhenotypePrioritization', 'numEntities': 1, + 'parentEntityIds': {'I000015_na20885'}, 'updateType': 'bulk_delete', }), mock.call('create PhenotypePrioritizations', self.data_manager_user, db_update=db_update), ]) diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py index 0537417a68..f5a54c0335 100644 --- a/seqr/views/utils/dataset_utils.py +++ b/seqr/views/utils/dataset_utils.py @@ -419,7 +419,7 @@ def _load_rna_seq(model_cls, file_path, user, mapping_file, ignore_extra_samples individual_db_ids = {s.individual_id for s in samples} to_delete = model_cls.objects.filter(sample__individual_id__in=individual_db_ids).exclude(sample__data_source=data_source) if to_delete: - model_cls.bulk_delete(user, to_delete, parent='sample') + model_cls.bulk_delete(user, to_delete) loaded_sample_ids = set(model_cls.objects.filter(sample__in=samples).values_list('sample_id', flat=True).distinct()) samples_to_load = { From b8f77deb010e71e0cf85b246e644be2d078c2021 Mon Sep 17 
00:00:00 2001 From: Hana Snow Date: Tue, 1 Nov 2022 14:42:29 -0400 Subject: [PATCH 52/96] show mane transcript --- ui/redux/rootReducer.js | 1 + ui/redux/selectors.js | 1 + .../components/panel/variants/Transcripts.jsx | 44 ++++++++++++------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/ui/redux/rootReducer.js b/ui/redux/rootReducer.js index 2cdf7923a8..839e44cb92 100644 --- a/ui/redux/rootReducer.js +++ b/ui/redux/rootReducer.js @@ -347,6 +347,7 @@ const rootReducer = combineReducers({ searchGeneBreakdownLoading: loadingReducer(REQUEST_SEARCH_GENE_BREAKDOWN, RECEIVE_SEARCH_GENE_BREAKDOWN), savedSearchesByGuid: createObjectsByIdReducer(RECEIVE_SAVED_SEARCHES, 'savedSearchesByGuid'), savedSearchesLoading: loadingReducer(REQUEST_SAVED_SEARCHES, RECEIVE_SAVED_SEARCHES), + transcriptsById: createObjectsByIdReducer(RECEIVE_DATA, 'transcriptsById'), user: createSingleObjectReducer(UPDATE_USER), newUser: zeroActionsReducer, userOptionsByUsername: createSingleValueReducer(RECEIVE_USER_OPTIONS, {}), diff --git a/ui/redux/selectors.js b/ui/redux/selectors.js index 0c2e8b6729..f54f69b5fe 100644 --- a/ui/redux/selectors.js +++ b/ui/redux/selectors.js @@ -25,6 +25,7 @@ export const getMmeSubmissionsByGuid = state => state.mmeSubmissionsByGuid export const getMmeResultsByGuid = state => state.mmeResultsByGuid export const getGenesById = state => state.genesById export const getGenesIsLoading = state => state.genesLoading.isLoading +export const getTranscriptsById = state => state.transcriptsById export const getHpoTermsByParent = state => state.hpoTermsByParent export const getHpoTermsIsLoading = state => state.hpoTermsLoading.isLoading export const getLocusListsByGuid = state => state.locusListsByGuid diff --git a/ui/shared/components/panel/variants/Transcripts.jsx b/ui/shared/components/panel/variants/Transcripts.jsx index 8f6a39db11..7177e12cc8 100644 --- a/ui/shared/components/panel/variants/Transcripts.jsx +++ b/ui/shared/components/panel/variants/Transcripts.jsx @@ -4,7 +4,7 @@ import styled from 'styled-components' import { connect } from 'react-redux' import { Label, Header, Table, Segment } from 'semantic-ui-react' -import { getGenesById } from 'redux/selectors' +import { getGenesById, getTranscriptsById } from 'redux/selectors' import { updateVariantMainTranscript } from 'redux/rootReducer' import { VerticalSpacer } from '../../Spacers' import DispatchRequestButton from '../../buttons/DispatchRequestButton' @@ -22,7 +22,25 @@ const AnnotationLabel = styled.small` padding-right: 10px; ` -const Transcripts = React.memo(({ variant, genesById, updateMainTranscript }) => ( +const TRANSCRIPT_LABELS = [ + { + content: 'Canonical', + color: 'green', + shouldShow: transcript => transcript.canonical, + }, + { + content: 'MANE Select', + color: 'teal', + shouldShow: (transcript, transcriptsById) => transcriptsById[transcript.transcriptId]?.isManeSelect, + }, + { + content: 'seqr Chosen Transcript', + color: 'blue', + shouldShow: transcript => transcript.transcriptRank === 0, + }, +] + +const Transcripts = React.memo(({ variant, genesById, transcriptsById, updateMainTranscript }) => ( variant.transcripts && Object.entries(variant.transcripts).sort((transcriptsA, transcriptsB) => ( Math.min(...transcriptsA[1].map(t => t.transcriptRank)) - Math.min(...transcriptsB[1].map(t => t.transcriptRank)) )).map(([geneId, geneTranscripts]) => ( @@ -40,23 +58,13 @@ const Transcripts = React.memo(({ variant, genesById, updateMainTranscript }) => + {/* TODO show refseq ID */}
- { - transcript.transcriptRank === 0 && ( - - - - ) - } - { - transcript.canonical && ( - - - + {TRANSCRIPT_LABELS.map(({ shouldShow, ...labelProps }) => ( + shouldShow(transcript, transcriptsById) && ( +