From 7be4cbc40efc4ac9052bb82f32507cddf0337319 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 15:07:26 -0400
Subject: [PATCH 01/10] better notification for sample count

---
 seqr/views/apis/anvil_workspace_api.py       | 7 ++++---
 seqr/views/apis/anvil_workspace_api_tests.py | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
index 18e8a272e2..b4ae3045d2 100644
--- a/seqr/views/apis/anvil_workspace_api.py
+++ b/seqr/views/apis/anvil_workspace_api.py
@@ -278,7 +278,7 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp
     # use airflow api to trigger AnVIL dags
     trigger_success = _trigger_data_loading(project, data_path, sample_type, user)
     # Send a slack message to the slack channel
-    _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user)
+    _send_load_data_slack_msg(project, ids_path, data_path, len(updated_individuals), sample_type, user)
     AirtableSession(user, base=AirtableSession.ANVIL_BASE).safe_create_record(
         'AnVIL Seqr Loading Requests Tracking', {
             'Requester Name': user.get_full_name(),
@@ -323,10 +323,10 @@ def _get_loading_project_path(project, sample_type):
 def _get_seqr_project_url(project):
     return f'{BASE_URL}project/{project.guid}/project_page'
 
-def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
+def _send_load_data_slack_msg(project, ids_path, data_path, sample_count, sample_type, user):
     pipeline_dag = _construct_dag_variables(project, data_path, sample_type)
     message_content = """
-        *{user}* requested to load {sample_type} data ({genome_version}) from AnVIL workspace *{namespace}/{name}* at 
+        *{user}* requested to load {sample_count} {sample_type} samples ({genome_version}) from AnVIL workspace *{namespace}/{name}* at 
         {path} to seqr project <{project_url}|*{project_name}*> (guid: {guid})  
   
         The sample IDs to load have been uploaded to {ids_path}.  
@@ -342,6 +342,7 @@ def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
         project_url=_get_seqr_project_url(project),
         guid=project.guid,
         project_name=project.name,
+        sample_count=sample_count,
         sample_type=sample_type,
         genome_version=GENOME_VERSION_LOOKUP.get(project.genome_version),
         dag_name = "seqr_vcf_to_es_AnVIL_{anvil_type}_v{version}".format(anvil_type=sample_type, version=DAG_VERSION),
diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
index 7d0e77781b..648bda2f52 100644
--- a/seqr/views/apis/anvil_workspace_api_tests.py
+++ b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -751,7 +751,7 @@ def _assert_valid_operation(self, project, test_add_data=True):
         self.assertEqual(responses.calls[call_cnt+1].request.headers['Authorization'], 'Bearer {}'.format(MOCK_AIRTABLE_KEY))
 
         slack_message = """
-        *test_user_manager@test.com* requested to load WES data ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at 
+        *test_user_manager@test.com* requested to load 3 WES samples ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at 
         gs://test_bucket/test_path.vcf to seqr project <http://testserver/project/{guid}/project_page|*{project_name}*> (guid: {guid})  
   
         The sample IDs to load have been uploaded to gs://seqr-datasets/v02/{version}/AnVIL_WES/{guid}/base/{guid}_ids.txt.  
@@ -842,6 +842,7 @@ def test_create_project_from_workspace_loading_delay_email(self):
                       '{}/api/v1/dags/seqr_vcf_to_es_AnVIL_WES_v0.0.1/tasks'.format(MOCK_AIRFLOW_URL),
                       headers={'Authorization': 'Bearer {}'.format(MOCK_TOKEN)},
                       json={"tasks": [
+                            {"task_id": "pyspark_compute_project_R0006_anvil_no_project_workspace"},
                             {"task_id": "pyspark_compute_project_R0007_anvil_no_project_workspace"},
                             {"task_id": "pyspark_compute_project_R0008_anvil_no_project_workspace"}],
                             "total_entries": 2},

From 7c9fadc6e80c599fff9286d3603e29c80e9b9946 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 15:44:11 -0400
Subject: [PATCH 02/10] properly track all created individuals

---
 seqr/views/apis/anvil_workspace_api_tests.py | 2 +-
 seqr/views/utils/individual_utils.py         | 1 +
 seqr/views/utils/json_to_orm_utils.py        | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
index 648bda2f52..2d7e6e6c1f 100644
--- a/seqr/views/apis/anvil_workspace_api_tests.py
+++ b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -18,7 +18,7 @@
      "Notes", "familyNotes"],
     ["1", "NA19675", "NA19675_1", "NA19678", "", "Female", "Affected", "A affected individual, test1-zsf", ""],
     ["1", "NA19678", "", "", "", "Male", "Unaffected", "a individual note", ""],
-    ["21", "HG00735", "", "", "", "Female", "Unaffected", "", "a new family"]]
+    ["21", "HG00735", "", "", "", "", "", "", "a new family"]]
 
 BAD_SAMPLE_DATA = [["1", "NA19674", "NA19674_1", "NA19678", "NA19679", "Female", "Affected", "A affected individual, test1-zsf", ""]]
 
diff --git a/seqr/views/utils/individual_utils.py b/seqr/views/utils/individual_utils.py
index caf31c9638..d55906f552 100644
--- a/seqr/views/utils/individual_utils.py
+++ b/seqr/views/utils/individual_utils.py
@@ -101,6 +101,7 @@ def _update_from_record(record, user, families_by_id, individual_lookup, updated
             individual = create_model_from_json(
                 Individual, {'family': family, 'individual_id': individual_id, 'case_review_status': 'I'}, user)
             updated_families.add(family)
+            updated_individuals.add(individual)
             individual_lookup[individual_id][family] = individual
 
     record['family'] = family
diff --git a/seqr/views/utils/json_to_orm_utils.py b/seqr/views/utils/json_to_orm_utils.py
index cd26dd0572..dd074ba99d 100644
--- a/seqr/views/utils/json_to_orm_utils.py
+++ b/seqr/views/utils/json_to_orm_utils.py
@@ -68,7 +68,8 @@ def update_model_from_json(model_obj, json, user, allow_unknown_keys=False, immu
             raise ValueError('Cannot edit field {}'.format(orm_key))
         if allow_unknown_keys and not hasattr(model_obj, orm_key):
             continue
-        if getattr(model_obj, orm_key) != value:
+        model_value = getattr(model_obj, orm_key)
+        if (model_value or value) and model_value != value:
             if orm_key in internal_fields and not user_is_analyst(user):
                 raise PermissionDenied('User {0} is not authorized to edit the internal field {1}'.format(user, orm_key))
             updated_fields.add(orm_key)

From b1970a1dd3fca39c52f1ff566f7387994ca3c4ed Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 16:14:20 -0400
Subject: [PATCH 03/10] requires sex and affected status for anvil pedigree

---
 seqr/views/apis/anvil_workspace_api.py        |  7 +++++--
 seqr/views/apis/anvil_workspace_api_tests.py  |  8 ++++++++
 seqr/views/apis/individual_api_tests.py       |  2 +-
 seqr/views/utils/pedigree_info_utils.py       | 16 ++++++++--------
 seqr/views/utils/pedigree_info_utils_tests.py |  8 ++++----
 5 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
index b4ae3045d2..24bc8f8016 100644
--- a/seqr/views/apis/anvil_workspace_api.py
+++ b/seqr/views/apis/anvil_workspace_api.py
@@ -24,7 +24,7 @@
 from seqr.views.utils.file_utils import load_uploaded_file
 from seqr.views.utils.terra_api_utils import add_service_account, has_service_account_access, TerraAPIException, \
     TerraRefreshTokenFailedException
-from seqr.views.utils.pedigree_info_utils import parse_pedigree_table
+from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, JsonConstants
 from seqr.views.utils.individual_utils import add_or_update_individuals_and_families, get_updated_pedigree_json
 from seqr.utils.communication_utils import safe_post_to_slack, send_html_email
 from seqr.utils.file_utils import does_file_exist, mv_file_to_gs, get_gs_file_list
@@ -245,7 +245,10 @@ def add_workspace_data(request, project_guid):
 def _parse_uploaded_pedigree(request_json, user):
     # Parse families/individuals in the uploaded pedigree file
     json_records = load_uploaded_file(request_json['uploadedFileId'])
-    pedigree_records, _ = parse_pedigree_table(json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True)
+    pedigree_records, _ = parse_pedigree_table(
+        json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True, required_columns=[
+            JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN,
+        ])
 
     missing_samples = [record['individualId'] for record in pedigree_records
                        if record['individualId'] not in request_json['vcfSamples']]
diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
index 2d7e6e6c1f..ffecc6af37 100644
--- a/seqr/views/apis/anvil_workspace_api_tests.py
+++ b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -666,6 +666,14 @@ def _test_errors(self, url, fields, workspace_name):
         self.assertEqual(response.reason_phrase, f'Field(s) "{field_str}" are required')
         self.mock_get_ws_access_level.assert_called_with(self.manager_user, TEST_WORKSPACE_NAMESPACE, workspace_name)
 
+        # test missing columns
+        self.mock_load_file.return_value = [['family', 'individual'], ['1', '2']]
+        response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
+        self.assertEqual(response.status_code, 400)
+        response_json = response.json()
+        self.assertListEqual(response_json['errors'], [
+            'Error while converting uploaded pedigree file rows to json: Sex, Affected not specified in row #1'])
+
         # test sample data error
         self.mock_load_file.return_value = LOAD_SAMPLE_DATA + BAD_SAMPLE_DATA
         response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py
index 827805c3a5..5bc762a2fd 100644
--- a/seqr/views/apis/individual_api_tests.py
+++ b/seqr/views/apis/individual_api_tests.py
@@ -295,7 +295,7 @@ def test_individuals_table_handler(self):
         self.assertDictEqual(response.json(), {'errors': mock.ANY, 'warnings': []})
         errors = response.json()['errors']
         self.assertEqual(len(errors), 1)
-        self.assertEqual(errors[0].split('\n')[0],"Error while converting test.tsv rows to json: Individual Id not specified in row #1:")
+        self.assertEqual(errors[0], "Error while converting test.tsv rows to json: Individual Id not specified in row #1")
 
         response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
             'test.tsv', 'Family ID	Individual ID	Previous Individual ID\n"1"	"NA19675_1"	"NA19675"'.encode('utf-8'))})
diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
index bb9774dd04..21e34037e1 100644
--- a/seqr/views/utils/pedigree_info_utils.py
+++ b/seqr/views/utils/pedigree_info_utils.py
@@ -9,7 +9,7 @@
 from seqr.utils.communication_utils import send_html_email
 from seqr.utils.logging_utils import SeqrLogger
 from seqr.utils.middleware import ErrorsWarningsException
-from seqr.views.utils.json_utils import _to_snake_case
+from seqr.views.utils.json_utils import _to_snake_case, _to_title_case
 from seqr.views.utils.permissions_utils import user_is_pm, get_pm_user_emails
 from seqr.models import Individual
 
@@ -19,7 +19,7 @@
 RELATIONSHIP_REVERSE_LOOKUP = {v.lower(): k for k, v in Individual.RELATIONSHIP_LOOKUP.items()}
 
 
-def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False):
+def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False, required_columns=None):
     """Validates and parses pedigree information from a .fam, .tsv, or Excel file.
 
     Args:
@@ -98,7 +98,7 @@ def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warn
         else:
             logger.info("Parsing regular pedigree file", user)
 
-        json_records = _convert_fam_file_rows_to_json(rows)
+        json_records = _convert_fam_file_rows_to_json(rows, required_columns=required_columns)
     except Exception as e:
         raise ErrorsWarningsException(['Error while converting {} rows to json: {}'.format(filename, e)], [])
 
@@ -130,7 +130,7 @@ def _parse_affected(affected):
     return None
 
 
-def _convert_fam_file_rows_to_json(rows):
+def _convert_fam_file_rows_to_json(rows, required_columns=None):
     """Parse the values in rows and convert them to a json representation.
 
     Args:
@@ -163,10 +163,10 @@ def _convert_fam_file_rows_to_json(rows):
         json_record = _parse_row_dict(row_dict, i)
 
         # validate
-        if not json_record.get(JsonConstants.FAMILY_ID_COLUMN):
-            raise ValueError("Family Id not specified in row #%d:\n%s" % (i+1, json_record))
-        if not json_record.get(JsonConstants.INDIVIDUAL_ID_COLUMN):
-            raise ValueError("Individual Id not specified in row #%d:\n%s" % (i+1, json_record))
+        required_columns = (required_columns or []) + [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN]
+        missing_cols = [col for col in required_columns if not json_record.get(col)]
+        if missing_cols:
+            raise ValueError(f"{', '.join([_to_title_case(_to_snake_case(col)) for col in missing_cols])} not specified in row #{i + 1}")
 
         json_results.append(json_record)
 
diff --git a/seqr/views/utils/pedigree_info_utils_tests.py b/seqr/views/utils/pedigree_info_utils_tests.py
index 6f0eb94fe8..8356b6b603 100644
--- a/seqr/views/utils/pedigree_info_utils_tests.py
+++ b/seqr/views/utils/pedigree_info_utils_tests.py
@@ -27,8 +27,8 @@ def test_parse_pedigree_table(self):
                 [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
                 ['', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
         self.assertEqual(len(ec.exception.errors), 1)
-        self.assertEqual(ec.exception.errors[0].split('\n')[0],
-                         "Error while converting {} rows to json: Family Id not specified in row #1:".format(FILENAME))
+        self.assertEqual(ec.exception.errors[0],
+                         "Error while converting {} rows to json: Family Id, Individual Id not specified in row #1".format(FILENAME))
         self.assertListEqual(ec.exception.warnings, [])
 
         with self.assertRaises(ErrorsWarningsException) as ec:
@@ -36,8 +36,8 @@ def test_parse_pedigree_table(self):
                 [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
                  ['fam1', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
         self.assertEqual(len(ec.exception.errors), 1)
-        self.assertEqual(ec.exception.errors[0].split('\n')[0],
-                         "Error while converting {} rows to json: Individual Id not specified in row #1:".format(FILENAME))
+        self.assertEqual(ec.exception.errors[0],
+                         "Error while converting {} rows to json: Individual Id not specified in row #1".format(FILENAME))
         self.assertListEqual(ec.exception.warnings, [])
 
         with self.assertRaises(ErrorsWarningsException) as ec:

From 43e36a2e7373278f83b061a543c5e1980597c095 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 16:22:16 -0400
Subject: [PATCH 04/10] update required columns in anvil ui

---
 ui/shared/components/panel/LoadWorkspaceDataForm.jsx | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
index e702ce94d4..829688a8e7 100644
--- a/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
+++ b/ui/shared/components/panel/LoadWorkspaceDataForm.jsx
@@ -14,6 +14,8 @@ import {
   FILE_FORMATS,
   INDIVIDUAL_CORE_EXPORT_DATA,
   INDIVIDUAL_ID_EXPORT_DATA,
+  INDIVIDUAL_FIELD_SEX,
+  INDIVIDUAL_FIELD_AFFECTED,
   SAMPLE_TYPE_OPTIONS,
 } from 'shared/utils/constants'
 import { validateUploadedFile } from 'shared/components/form/XHRUploaderField'
@@ -29,12 +31,17 @@ const VCF_DOCUMENTATION_URL = 'https://storage.googleapis.com/seqr-reference-dat
 const WARNING_HEADER = 'Planned Data Loading Delay'
 const WARNING_BANNER = null
 
+const NON_ID_REQUIRED_FIELDS = [INDIVIDUAL_FIELD_SEX, INDIVIDUAL_FIELD_AFFECTED]
+
 const FIELD_DESCRIPTIONS = {
   [FAMILY_FIELD_ID]: 'Family ID',
   [INDIVIDUAL_FIELD_ID]: 'Individual ID (needs to match the VCF ids)',
 }
 const REQUIRED_FIELDS = INDIVIDUAL_ID_EXPORT_DATA.map(config => (
   { ...config, description: FIELD_DESCRIPTIONS[config.field] }))
+REQUIRED_FIELDS.push(...INDIVIDUAL_CORE_EXPORT_DATA.filter(({ field }) => NON_ID_REQUIRED_FIELDS.includes(field)))
+
+const OPTIONAL_FIELDS = INDIVIDUAL_CORE_EXPORT_DATA.filter(({ field }) => !NON_ID_REQUIRED_FIELDS.includes(field))
 
 const BLANK_EXPORT = {
   filename: 'individuals_template',
@@ -51,7 +58,7 @@ const UploadPedigreeField = React.memo(({ name, error }) => (
         name={name}
         blankExportConfig={BLANK_EXPORT}
         requiredFields={REQUIRED_FIELDS}
-        optionalFields={INDIVIDUAL_CORE_EXPORT_DATA}
+        optionalFields={OPTIONAL_FIELDS}
         uploadFormats={FILE_FORMATS}
         actionDescription="load individual data from an AnVIL workspace to a new seqr project"
         url="/api/upload_temp_file"

From 789eb15d3bbd34fa8185a084b73756c8083c2934 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 16:31:08 -0400
Subject: [PATCH 05/10] stop re-appending ID columns to required_columns on every row

---
 seqr/views/utils/pedigree_info_utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
index 21e34037e1..c0ff5db70f 100644
--- a/seqr/views/utils/pedigree_info_utils.py
+++ b/seqr/views/utils/pedigree_info_utils.py
@@ -163,8 +163,10 @@ def _convert_fam_file_rows_to_json(rows, required_columns=None):
         json_record = _parse_row_dict(row_dict, i)
 
         # validate
-        required_columns = (required_columns or []) + [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN]
-        missing_cols = [col for col in required_columns if not json_record.get(col)]
+        columns = [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN]
+        if required_columns:
+            columns += required_columns
+        missing_cols = [col for col in columns if not json_record.get(col)]
         if missing_cols:
             raise ValueError(f"{', '.join([_to_title_case(_to_snake_case(col)) for col in missing_cols])} not specified in row #{i + 1}")
 

From 7f93041b0596c70be02b1e2239b4e86fcda0c9c0 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Thu, 20 Oct 2022 16:36:38 -0400
Subject: [PATCH 06/10] fix misspelled &nbsp; entity shown as literal text in cookie notice

---
 ui/shared/components/page/AcceptCookies.jsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/shared/components/page/AcceptCookies.jsx b/ui/shared/components/page/AcceptCookies.jsx
index 7eb634fb1d..85aef3c310 100644
--- a/ui/shared/components/page/AcceptCookies.jsx
+++ b/ui/shared/components/page/AcceptCookies.jsx
@@ -23,8 +23,8 @@ const AcceptCookies = () => (
       content={
         <Modal.Content>
           seqr collects cookies to improve our user experience and ensure the secure functioning of our site. For more
-          details, see our &npsp;
-          <Link target="_blank" to="/privacy_policy">Privacy Policy</Link>
+          details, see our
+          <Link target="_blank" to="/privacy_policy"> Privacy Policy</Link>
           . By clicking &quot;Accept&quot;, you consent to the use of these cookies.
         </Modal.Content>
       }

From 0dd56e7df562e0dcbd97796a6e0d794a6d142b70 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Fri, 21 Oct 2022 11:19:42 -0400
Subject: [PATCH 07/10] better null value handling for pedigree records

---
 seqr/views/utils/json_to_orm_utils.py         |  3 +--
 seqr/views/utils/pedigree_info_utils.py       | 13 ++++++++-----
 seqr/views/utils/pedigree_info_utils_tests.py |  8 ++++----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/seqr/views/utils/json_to_orm_utils.py b/seqr/views/utils/json_to_orm_utils.py
index dd074ba99d..cd26dd0572 100644
--- a/seqr/views/utils/json_to_orm_utils.py
+++ b/seqr/views/utils/json_to_orm_utils.py
@@ -68,8 +68,7 @@ def update_model_from_json(model_obj, json, user, allow_unknown_keys=False, immu
             raise ValueError('Cannot edit field {}'.format(orm_key))
         if allow_unknown_keys and not hasattr(model_obj, orm_key):
             continue
-        model_value = getattr(model_obj, orm_key)
-        if (model_value or value) and model_value != value:
+        if getattr(model_obj, orm_key) != value:
             if orm_key in internal_fields and not user_is_analyst(user):
                 raise PermissionDenied('User {0} is not authorized to edit the internal field {1}'.format(user, orm_key))
             updated_fields.add(orm_key)
diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
index c0ff5db70f..e88388c4f3 100644
--- a/seqr/views/utils/pedigree_info_utils.py
+++ b/seqr/views/utils/pedigree_info_utils.py
@@ -196,11 +196,14 @@ def _parse_row_dict(row_dict, i):
 
         if column:
             format_func = JsonConstants.FORMAT_COLUMNS.get(column)
-            if format_func and (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
-                parsed_value = format_func(value)
-                if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
-                    raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
-                value = parsed_value
+            if format_func:
+                if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
+                    parsed_value = format_func(value)
+                    if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
+                        raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
+                    value = parsed_value
+            elif value == '':
+                value = None
             json_record[column] = value
     return json_record
 
diff --git a/seqr/views/utils/pedigree_info_utils_tests.py b/seqr/views/utils/pedigree_info_utils_tests.py
index 8356b6b603..18df1eee4f 100644
--- a/seqr/views/utils/pedigree_info_utils_tests.py
+++ b/seqr/views/utils/pedigree_info_utils_tests.py
@@ -88,8 +88,8 @@ def test_parse_pedigree_table(self):
              'maternalId': 'ind2', 'notes': 'some notes', 'codedPhenotype': 'HPO:12345', 'probandRelationship': '',
              'previousIndividualId': 'ind1_old_id'},
             {'familyId': 'fam1', 'individualId': 'ind2', 'sex': 'F', 'affected': 'N', 'paternalId': '',
-             'maternalId': 'ind3', 'notes': '', 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
-             'previousIndividualId': ''},
+             'maternalId': 'ind3', 'notes': None, 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
+             'previousIndividualId': None},
         ])
         self.assertListEqual(warnings, no_error_warnings)
 
@@ -180,9 +180,9 @@ def test_parse_sample_manifest(self, mock_email, mock_pm_group):
         records, warnings = parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project)
         self.assertListEqual(records, [
             {'affected': 'N', 'maternalId': '', 'notes': 'probably dad', 'individualId': 'SCO_PED073B_GA0339_1',
-             'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': '',
+             'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': None,
              'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False,},
-            {'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': '', 'individualId': 'SCO_PED073C_GA0340_1',
+            {'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': None, 'individualId': 'SCO_PED073C_GA0340_1',
              'sex': 'F', 'familyId': 'PED073', 'paternalId': 'SCO_PED073B_GA0339_1', 'codedPhenotype': 'Perinatal death',
              'primaryBiosample': 'BM', 'analyteType': 'D', 'tissueAffectedStatus': True,
              }])

From 588b1df3e716135214508792703d7a8703665e83 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Fri, 21 Oct 2022 11:37:02 -0400
Subject: [PATCH 08/10] remove unused in silico from ui

---
 ui/shared/components/panel/variants/Predictions.jsx | 4 ++--
 ui/shared/utils/constants.js                        | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx
index b72f6e9771..90242d80f1 100644
--- a/ui/shared/components/panel/variants/Predictions.jsx
+++ b/ui/shared/components/panel/variants/Predictions.jsx
@@ -20,10 +20,10 @@ const PredictionValue = styled.span`
 const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately
 
 const predictionFieldValue = (
-  predictions, { field, dangerThreshold, warningThreshold, indicatorMap, noSeverity, infoField, infoTitle },
+  predictions, { field, dangerThreshold, warningThreshold, indicatorMap, infoField, infoTitle },
 ) => {
   let value = predictions[field]
-  if (noSeverity || value === null || value === undefined) {
+  if (value === null || value === undefined) {
     return { value }
   }
 
diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js
index 5409cf63ab..03c1c59cf2 100644
--- a/ui/shared/utils/constants.js
+++ b/ui/shared/utils/constants.js
@@ -1189,9 +1189,6 @@ export const PREDICTOR_FIELDS = [
   { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP },
   { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP },
   { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP },
-  { field: 'metasvm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP },
-  { field: 'gerp_rs', group: MISSENSE_IN_SILICO_GROUP, noSeverity: true, min: -13, max: 7 },
-  { field: 'phastcons_100_vert', group: MISSENSE_IN_SILICO_GROUP, noSeverity: true },
   { field: 'apogee', warningThreshold: 0.5, dangerThreshold: 0.5 },
   { field: 'gnomad_noncoding', displayOnly: true, warningThreshold: 0, dangerThreshold: 1.5 },
   { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } },

From 791e0e6dbf8139d040f8b1503175a77a434e94e3 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Fri, 21 Oct 2022 11:43:12 -0400
Subject: [PATCH 09/10] remove unused in silico from server

---
 seqr/utils/elasticsearch/constants.py      |  3 ---
 seqr/utils/elasticsearch/es_utils_tests.py |  6 ------
 seqr/views/utils/test_utils.py             | 22 +++++++++-------------
 3 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/seqr/utils/elasticsearch/constants.py b/seqr/utils/elasticsearch/constants.py
index 48f133fdbc..2c6c5a6496 100644
--- a/seqr/utils/elasticsearch/constants.py
+++ b/seqr/utils/elasticsearch/constants.py
@@ -328,11 +328,8 @@
     'dbnsfp_DANN_score': {},
     'eigen_Eigen_phred': {},
     'dbnsfp_FATHMM_pred': {},
-    'dbnsfp_GERP_RS': {'response_key': 'gerp_rs'},
     'mpc_MPC': {},
-    'dbnsfp_MetaSVM_pred': {},
     'dbnsfp_MutationTaster_pred': {'response_key': 'mut_taster'},
-    'dbnsfp_phastCons100way_vertebrate': {'response_key': 'phastcons_100_vert'},
     'dbnsfp_Polyphen2_HVAR_pred': {'response_key': 'polyphen'},
     'gnomad_non_coding_constraint_z_score': {'response_key': 'gnomad_noncoding'},
     'primate_ai_score': {'response_key': 'primate_ai'},
diff --git a/seqr/utils/elasticsearch/es_utils_tests.py b/seqr/utils/elasticsearch/es_utils_tests.py
index 7c18bc9c09..0e6bcef1aa 100644
--- a/seqr/utils/elasticsearch/es_utils_tests.py
+++ b/seqr/utils/elasticsearch/es_utils_tests.py
@@ -771,13 +771,10 @@
     'contig',
     'variantId',
     'dbnsfp_MutationTaster_pred',
-    'dbnsfp_phastCons100way_vertebrate',
-    'dbnsfp_MetaSVM_pred',
     'mpc_MPC',
     'dbnsfp_DANN_score',
     'eigen_Eigen_phred',
     'dbnsfp_REVEL_score',
-    'dbnsfp_GERP_RS',
     'splice_ai_delta_score',
     'splice_ai_splice_consequence',
     'dbnsfp_FATHMM_pred',
@@ -893,13 +890,10 @@
     "common_low_heteroplasmy",
     "contig",
     "dbnsfp_FATHMM_pred",
-    "dbnsfp_GERP_RS",
-    "dbnsfp_MetaSVM_pred",
     "dbnsfp_MutationTaster_pred",
     "dbnsfp_Polyphen2_HVAR_pred",
     "dbnsfp_REVEL_score",
     "dbnsfp_SIFT_pred",
-    "dbnsfp_phastCons100way_vertebrate",
     "end",
     "filters",
     "genotypes",
diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py
index 014c0ff7ed..d0e72f3f76 100644
--- a/seqr/views/utils/test_utils.py
+++ b/seqr/views/utils/test_utils.py
@@ -896,9 +896,8 @@ def call_request_json(self, index=-1):
         'pos': 248367227,
         'predictions': {'splice_ai': 0.75, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                         'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
-                        'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'metasvm': None, 'primate_ai': None,
-                        'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None,
-                        'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
+                        'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'primate_ai': None,
+                        'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
         'ref': 'TC',
         'rsid': None,
         'screenRegionType': 'dELS',
@@ -982,8 +981,8 @@ def call_request_json(self, index=-1):
         'predictions': {
             'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
             'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None, 'polyphen': None,
-            'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': 1, 'gerp_rs': None,
-            'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'splice_ai_consequence': None,
+            'dann': None, 'sift': None, 'cadd': None, 'primate_ai': 1,
+            'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None,
         },
         'ref': 'GAGA',
         'rsid': None,
@@ -1068,9 +1067,8 @@ def call_request_json(self, index=-1):
     'pos': 49045487,
     'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                     'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
-                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
-                    'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': 0.374,
-                    'splice_ai_consequence': None},
+                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
+                    'mpc': None, 'strvctvre': 0.374, 'splice_ai_consequence': None},
     'ref': None,
     'rsid': None,
     'screenRegionType': None,
@@ -1156,9 +1154,8 @@ def call_request_json(self, index=-1):
     'pos': 49045387,
     'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                     'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
-                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
-                    'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'gnomad_noncoding': None,
-                    'splice_ai_consequence': None},
+                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
+                    'mpc': None, 'strvctvre': None, 'gnomad_noncoding': None, 'splice_ai_consequence': None},
     'ref': None,
     'rsid': None,
     'screenRegionType': None,
@@ -1235,8 +1232,7 @@ def call_request_json(self, index=-1):
         },
     'pos': 10195,
     'predictions': {'hmtvar': 0.71, 'apogee': 0.42, 'cadd': None, 'dann': None, 'eigen': None, 'fathmm': 'T',
-                    'gerp_rs': '5.07', 'haplogroup_defining': None, 'metasvm': None, 'mitotip': None,
-                    'mpc': None, 'mut_taster': 'N', 'phastcons_100_vert': '0.958000', 'polyphen': None,
+                    'haplogroup_defining': None, 'mitotip': None, 'mpc': None, 'mut_taster': 'N', 'polyphen': None,
                     'primate_ai': None, 'revel': None, 'sift': 'D', 'splice_ai': None, 'splice_ai_consequence': None,
                     'strvctvre': None, 'gnomad_noncoding': None,},
     'ref': 'C',

From 87c711a0b0d33b2f46ac4c71359b5e743cbb5776 Mon Sep 17 00:00:00 2001
From: Hana Snow <hana.snow@gmail.com>
Date: Fri, 21 Oct 2022 12:02:51 -0400
Subject: [PATCH 10/10] add kegg gene link

---
 ui/shared/components/panel/genes/GeneDetail.jsx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx
index adae51bd57..87e3e54cf3 100644
--- a/ui/shared/components/panel/genes/GeneDetail.jsx
+++ b/ui/shared/components/panel/genes/GeneDetail.jsx
@@ -305,6 +305,7 @@ const GeneDetailContent = React.memo(({ gene, user, updateGeneNote: dispatchUpda
     { title: 'primAD', link: `http://primad.basespace.illumina.com/gene/${gene.geneSymbol}?dataset=gnomad_r3`, description: 'Primate Genome Aggregation Database' },
     gene.mgiMarkerId ? { title: 'MGI', link: `http://www.informatics.jax.org/marker/${gene.mgiMarkerId}`, description: 'Mouse Genome Informatics' } : null,
     gene.mgiMarkerId ? { title: 'IMPC', link: `https://www.mousephenotype.org/data/genes/${gene.mgiMarkerId}`, description: 'International Mouse Phenotyping Consortium' } : null,
+    { title: 'KEGG', link: `https://www.kegg.jp/kegg-bin/search_pathway_text?keyword=${gene.geneSymbol}&viewImage=true`, description: 'Pathway maps representing known molecular interaction' },
     gene.clinGen ? { title: 'ClinGen', link: gene.clinGen.href, description: 'ClinGen Dosage Sensitivity' } : null,
     { title: 'ClinVar', link: `https://www.ncbi.nlm.nih.gov/clinvar?term=${gene.geneSymbol}[gene]`, description: 'Aggregated information about human genomic variation' },
     user.isAnalyst ? { title: 'HGMD', link: `https://my.qiagendigitalinsights.com/bbp/view/hgmd/pro/gene.php?gene=${gene.geneSymbol}`, description: 'Human Gene Mutation Database ' } : null,