diff --git a/CHANGELOG.md b/CHANGELOG.md index 9808f0fea5..73d72d8dda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ # _seqr_ Changes ## dev + +## 8/22/23 * Add db indices to optimize RNA data queries (REQUIRES DB MIGRATION) ## 7/11/23 diff --git a/deploy/docker/seqr/Dockerfile b/deploy/docker/seqr/Dockerfile index 2093aef700..22a7b63a27 100644 --- a/deploy/docker/seqr/Dockerfile +++ b/deploy/docker/seqr/Dockerfile @@ -93,6 +93,7 @@ EXPOSE 8000 ENV TERM=xterm COPY deploy/docker/seqr/readiness_probe / +COPY deploy/docker/seqr/wait_for_routes / COPY deploy/docker/seqr/bin/*.sh /usr/local/bin/ COPY deploy/docker/seqr/config/*.py ./ COPY deploy/docker/seqr/bashrc /root/.bashrc diff --git a/deploy/docker/seqr/wait_for_routes b/deploy/docker/seqr/wait_for_routes new file mode 100755 index 0000000000..f8263fe1ac --- /dev/null +++ b/deploy/docker/seqr/wait_for_routes @@ -0,0 +1,26 @@ +#!/bin/bash + +### +# Waits for network endpoints. Intended usage is within Kubernetes CronJobs to wait for sidecar availability. +# Usage: ./wait_for_routes https://www.google.com/ https://www.broadinstitute.org https://www.broadins.org +### + +RETRY_COUNT=10 +SLEEP_S=2 + +for route in "$@" +do + retries=0 + until [ "$retries" -ge "$RETRY_COUNT" ] + do + curl -s $route -o /dev/null && echo "Successful ping of $route" && break + retries=$((retries+1)) + if [ "$retries" -eq "$RETRY_COUNT" ]; then + echo "Route ${route} wasn't available after ${RETRY_COUNT} connection attempts" + exit 1 + else + echo "Unable to connect to ${route}, retrying. 
Attempt ${retries}/${RETRY_COUNT}" + sleep $SLEEP_S + fi + done +done diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 8580ae9aee..eb2a55f875 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -778,6 +778,29 @@ def _omim_sort(cls, r, omim_gene_set): def _gene_rank_sort(cls, r, gene_ranks): return [hl.min(cls._gene_ids_expr(r).map(gene_ranks.get))] + def gene_counts(self): + selects = { + 'gene_ids': self._gene_ids_expr, + 'families': self.BASE_ANNOTATION_FIELDS['familyGuids'], + } + ch_ht = None + if self._comp_het_ht: + ch_ht = self._comp_het_ht.explode(self._comp_het_ht[GROUPED_VARIANTS_FIELD]) + ch_ht = ch_ht.select(**{k: v(ch_ht[GROUPED_VARIANTS_FIELD]) for k, v in selects.items()}) + + if self._ht: + ht = self._ht.select(**{k: v(self._ht) for k, v in selects.items()}) + if ch_ht: + ht = ht.join(ch_ht, 'outer') + ht = ht.transmute(**{k: hl.or_else(ht[k], ht[f'{k}_1']) for k in selects}) + else: + ht = ch_ht + + ht = ht.explode('gene_ids').explode('families') + return ht.aggregate(hl.agg.group_by( + ht.gene_ids, hl.struct(total=hl.agg.count(), families=hl.agg.counter(ht.families)) + )) + class VariantHailTableQuery(BaseHailTableQuery): diff --git a/hail_search/search.py b/hail_search/search.py index eec334a80f..9e9fc4dd7e 100644 --- a/hail_search/search.py +++ b/hail_search/search.py @@ -1,7 +1,7 @@ from hail_search.hail_search_query import QUERY_CLASS_MAP -def search_hail_backend(request): +def search_hail_backend(request, gene_counts=False): sample_data = request.pop('sample_data', {}) genome_version = request.pop('genome_version') @@ -12,7 +12,10 @@ def search_hail_backend(request): query_cls = QUERY_CLASS_MAP[single_data_type] query = query_cls(sample_data, genome_version, **request) - return query.search() + if gene_counts: + return query.gene_counts() + else: + return query.search() def load_globals(): diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 
42c03b4f5d..71ebe42b92 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -3,8 +3,8 @@ from hail_search.test_utils import get_hail_search_body, FAMILY_2_VARIANT_SAMPLE_DATA, FAMILY_2_MISSING_SAMPLE_DATA, \ VARIANT1, VARIANT2, VARIANT3, VARIANT4, MULTI_PROJECT_SAMPLE_DATA, MULTI_PROJECT_MISSING_SAMPLE_DATA, \ - LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, SV_WGS_SAMPLE_DATA, SV_VARIANT1, \ - SV_VARIANT2, SV_VARIANT3, SV_VARIANT4 + LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, SV_WGS_SAMPLE_DATA, \ + SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4 from hail_search.web_app import init_web_app PROJECT_2_VARIANT = { @@ -119,7 +119,7 @@ async def test_status(self): resp_json = await resp.json() self.assertDictEqual(resp_json, {'success': True}) - async def _assert_expected_search(self, results, **search_kwargs): + async def _assert_expected_search(self, results, gene_counts=None, **search_kwargs): search_body = get_hail_search_body(**search_kwargs) async with self.client.request('POST', '/search', json=search_body) as resp: self.assertEqual(resp.status, 200) @@ -129,9 +129,18 @@ async def _assert_expected_search(self, results, **search_kwargs): for i, result in enumerate(resp_json['results']): self.assertEqual(result, results[i]) + if gene_counts: + async with self.client.request('POST', '/gene_counts', json=search_body) as resp: + self.assertEqual(resp.status, 200) + gene_counts_json = await resp.json() + self.assertDictEqual(gene_counts_json, gene_counts) + async def test_single_family_search(self): await self._assert_expected_search( - [VARIANT1, VARIANT2, VARIANT3, VARIANT4], sample_data=FAMILY_2_VARIANT_SAMPLE_DATA, + [VARIANT1, VARIANT2, VARIANT3, VARIANT4], sample_data=FAMILY_2_VARIANT_SAMPLE_DATA, gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}}, + } ) await 
self._assert_expected_search( @@ -140,13 +149,16 @@ async def test_single_family_search(self): async def test_single_project_search(self): await self._assert_expected_search( - [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', + [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={ + 'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}}, + 'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}}, + } ) async def test_multi_project_search(self): await self._assert_expected_search( [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4], - sample_data=MULTI_PROJECT_SAMPLE_DATA, + gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA, ) async def test_inheritance_filter(self): @@ -195,8 +207,10 @@ async def test_inheritance_filter(self): inheritance_mode = 'compound_het' await self._assert_expected_search( - [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, - **COMP_HET_ALL_PASS_FILTERS, + [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}}, + }, **COMP_HET_ALL_PASS_FILTERS, ) await self._assert_expected_search( @@ -206,8 +220,10 @@ async def test_inheritance_filter(self): inheritance_mode = 'recessive' await self._assert_expected_search( - [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, - sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS, + [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, gene_counts={ + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}}, + }, sample_data=MULTI_PROJECT_SAMPLE_DATA, 
**COMP_HET_ALL_PASS_FILTERS, ) await self._assert_expected_search( diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 85a942107e..993f5fc63f 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -537,6 +537,11 @@ VARIANT_ID_SEARCH = {'variant_ids': [['1', 10439, 'AC', 'A'], ['1', 91511686, 'TCA', 'G']], 'rs_ids': []} RSID_SEARCH = {'variant_ids': [], 'rs_ids': ['rs1801131']} +GENE_COUNTS = { + 'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}}, + 'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000011_11': 1}}, +} + def get_hail_search_body(genome_version='GRCh38', num_results=100, sample_data=None, omit_sample_type=None, **search_body): sample_data = sample_data or EXPECTED_SAMPLE_DATA diff --git a/hail_search/web_app.py b/hail_search/web_app.py index 0dc5775a6b..303ab82f5c 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -14,6 +14,10 @@ def hl_json_dumps(obj): return json.dumps(obj, default=_hl_json_default) +async def gene_counts(request: web.Request) -> web.Response: + return web.json_response(search_hail_backend(await request.json(), gene_counts=True), dumps=hl_json_dumps) + + async def search(request: web.Request) -> web.Response: hail_results, total_results = search_hail_backend(await request.json()) return web.json_response({'results': hail_results, 'total': total_results}, dumps=hl_json_dumps) @@ -28,6 +32,7 @@ def init_web_app(): app.add_routes([ web.get('/status', status), web.post('/search', search), + web.post('/gene_counts', gene_counts), ]) load_globals() return app diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index b4dc36b882..9090249b2a 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -8,10 +8,10 @@ from seqr.models import Family from seqr.utils.search.utils import get_variant_query_gene_counts, query_variants, get_single_variant, \ 
get_variants_for_variant_ids, InvalidSearchException -from seqr.utils.search.search_utils_tests import SearchTestHelper, MOCK_COUNTS +from seqr.utils.search.search_utils_tests import SearchTestHelper from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, \ FAMILY_2_ALL_SAMPLE_DATA, ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ - LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH + LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS MOCK_HOST = 'http://test-hail-host' @@ -155,10 +155,10 @@ def test_query_variants(self): @responses.activate def test_get_variant_query_gene_counts(self): - responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=MOCK_COUNTS, status=200) + responses.add(responses.POST, f'{MOCK_HOST}:5000/gene_counts', json=GENE_COUNTS, status=200) gene_counts = get_variant_query_gene_counts(self.results_model, self.user) - self.assertDictEqual(gene_counts, MOCK_COUNTS) + self.assertDictEqual(gene_counts, GENE_COUNTS) self.assert_cached_results({'gene_aggs': gene_counts}) self._test_expected_search_call(sort=None) diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index afb0816f98..ccfec426da 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -4,17 +4,12 @@ import json import mock +from hail_search.test_utils import GENE_COUNTS from seqr.models import Family, Sample, VariantSearch, VariantSearchResults from seqr.utils.search.utils import get_single_variant, get_variants_for_variant_ids, get_variant_query_gene_counts, \ query_variants, InvalidSearchException from seqr.views.utils.test_utils import PARSED_VARIANTS, PARSED_COMPOUND_HET_VARIANTS_MULTI_PROJECT, GENE_FIELDS -MOCK_COUNTS = { - 'ENSG00000135953': {'total': 3, 'families': {'F000003_3': 2, 'F000002_2': 1, 'F000005_5': 1}}, - 'ENSG00000228198': {'total': 5, 
'families': {'F000003_3': 4, 'F000002_2': 1, 'F000005_5': 1}}, - 'ENSG00000240361': {'total': 2, 'families': {'F000003_3': 2}}, -} - class SearchTestHelper(object): @@ -354,12 +349,12 @@ def test_invalid_search_get_variant_query_gene_counts(self): def test_get_variant_query_gene_counts(self, mock_get_variants): def _mock_get_variants(families, search, user, previous_search_results, genome_version, **kwargs): - previous_search_results['gene_aggs'] = MOCK_COUNTS - return MOCK_COUNTS + previous_search_results['gene_aggs'] = GENE_COUNTS + return GENE_COUNTS mock_get_variants.side_effect = _mock_get_variants gene_counts = get_variant_query_gene_counts(self.results_model, self.user) - self.assertDictEqual(gene_counts, MOCK_COUNTS) + self.assertDictEqual(gene_counts, GENE_COUNTS) results_cache = {'gene_aggs': gene_counts} self.assert_cached_results(results_cache) self._test_expected_search_call( diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index cdc87d1cdb..2bae0faebe 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -713,8 +713,9 @@ def _get_sample_airtable_metadata(sample_ids, user, include_collaborator=False): READ_RNA_TABLE_COLUMNS.insert(READ_RNA_TABLE_COLUMNS.index('gene_annotation')+1, 'gene_annotation_details') READ_RNA_TABLE_COLUMNS.insert(READ_RNA_TABLE_COLUMNS.index('alignment_log_file')+1, 'alignment_postprocessing') READ_SET_TABLE_COLUMNS = ['aligned_dna_short_read_set_id', 'aligned_dna_short_read_id'] +CALLED_VARIANT_FILE_COLUMN = 'called_variants_dna_file' CALLED_TABLE_COLUMNS = [ - 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', 'called_variants_dna_file', 'md5sum', + 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', CALLED_VARIANT_FILE_COLUMN, 'md5sum', 'caller_software', 'variant_types', 'analysis_details', ] @@ -912,7 +913,9 @@ def gregor_export(request): ('experiment_dna_short_read', EXPERIMENT_TABLE_COLUMNS, airtable_rows), 
('aligned_dna_short_read', READ_TABLE_COLUMNS, airtable_rows), ('aligned_dna_short_read_set', READ_SET_TABLE_COLUMNS, airtable_rows), - ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, airtable_rows), + ('called_variants_dna_short_read', CALLED_TABLE_COLUMNS, [ + row for row in airtable_rows if row.get(CALLED_VARIANT_FILE_COLUMN) + ]), ('experiment_rna_short_read', EXPERIMENT_RNA_TABLE_COLUMNS, airtable_rna_rows), ('aligned_rna_short_read', READ_RNA_TABLE_COLUMNS, airtable_rna_rows), ('experiment', EXPERIMENT_LOOKUP_TABLE_COLUMNS, experiment_lookup_rows), @@ -1033,6 +1036,19 @@ def _get_experiment_lookup_row(is_rna, row_data): } +is_integer = lambda val, *args: val.isnumeric() or re.match(r'^\d{1,3}(,\d{3})*$', val) +DATA_TYPE_VALIDATORS = { + 'string': lambda val, validator: (not validator.get('is_bucket_path')) or val.startswith('gs://'), + 'enumeration': lambda val, validator: val in validator['enumerations'], + 'integer': is_integer, + 'float': lambda val, validator: is_integer(val) or re.match(r'^\d+\.\d+$', val), + 'date': lambda val, validator: bool(re.match(r'^\d{4}-\d{2}-\d{2}$', val)), +} +DATA_TYPE_ERROR_FORMATTERS = { + 'string': lambda validator: ' are a google bucket path starting with gs://', + 'enumeration': lambda validator: f': {", ".join(validator["enumerations"])}', +} + def _validate_gregor_files(file_data): errors = [] warnings = [] @@ -1070,6 +1086,26 @@ def _validate_gregor_files(file_data): warnings.append( f'The following columns are included in the "{file_name}" data model but are missing in the report: {col_summary}' ) + invalid_data_type_columns = { + col: validator['data_type'] for col, validator in table_validator.items() + if validator.get('data_type') and validator['data_type'] not in DATA_TYPE_VALIDATORS + } + if invalid_data_type_columns: + col_summary = ', '.join(sorted([f'{col} ({data_type})' for col, data_type in invalid_data_type_columns.items()])) + warnings.append( + f'The following columns are included in the 
"{file_name}" data model but have an unsupported data type: {col_summary}' + ) + invalid_enum_columns = [ + col for col, validator in table_validator.items() + if validator.get('data_type') == 'enumeration' and not validator.get('enumerations') + ] + if invalid_enum_columns: + for col in invalid_enum_columns: + table_validator[col]['data_type'] = None + col_summary = ', '.join(sorted(invalid_enum_columns)) + warnings.append( + f'The following columns are specified as "enumeration" in the "{file_name}" data model but are missing the allowed values definition: {col_summary}' + ) for column in columns: _validate_column_data( @@ -1112,15 +1148,18 @@ def _has_required_table(table, validator, tables): def _validate_column_data(column, file_name, data, column_validator, warnings, errors): - enum = column_validator.get('enumerations') + data_type = column_validator.get('data_type') + data_type_validator = DATA_TYPE_VALIDATORS.get(data_type) + unique = column_validator.get('is_unique') required = column_validator.get('required') recommended = column in WARN_MISSING_TABLE_COLUMNS.get(file_name, []) - if not (required or enum or recommended): + if not (required or unique or recommended or data_type_validator): return missing = [] warn_missing = [] invalid = [] + grouped_values = defaultdict(set) for row in data: value = row.get(column) if not value: @@ -1130,9 +1169,13 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e check_recommend_condition = WARN_MISSING_CONDITIONAL_COLUMNS.get(column) if not check_recommend_condition or check_recommend_condition(row): warn_missing.append(_get_row_id(row)) - elif enum and value not in enum: + elif data_type_validator and not data_type_validator(value, column_validator): invalid.append(f'{_get_row_id(row)} ({value})') - if missing or warn_missing or invalid: + elif unique: + grouped_values[value].add(_get_row_id(row)) + + duplicates = [f'{k} ({", ".join(sorted(v))})' for k, v in grouped_values.items() if 
len(v) > 1] + if missing or warn_missing or invalid or duplicates: airtable_summary = ' (from Airtable)' if column in ALL_AIRTABLE_COLUMNS else '' error_template = f'The following entries {{issue}} "{column}"{airtable_summary} in the "{file_name}" table' if missing: @@ -1141,8 +1184,14 @@ def _validate_column_data(column, file_name, data, column_validator, warnings, e ) if invalid: invalid_values = f'Invalid values: {", ".join(sorted(invalid))}' + allowed = DATA_TYPE_ERROR_FORMATTERS[data_type](column_validator) \ + if data_type in DATA_TYPE_ERROR_FORMATTERS else f' have data type {data_type}' + errors.append( + f'{error_template.format(issue="have invalid values for")}. Allowed values{allowed}. {invalid_values}' + ) + if duplicates: errors.append( - f'{error_template.format(issue="have invalid values for")}. Allowed values: {", ".join(enum)}. {invalid_values}' + f'{error_template.format(issue="have non-unique values for")}: {", ".join(sorted(duplicates))}' ) if warn_missing: warnings.append( diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index b03234cebe..ee2d6c3bf3 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -209,7 +209,7 @@ 'target_insert_size_wes': '385', 'sequencing_platform_wes': 'NovaSeq', 'aligned_dna_short_read_file_wes': 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.cram', - 'aligned_dna_short_read_index_file_wes': 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.crai', + 'aligned_dna_short_read_index_file_wes': 'NA', 'md5sum_wes': '129c28163df082', 'reference_assembly': 'GRCh38', 'alignment_software_dna': 'BWA-MEM-2.3', @@ -295,13 +295,13 @@ 'md5sum_wes': 'a6f6308866765ce8', 'md5sum_wgs': '2aa33e8c32020b1c', 'reference_assembly': 'GRCh38', - 'alignment_software_dna': 'BWA 0.7.15.r1140', + 'alignment_software_dna': 'BWA-MEM-2.3', 'mean_coverage_wes': '42.8', 'mean_coverage_wgs': '36.1', 'analysis_details': '', - 
'called_variants_dna_short_read_id': 'NA', + 'called_variants_dna_short_read_id': '', 'aligned_dna_short_read_set_id': 'Broad_NA20888_D1', - 'called_variants_dna_file': 'NA', + 'called_variants_dna_file': '', 'caller_software': 'NA', 'variant_types': 'SNV', }, @@ -388,23 +388,23 @@ 'table': 'participant', 'required': True, 'columns': [ - {'column': 'participant_id', 'required': True}, - {'column': 'internal_project_id'}, - {'column': 'gregor_center', 'required': True, 'enumerations': ['BCM', 'BROAD', 'UW']}, - {'column': 'consent_code', 'required': True, 'enumerations': ['GRU', 'HMB']}, - {'column': 'recontactable', 'enumerations': ['Yes', 'No']}, - {'column': 'prior_testing'}, + {'column': 'participant_id', 'required': True, 'data_type': 'string'}, + {'column': 'internal_project_id', 'data_type': 'reference'}, + {'column': 'gregor_center', 'required': True, 'data_type': 'enumeration', 'enumerations': ['BCM', 'BROAD', 'UW']}, + {'column': 'consent_code', 'required': True, 'data_type': 'enumeration', 'enumerations': ['GRU', 'HMB']}, + {'column': 'recontactable', 'data_type': 'enumeration', 'enumerations': ['Yes', 'No']}, + {'column': 'prior_testing', 'data_type': 'enumeration'}, {'column': 'family_id', 'required': True}, {'column': 'paternal_id'}, {'column': 'maternal_id'}, {'column': 'proband_relationship', 'required': True}, - {'column': 'sex', 'required': True, 'enumerations': ['Male', 'Female', 'Unknown']}, - {'column': 'reported_race', 'enumerations': ['Asian', 'White', 'Black']}, - {'column': 'reported_ethnicity', 'enumerations': ['Hispanic or Latino', 'Not Hispanic or Latino']}, + {'column': 'sex', 'required': True, 'data_type': 'enumeration', 'enumerations': ['Male', 'Female', 'Unknown']}, + {'column': 'reported_race', 'data_type': 'enumeration', 'enumerations': ['Asian', 'White', 'Black']}, + {'column': 'reported_ethnicity', 'data_type': 'enumeration', 'enumerations': ['Hispanic or Latino', 'Not Hispanic or Latino']}, {'column': 'ancestry_metadata'}, - 
{'column': 'affected_status', 'required': True, 'enumerations': ['Affected', 'Unaffected', 'Unknown']}, + {'column': 'affected_status', 'required': True, 'data_type': 'enumeration', 'enumerations': ['Affected', 'Unaffected', 'Unknown']}, {'column': 'phenotype_description'}, - {'column': 'age_at_enrollment'}, + {'column': 'age_at_enrollment', 'data_type': 'date'}, ], }, { @@ -413,13 +413,13 @@ 'columns': [ {'column': 'aligned_dna_short_read_id', 'required': True}, {'column': 'experiment_dna_short_read_id', 'required': True}, - {'column': 'aligned_dna_short_read_file'}, - {'column': 'aligned_dna_short_read_index_file'}, - {'column': 'alignment_software'}, + {'column': 'aligned_dna_short_read_file', 'is_unique': True, 'data_type': 'string', 'is_bucket_path': True}, + {'column': 'aligned_dna_short_read_index_file', 'data_type': 'string', 'is_bucket_path': True}, + {'column': 'alignment_software', 'is_unique': True}, {'column': 'analysis_details'}, - {'column': 'md5sum'}, - {'column': 'mean_coverage', 'required': True}, - {'column': 'reference_assembly'}, + {'column': 'md5sum', 'is_unique': True}, + {'column': 'mean_coverage', 'required': True, 'data_type': 'float'}, + {'column': 'reference_assembly', 'data_type': 'integer'}, {'column': 'reference_assembly_details'}, {'column': 'reference_assembly_uri'}, {'column': 'quality_issues'}, @@ -441,6 +441,33 @@ 'required': 'CONDITIONAL (aligned_dna_short_read_set, dna_read_data)', 'columns': [{'column': 'analyte_id', 'required': True}], }, + { + 'table': 'experiment_rna_short_read', + 'columns': [ + {'column': 'experiment_rna_short_read_id', 'required': True}, + {'column': 'analyte_id', 'required': True}, + {'column': 'experiment_sample_id'}, + {'column': 'seq_library_prep_kit_method'}, + {'column': 'library_prep_type'}, + {'column': 'experiment_type'}, + {'column': 'read_length', 'data_type': 'integer'}, + {'column': 'single_or_paired_ends'}, + {'column': 'date_data_generation', 'data_type': 'float'}, + {'column': 
'sequencing_platform'}, + {'column': 'within_site_batch_name'}, + {'column': 'RIN', 'data_type': 'float'}, + {'column': 'estimated_library_size'}, + {'column': 'total_reads', 'data_type': 'integer'}, + {'column': 'percent_rRNA', 'data_type': 'float'}, + {'column': 'percent_mRNA', 'data_type': 'float'}, + {'column': 'percent_mtRNA', 'data_type': 'float'}, + {'column': 'percent_Globin', 'data_type': 'float'}, + {'column': 'percent_UMI', 'data_type': 'float'}, + {'column': '5prime3prime_bias', 'data_type': 'float'}, + {'column': 'percent_GC', 'data_type': 'float'}, + {'column': 'percent_chrX_Y', 'data_type': 'float'}, + ], + }, ] } @@ -816,15 +843,22 @@ def test_gregor_export(self, mock_subprocess, mock_temp_dir, mock_open, mock_dat 'The following tables are required in the data model but absent from the reports: subject, dna_read_data_set', 'The following columns are included in the "participant" table but are missing from the data model: age_at_last_observation, ancestry_detail, pmid_id, proband_relationship_detail, sex_detail, twin_id', 'The following columns are included in the "participant" data model but are missing in the report: ancestry_metadata', + 'The following columns are included in the "participant" data model but have an unsupported data type: internal_project_id (reference)', + 'The following columns are specified as "enumeration" in the "participant" data model but are missing the allowed values definition: prior_testing', 'The following entries are missing recommended "recontactable" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881', 'The following entries are missing recommended "reported_race" in the "participant" table: Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries are missing 
recommended "phenotype_description" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries are missing recommended "age_at_enrollment" in the "participant" table: Broad_HG00731, Broad_NA20870, Broad_NA20872, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', - ] + skipped_file_validation_warnings[1:5] + skipped_file_validation_warnings[7:]) + ] + skipped_file_validation_warnings[1:5] + skipped_file_validation_warnings[7:8] + skipped_file_validation_warnings[9:]) self.assertListEqual(response.json()['errors'], [ 'The following entries are missing required "proband_relationship" in the "participant" table: Broad_HG00731, Broad_HG00732, Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888', 'The following entries have invalid values for "reported_race" in the "participant" table. Allowed values: Asian, White, Black. Invalid values: Broad_NA19675_1 (Middle Eastern or North African)', + 'The following entries have invalid values for "age_at_enrollment" in the "participant" table. Allowed values have data type date. Invalid values: Broad_NA19675_1 (18)', + 'The following entries have invalid values for "aligned_dna_short_read_index_file" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: VCGS_FAM203_621_D2 (NA)', + 'The following entries have invalid values for "reference_assembly" (from Airtable) in the "aligned_dna_short_read" table. Allowed values have data type integer. 
Invalid values: NA20888 (GRCh38), VCGS_FAM203_621_D2 (GRCh38)', 'The following entries are missing required "mean_coverage" (from Airtable) in the "aligned_dna_short_read" table: VCGS_FAM203_621_D2', + 'The following entries have non-unique values for "alignment_software" (from Airtable) in the "aligned_dna_short_read" table: BWA-MEM-2.3 (NA20888, VCGS_FAM203_621_D2)', + 'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. Invalid values: NA19679 (2023-02-11)', ]) responses.add(responses.GET, MOCK_DATA_MODEL_URL, status=404) @@ -976,20 +1010,19 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): self.assertIn([ 'Broad_exome_VCGS_FAM203_621_D2_1', 'Broad_exome_VCGS_FAM203_621_D2', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.cram', - 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_COL_FAM1_1_D1.crai', '129c28163df082', 'GRCh38', - '', '', '', 'BWA-MEM-2.3', 'DOI:10.5281/zenodo.4469317', '', + 'NA', '129c28163df082', 'GRCh38', '', '', '', 'BWA-MEM-2.3', 'DOI:10.5281/zenodo.4469317', '', ], read_file) self.assertIn([ 'Broad_exome_NA20888_1', 'Broad_exome_NA20888', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888.cram', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888.crai', 'a6f6308866765ce8', 'GRCh38', '', '', - '42.8', 'BWA 0.7.15.r1140', '', '', + '42.8', 'BWA-MEM-2.3', '', '', ], read_file) self.assertEqual([ 'Broad_genome_NA20888_1_1', 'Broad_genome_NA20888_1', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888_1.cram', 'gs://fc-eb352699-d849-483f-aefe-9d35ce2b21ac/Broad_NA20888_1.crai', '2aa33e8c32020b1c', 'GRCh38', '', '', - '36.1', 'BWA 0.7.15.r1140', '', '', + '36.1', 'BWA-MEM-2.3', '', '', ] in read_file, has_second_project) self.assertEqual(len(read_set_file), num_airtable_rows) @@ -998,7 +1031,7 @@ def _assert_expected_gregor_files(self, mock_open, 
has_second_project=False): self.assertIn(['Broad_NA20888_D1', 'Broad_exome_NA20888_1'], read_set_file) self.assertEqual(['Broad_NA20888_D1', 'Broad_genome_NA20888_1_1'] in read_set_file, has_second_project) - self.assertEqual(len(called_file), num_airtable_rows) + self.assertEqual(len(called_file), 2) self.assertEqual(called_file[0], [ 'called_variants_dna_short_read_id', 'aligned_dna_short_read_set_id', 'called_variants_dna_file', 'md5sum', 'caller_software', 'variant_types', 'analysis_details', @@ -1007,9 +1040,6 @@ def _assert_expected_gregor_files(self, mock_open, has_second_project=False): 'SX2-3', 'BCM_H7YG5DSX2', 'gs://fc-fed09429-e563-44a7-aaeb-776c8336ba02/COL_FAM1_1_D1.SV.vcf', '129c28163df082', 'gatk4.1.2', 'SNV', 'DOI:10.5281/zenodo.4469317', ], called_file) - self.assertIn(['NA', 'Broad_NA20888_D1', 'NA', 'a6f6308866765ce8', 'NA', 'SNV', ''], called_file) - self.assertEqual( - ['NA', 'Broad_NA20888_D1', 'NA', '2aa33e8c32020b1c', 'NA', 'SNV', ''] in called_file, has_second_project) self.assertEqual(len(experiment_rna_file), 2) self.assertEqual(experiment_rna_file[0], [ diff --git a/ui/shared/components/panel/variants/Predictions.jsx b/ui/shared/components/panel/variants/Predictions.jsx index c4dcd7e5d9..7899621545 100644 --- a/ui/shared/components/panel/variants/Predictions.jsx +++ b/ui/shared/components/panel/variants/Predictions.jsx @@ -8,7 +8,7 @@ import { getGenesById } from 'redux/selectors' import { PREDICTOR_FIELDS, getVariantMainGeneId } from 'shared/utils/constants' import { snakecaseToTitlecase } from 'shared/utils/stringUtils' import { HorizontalSpacer } from '../../Spacers' -import { ButtonLink } from '../../StyledComponents' +import { ButtonLink, ColoredIcon } from '../../StyledComponents' const PredictionValue = styled.span` margin-left: 5px; @@ -19,8 +19,10 @@ const PredictionValue = styled.span` const NUM_TO_SHOW_ABOVE_THE_FOLD = 6 // how many predictors to show immediately +const PRED_COLOR_MAP = ['green', 'olive', 'grey', 'yellow', 
'red', '#8b0000'] + const predictionFieldValue = ( - predictions, { field, dangerThreshold, warningThreshold, indicatorMap, infoField, infoTitle }, + predictions, { field, thresholds, indicatorMap, infoField, infoTitle }, ) => { let value = predictions[field] if (value === null || value === undefined) { @@ -29,22 +31,23 @@ const predictionFieldValue = ( const infoValue = predictions[infoField] - if (dangerThreshold) { - value = parseFloat(value).toPrecision(2) - let color = 'green' - if (value >= dangerThreshold) { - color = 'red' - } else if (value >= warningThreshold) { - color = 'yellow' - } - return { value, color, infoValue, infoTitle, dangerThreshold, warningThreshold } + if (thresholds) { + value = parseFloat(value).toPrecision(3) + const color = PRED_COLOR_MAP.find( + (clr, i) => (thresholds[i - 1] || thresholds[i]) && + (thresholds[i - 1] === undefined || value >= thresholds[i - 1]) && + (thresholds[i] === undefined || value < thresholds[i]), + ) + return { value, color, infoValue, infoTitle, thresholds } } return indicatorMap[value[0]] || indicatorMap[value] } +const coloredIcon = color => React.createElement(color.startsWith('#') ? ColoredIcon : Icon, { name: 'circle', size: 'small', color }) + const Prediction = ( - { field, fieldTitle, value, color, infoValue, infoTitle, warningThreshold, dangerThreshold, href }, + { field, fieldTitle, value, color, infoValue, infoTitle, thresholds, href }, ) => { const indicator = infoValue ? ( } /> - ) : + ) : coloredIcon(color) const fieldName = fieldTitle || snakecaseToTitlecase(field) - const fieldDisplay = dangerThreshold ? ( + const fieldDisplay = thresholds ? ( -
{`Red > ${dangerThreshold}`}
- {warningThreshold < dangerThreshold &&
{`Yellow > ${warningThreshold}`}
} - + PRED_COLOR_MAP.map((c, i) => { + if (thresholds[i] === undefined && thresholds[i - 1] === undefined) { + return null + } + return ( +
+ {coloredIcon(c)} + {thresholds[i] === undefined ? ` >= ${thresholds[i - 1]}` : ` < ${thresholds[i]}`} +
+ ) + }) } trigger={{fieldName}} /> @@ -85,8 +95,7 @@ Prediction.propTypes = { infoTitle: PropTypes.string, fieldTitle: PropTypes.string, color: PropTypes.string, - warningThreshold: PropTypes.number, - dangerThreshold: PropTypes.number, + thresholds: PropTypes.arrayOf(PropTypes.number), href: PropTypes.string, } @@ -116,8 +125,8 @@ class Predictions extends React.PureComponent { if (gene && gene.primateAi) { genePredictors.primate_ai = { field: 'primate_ai', - warningThreshold: gene.primateAi.percentile25, - dangerThreshold: gene.primateAi.percentile75, + thresholds: [undefined, undefined, gene.primateAi.percentile25.toPrecision(3), + gene.primateAi.percentile75.toPrecision(3), undefined], } } diff --git a/ui/shared/utils/constants.js b/ui/shared/utils/constants.js index 709b72b471..9334446cab 100644 --- a/ui/shared/utils/constants.js +++ b/ui/shared/utils/constants.js @@ -1317,15 +1317,14 @@ export const NO_SV_IN_SILICO_GROUPS = [MISSENSE_IN_SILICO_GROUP, CODING_IN_SILIC export const SPLICE_AI_FIELD = 'splice_ai' export const PREDICTOR_FIELDS = [ - { field: 'cadd', group: CODING_IN_SILICO_GROUP, warningThreshold: 10, dangerThreshold: 20, min: 1, max: 99 }, - { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.75 }, - { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.7 }, - { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, warningThreshold: 1, dangerThreshold: 2, max: 5 }, + { field: 'cadd', group: CODING_IN_SILICO_GROUP, thresholds: [0.151, 22.8, 25.3, 28.1, undefined], min: 1, max: 99 }, + { field: 'revel', group: MISSENSE_IN_SILICO_GROUP, thresholds: [0.0161, 0.291, 0.644, 0.773, 0.932] }, + { field: 'primate_ai', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, 0.484, 0.79, 0.867, undefined] }, + { field: 'mpc', group: MISSENSE_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1.36, 1.828, undefined], max: 5 }, { field: SPLICE_AI_FIELD, group: 
SPLICING_IN_SILICO_GROUP, - warningThreshold: 0.5, - dangerThreshold: 0.8, + thresholds: [undefined, undefined, 0.5, 0.8, undefined], infoField: 'splice_ai_consequence', infoTitle: 'Predicted Consequence', fieldTitle: 'SpliceAI', @@ -1333,20 +1332,25 @@ export const PREDICTOR_FIELDS = [ `https://spliceailookup.broadinstitute.org/#variant=${chrom}-${pos}-${ref}-${alt}&hg=${genomeVersion}&distance=1000&mask=1` ), }, - { field: 'eigen', group: CODING_IN_SILICO_GROUP, warningThreshold: 1, dangerThreshold: 2, max: 99 }, - { field: 'dann', displayOnly: true, warningThreshold: 0.93, dangerThreshold: 0.96 }, - { field: 'strvctvre', group: SV_IN_SILICO_GROUP, warningThreshold: 0.5, dangerThreshold: 0.75 }, + { field: 'eigen', group: CODING_IN_SILICO_GROUP, thresholds: [undefined, undefined, 1, 2, undefined], max: 99 }, + { field: 'dann', displayOnly: true, thresholds: [undefined, undefined, 0.93, 0.96, undefined] }, + { field: 'strvctvre', group: SV_IN_SILICO_GROUP, thresholds: [undefined, undefined, 0.5, 0.75, undefined] }, { field: 'polyphen', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: POLYPHEN_MAP }, { field: 'sift', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: INDICATOR_MAP }, { field: 'mut_taster', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: MUTTASTER_MAP }, { field: 'fathmm', group: MISSENSE_IN_SILICO_GROUP, indicatorMap: FATHMM_MAP }, - { field: 'vest', warningThreshold: 0.5, dangerThreshold: 0.764 }, - { field: 'mut_pred', warningThreshold: 0.392, dangerThreshold: 0.737 }, - { field: 'apogee', warningThreshold: 0.5, dangerThreshold: 0.5 }, - { field: 'gnomad_noncoding', fieldTitle: 'gnomAD Constraint', displayOnly: true, warningThreshold: 2.18, dangerThreshold: 4 }, + { field: 'vest', thresholds: [undefined, 0.45, 0.764, 0.861, 0.965] }, + { field: 'mut_pred', thresholds: [0.0101, 0.392, 0.737, 0.829, 0.932] }, + { field: 'apogee', thresholds: [undefined, undefined, 0.5, 0.5, undefined] }, + { + field: 'gnomad_noncoding', + fieldTitle: 'gnomAD Constraint', 
+ displayOnly: true, + thresholds: [undefined, undefined, 2.18, 4, undefined], + }, { field: 'haplogroup_defining', indicatorMap: { Y: { color: 'green', value: '' } } }, { field: 'mitotip', indicatorMap: MITOTIP_MAP }, - { field: 'hmtvar', warningThreshold: 0.35, dangerThreshold: 0.35 }, + { field: 'hmtvar', thresholds: [undefined, undefined, 0.35, 0.35, undefined] }, ] export const getVariantMainGeneId = ({ transcripts = {}, mainTranscriptId, selectedMainTranscriptId }) => {