Merge pull request #24 from eipm/develop
v1.2.0
jmtang2018 authored Nov 21, 2024
2 parents f259a97 + c3f5270 commit f95a5cb
Showing 22 changed files with 996 additions and 447 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -2,7 +2,7 @@

Docs for the Bridge2AI Voice Project.

[![Github](https://img.shields.io/badge/github-1.1.0-green?style=flat&logo=github)](https://github.com/eipm/bridge2ai-docs) [![Python 3.12.0](https://img.shields.io/badge/python-3.12.0-blue.svg)](https://www.python.org/downloads/release/python-3120/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![DOI](https://zenodo.org/badge/860006845.svg)](https://zenodo.org/doi/10.5281/zenodo.13834653)
[![Github](https://img.shields.io/badge/github-1.2.0-green?style=flat&logo=github)](https://github.com/eipm/bridge2ai-docs) [![Python 3.12.0](https://img.shields.io/badge/python-3.12.0-blue.svg)](https://www.python.org/downloads/release/python-3120/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![DOI](https://zenodo.org/badge/860006845.svg)](https://zenodo.org/doi/10.5281/zenodo.13834653)


## 🤝 License
2 changes: 1 addition & 1 deletion css/dashboard.css
@@ -55,7 +55,7 @@ footer {
font-size: smaller;
}

@media screen and (max-width: 1278px) {
@media screen and (max-width: 1400px) {
.stTabs [data-baseweb="tab-list"] {
padding-bottom: 25px;
}
Binary file added images/ai-readiness-figure-1.png
34 changes: 17 additions & 17 deletions requirements.txt
@@ -1,42 +1,42 @@
altair==5.4.1
attrs==24.2.0
blinker==1.8.2
blinker==1.9.0
cachetools==5.5.0
certifi==2024.8.30
charset-normalizer==3.3.2
charset-normalizer==3.4.0
click==8.1.7
gitdb==4.0.11
GitPython==3.1.43
idna==3.10
Jinja2==3.1.4
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jsonschema-specifications==2024.10.1
markdown-it-py==3.0.0
MarkupSafe==2.1.5
MarkupSafe==3.0.2
mdurl==0.1.2
narwhals==1.8.1
numpy==2.1.1
packaging==24.1
pandas==2.2.2
pillow==10.4.0
narwhals==1.14.1
numpy==2.1.3
packaging==24.2
pandas==2.2.3
pillow==11.0.0
plotly==5.24.1
protobuf==5.28.2
pyarrow==17.0.0
protobuf==5.28.3
pyarrow==18.0.0
pydeck==0.9.1
Pygments==2.18.0
python-dateutil==2.9.0.post0
pytz==2024.2
referencing==0.35.1
requests==2.32.3
rich==13.8.1
rpds-py==0.20.0
rich==13.9.4
rpds-py==0.21.0
six==1.16.0
smmap==5.0.1
streamlit==1.38.0
tenacity==8.5.0
streamlit==1.40.1
tenacity==9.0.0
toml==0.10.2
tornado==6.4.1
typing_extensions==4.12.2
tzdata==2024.1
tzdata==2024.2
urllib3==2.2.3
watchdog==4.0.2
watchdog==6.0.0
362 changes: 0 additions & 362 deletions src/bids-like_structure_preview.html

This file was deleted.

38 changes: 19 additions & 19 deletions src/dashboard.py
@@ -3,14 +3,14 @@

import streamlit as st

from tabs.about import about_page
from tabs.healthsheet import healthsheet_page
from tabs.overview import overview_page
from tabs.collection_methods import collection_methods_page
from tabs.data_governance import data_governance_page
from tabs.study_dashboard import study_dashboard_page
from tabs.study_metadata import study_metadata_page
from tabs.dataset_metadata import dataset_metadata_page
from tabs.dataset_structure_preview import dataset_structure_preview_page
from tabs.dataset_quality_dashboard import dataset_quality_dashboard_page
from tabs.dataset_uses import dataset_uses_page
from tabs.healthsheet import healthsheet_page
from tabs.data_pre_processing import data_pre_processing_page
from tabs.ai_readiness import ai_readiness_page

def config_page(version):
st.set_page_config(
@@ -43,28 +43,28 @@ def create_tabs(tabs_func):

def main():
# Current version of the app
version = "1.1.0"
version = "1.2.0"
# Map tab names to functions
# In this dictionary, the key is the tab name and the value is the function that will be called when the tab is selected
# The function is defined in the respective file
# about_page() is defined in tabs/about.py
# healthsheet_page() is defined in tabs/healthsheet.py
# overview_page() is defined in tabs/overview.py
# collection_methods_page() is defined in tabs/collection_methods.py
# data_governance_page() is defined in tabs/data_governance.py
# study_dashboard_page() is defined in tabs/study_dashboard.py
# study_metadata_page() is defined in tabs/study_metadata.py
# dataset_metadata_page() is defined in tabs/dataset_metadata.py
# dataset_structure_preview_page() is defined in tabs/dataset_structure_preview.py
# dataset_quality_dashboard_page() is defined in tabs/dataset_quality_dashboard.py
# dataset_uses_page() is defined in tabs/dataset_uses.py
# healthsheet_page() is defined in tabs/healthsheet.py
# data_pre_processing_page() is defined in tabs/data_pre_processing.py
# ai_readiness_page() is defined in tabs/ai_readiness.py

tab_functions = {
"About": about_page,
"Healthsheet": healthsheet_page,
"Overview": overview_page,
"Collection Methods": collection_methods_page,
"Data Governance": data_governance_page,
"Study Dashboard": study_dashboard_page,
"Study Metadata": study_metadata_page,
"Dataset Metadata": dataset_metadata_page,
"Dataset Structure Preview": dataset_structure_preview_page,
"Dataset Quality Dashboard": dataset_quality_dashboard_page,
"Dataset Uses": dataset_uses_page
"Healthsheet": healthsheet_page,
"Data Pre-Processing": data_pre_processing_page,
"AI-Readiness": ai_readiness_page,
}

# Set page configuration
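The comments above describe the dashboard's core mechanism: `main()` builds a dictionary mapping tab names to page functions and hands it to `create_tabs`, whose body is collapsed in this diff. A minimal sketch of that pattern, assuming the standard `st.tabs` API rather than the repository's actual `create_tabs` implementation:

```python
import streamlit as st

def create_tabs(tab_functions):
    """Render one Streamlit tab per {tab name: page function} entry.

    Illustrative sketch only; the real create_tabs in src/dashboard.py is
    collapsed in this diff and may differ.
    """
    tabs = st.tabs(list(tab_functions.keys()))  # one tab container per name
    for tab, (name, page_func) in zip(tabs, tab_functions.items()):
        with tab:
            # Each page function receives its tab name, e.g. ai_readiness_page(tab_name).
            page_func(name)
```

Under that assumption, adding the new "Data Pre-Processing" and "AI-Readiness" entries to `tab_functions` is all that is needed for the new tabs to appear.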
5 changes: 0 additions & 5 deletions src/tabs/about.py

This file was deleted.

176 changes: 176 additions & 0 deletions src/tabs/ai_readiness.py
@@ -0,0 +1,176 @@
import streamlit as st
from tabs.utils import create_html_table

import pandas as pd
import numpy as np

def ai_readiness_page(tab_name):
st.markdown(
"""
For detailed descriptions of each criterion, please read: [https://www.biorxiv.org/content/10.1101/2024.10.23.619844v2](https://www.biorxiv.org/content/10.1101/2024.10.23.619844v2)
"""
)
# Add Figure
image_path = "images/ai-readiness-figure-1.png"
lt, cent, rt = st.columns([1,3,1], gap="small")
with cent:
st.image(image_path, use_container_width=True)

# Add Table
st.markdown(
"""
<style>
table {
width: 100%;
border-collapse: collapse;
}
th, td {
border: 1px solid !important;
padding: 4px;
text-align: left;
}
table .center-align {
text-align: center;
}
</style>
<table>
<caption style="caption-side: top; text-align: left; font-weight: bold; font-size: 1.2em;">
Table 4 - Precision Public Health (Voice) - Current Rating
</caption>
<tr>
<th colspan="2">Criterion</th>
<th>Criterion met? (Y=1; N=0)</th>
<th>Total Score for Criterion (%)</th>
</tr>
<tr>
<td rowspan="4">FAIRness (0)</td>
<td>Findable (0.a)</td>
<td class="center-align">1</td>
<td rowspan="4" class="center-align">100</td>
</tr>
<tr>
<td>Accessible (0.b)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Interoperable (0.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Reusable (0.d)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td rowspan="4">Provenance (1)</td>
<td>Transparent (1.a)</td>
<td class="center-align">1</td>
<td rowspan="4" class="center-align">100</td>
</tr>
<tr>
<td>Traceable (1.b)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Interpretable (1.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Key actors identified (1.d)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td rowspan="5">Characterization (2)</td>
<td>Semantics (2.a)</td>
<td class="center-align">1</td>
<td rowspan="5" class="center-align">80</td>
</tr>
<tr>
<td>Statistics (2.b)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Standards (2.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Potential Sources of Bias (2.d)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Data Quality (2.e)</td>
<td class="center-align">0</td>
</tr>
<tr>
<td rowspan="3">Pre-model explainability (3)</td>
<td>Data documentation templates (3.a)</td>
<td class="center-align">1</td>
<td rowspan="3" class="center-align">100</td>
</tr>
<tr>
<td>Fit for purpose (3.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Verifiable (3.d)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td rowspan="4">Ethics (4)</td>
<td>Ethically acquired (4.a)</td>
<td class="center-align">1</td>
<td rowspan="4" class="center-align">100</td>
</tr>
<tr>
<td>Ethically managed (4.b)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Ethically disseminated (4.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Secure (4.d)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td rowspan="4">Sustainability (5)</td>
<td>Persistent (5.a)</td>
<td class="center-align">1</td>
<td rowspan="4" class="center-align">50</td>
</tr>
<tr>
<td>Domain-appropriate (5.b)</td>
<td class="center-align">0</td>
</tr>
<tr>
<td>Well-governed (5.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Associated (5.d)</td>
<td class="center-align">0</td>
</tr>
<tr>
<td rowspan="4">Computability (6)</td>
<td>Standardized (6.a)</td>
<td class="center-align">1</td>
<td rowspan="4" class="center-align">75</td>
</tr>
<tr>
<td>Computational Accessibility (6.b)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Portable (6.c)</td>
<td class="center-align">1</td>
</tr>
<tr>
<td>Contextualized (6.d)</td>
<td class="center-align">0</td>
</tr>
</table>
""",
unsafe_allow_html=True
)
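The "Total Score for Criterion (%)" column in Table 4 is consistent with the percentage of sub-criteria met within each category (for example, Characterization: 4 of 5 met gives 80%). A small sketch of that arithmetic, with the indicator values transcribed from the table above (the dictionary itself is illustrative and not part of the diff):

```python
# Sub-criterion indicators (Y=1, N=0) transcribed from Table 4.
criteria = {
    "FAIRness (0)":                 [1, 1, 1, 1],
    "Provenance (1)":               [1, 1, 1, 1],
    "Characterization (2)":         [1, 1, 1, 1, 0],
    "Pre-model explainability (3)": [1, 1, 1],
    "Ethics (4)":                   [1, 1, 1, 1],
    "Sustainability (5)":           [1, 0, 1, 0],
    "Computability (6)":            [1, 1, 1, 0],
}

for name, met in criteria.items():
    score = 100 * sum(met) / len(met)  # percent of sub-criteria met
    print(f"{name}: {score:.0f}%")     # e.g. Characterization (2): 80%
```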
52 changes: 52 additions & 0 deletions src/tabs/collection_methods.py
@@ -0,0 +1,52 @@
import streamlit as st
from tabs.utils import create_html_table

def collection_methods_page(tab_name):

st.markdown(

"""
Data is collected across five disease categories. The initial data release contains data collected from four of the five categories (pediatric data will be incorporated in subsequent dataset releases).
Participants are recruited across different academic institutions from “high volume expert clinics” based on diagnosis and inclusion/exclusion criteria outlined below **(Table 1)**.
**High Volume Expert Clinics:** Outpatient clinics within hospital systems or academic institutions that have developed an expertise in a specific disease area and see more than 50 patients per month from the same disease category. Ex: Asthma/COPD pulmonary specialty clinic.
Data is collected in the clinic with the assistance of a trained research assistant. Future data collection will also occur remotely; however, remote data collection did not occur for the initial dataset being released. Voice samples are collected prospectively using a custom software application (Bridge2AI-Voice app) with the Bridge2AI-Voice protocols.
**Clinical validation:** Clinical validation is performed by a qualified physician or practitioner based on established gold standards for diagnosis **(Table 1)**.
**Acoustic Tasks:** Voice, breathing, cough, and speech data are recorded with the app. A total of 22 acoustic tasks are recorded through the app **(Table 2)**.
**Demographic surveys and confounders:** Detailed demographic data and surveys about confounding factors such as smoking and drinking history are collected through the smartphone application.
**Validated Questionnaires:** The Bridge2AI-Voice protocols contain validated tools and questionnaires for each disease category within the app for data collection **(Table 3)**.
**Other Multimodal Data:** The rest of the multimodal data, including imaging, genomic data (for the neuro cohort), laryngoscopy imaging, and other EHR data, is extracted from different sites independently and will be uploaded through the REDCap database. Please note that no external data is released in this v1.0.0.
Please see publication for protocol development description:
>Bensoussan, Yael, et al. "Developing Multi-Disorder Voice Protocols: A team science approach involving clinical expertise, bioethics, standards, and DEI." Proc. Interspeech 2024. 2024. [https://www.isca-archive.org/interspeech_2024/bensoussan24_interspeech.html](https://www.isca-archive.org/interspeech_2024/bensoussan24_interspeech.html).
The supporting REDCap Data Dictionary, Metadata, and Instrument PDFs are available at [https://github.com/eipm/bridge2ai-redcap](https://github.com/eipm/bridge2ai-redcap).
When using the REDCap Data Dictionary and Metadata please cite:
>Bensoussan, Y., Ghosh, S. S., Rameau, A., Boyer, M., Bahr, R., Watts, S., Rudzicz, F., Bolser, D., Lerner-Ellis, J., Awan, S., Powell, M. E., Belisle-Pipon, J.-C., Ravitsky, V., Johnson, A., Zisimopoulos, P., Tang, J., Sigaras, A., Elemento, O., Dorr, D., … Bridge2AI-Voice. (2024). eipm/bridge2ai-redcap. Zenodo. [https://zenodo.org/doi/10.5281/zenodo.12760724](https://zenodo.org/doi/10.5281/zenodo.12760724).
Protocols can be found in the Bridge2AI-Voice documentation for v1.0.0 of the dataset at [https://kind-lab.github.io/vbai-fhir/protocol.html](https://kind-lab.github.io/vbai-fhir/protocol.html).
"""
)

csv_file_path = "tables/Disease_cohort_inclusion_exclusion_criteria.csv"
caption = 'Table 1 - Disease cohort inclusion/exclusion criteria and validation methods'
create_html_table(csv_file_path, caption, [], 0)

csv_file_path = "tables/Acoustic_Tasks_Protocol.csv"
caption = 'Table 2 - Acoustic Tasks in Protocol'
create_html_table(csv_file_path, caption)

csv_file_path = "tables/Validated_Questionnaires.csv"
caption = 'Table 3 - Validated Questionnaires integrated into App'
create_html_table(csv_file_path, caption, ['X'])
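`create_html_table` comes from `tabs/utils.py`, which is not touched by this diff, so its exact behavior (including the third and fourth positional arguments used above) is not shown here. A hypothetical sketch of a CSV-to-captioned-HTML-table helper of this shape, with the semantics of the optional arguments assumed rather than confirmed:

```python
import pandas as pd
import streamlit as st

def create_html_table(csv_file_path, caption, hidden_values=None, index_col=None):
    """Render a CSV file as a captioned HTML table inside a Streamlit page.

    Hypothetical sketch only: the real helper lives in tabs/utils.py (not part
    of this diff), and the meaning of hidden_values / index_col is assumed.
    """
    df = pd.read_csv(csv_file_path, index_col=index_col)
    if hidden_values:
        # Assumed behavior: blank out placeholder markers such as "X".
        df = df.replace({value: "" for value in hidden_values})

    table_html = df.to_html(escape=False, index=index_col is not None)
    caption_html = (
        '<caption style="caption-side: top; text-align: left; '
        f'font-weight: bold;">{caption}</caption>'
    )
    # Insert the caption right after the opening <table ...> tag so the table
    # picks up the CSS injected elsewhere on the page.
    pos = table_html.index(">") + 1
    st.markdown(table_html[:pos] + caption_html + table_html[pos:], unsafe_allow_html=True)
```

Under these assumptions, the three calls above would render Tables 1–3 with their captions.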
