diff --git a/api/.buildinfo b/api/.buildinfo
index b5cf5bebb..969bee2be 100644
--- a/api/.buildinfo
+++ b/api/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 0555596bae49f971011984d58e3c9fcc
+config: 67b59008c58998f711ccce32ad07bc46
tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/api/_modules/cyclops/data/aggregate.html b/api/_modules/cyclops/data/aggregate.html
index 3bfe49b62..e0be3dbb1 100644
--- a/api/_modules/cyclops/data/aggregate.html
+++ b/api/_modules/cyclops/data/aggregate.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/data/features/medical_image.html b/api/_modules/cyclops/data/features/medical_image.html
index 3ed3d29a8..7a2fdabe2 100644
--- a/api/_modules/cyclops/data/features/medical_image.html
+++ b/api/_modules/cyclops/data/features/medical_image.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/data/slicer.html b/api/_modules/cyclops/data/slicer.html
index 0e43c6edc..5a2bd5d34 100644
--- a/api/_modules/cyclops/data/slicer.html
+++ b/api/_modules/cyclops/data/slicer.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/evaluator.html b/api/_modules/cyclops/evaluate/evaluator.html
index 97f92317e..9c8f221ec 100644
--- a/api/_modules/cyclops/evaluate/evaluator.html
+++ b/api/_modules/cyclops/evaluate/evaluator.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/fairness/evaluator.html b/api/_modules/cyclops/evaluate/fairness/evaluator.html
index ce776842f..5f63d6983 100644
--- a/api/_modules/cyclops/evaluate/fairness/evaluator.html
+++ b/api/_modules/cyclops/evaluate/fairness/evaluator.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/accuracy.html b/api/_modules/cyclops/evaluate/metrics/accuracy.html
index 205e2d7be..20ca41840 100644
--- a/api/_modules/cyclops/evaluate/metrics/accuracy.html
+++ b/api/_modules/cyclops/evaluate/metrics/accuracy.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/auroc.html b/api/_modules/cyclops/evaluate/metrics/auroc.html
index 1f19f0f8e..e2dd3572b 100644
--- a/api/_modules/cyclops/evaluate/metrics/auroc.html
+++ b/api/_modules/cyclops/evaluate/metrics/auroc.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/average_precision.html b/api/_modules/cyclops/evaluate/metrics/average_precision.html
index e112f5163..79f09977b 100644
--- a/api/_modules/cyclops/evaluate/metrics/average_precision.html
+++ b/api/_modules/cyclops/evaluate/metrics/average_precision.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/f_beta.html b/api/_modules/cyclops/evaluate/metrics/f_beta.html
index 9b34230ef..dc4f4a39c 100644
--- a/api/_modules/cyclops/evaluate/metrics/f_beta.html
+++ b/api/_modules/cyclops/evaluate/metrics/f_beta.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/accuracy.html b/api/_modules/cyclops/evaluate/metrics/functional/accuracy.html
index 6d911d4fd..2776c015b 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/accuracy.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/accuracy.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/auroc.html b/api/_modules/cyclops/evaluate/metrics/functional/auroc.html
index beba98645..f40b72e42 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/auroc.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/auroc.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/average_precision.html b/api/_modules/cyclops/evaluate/metrics/functional/average_precision.html
index 7dabab4a7..109263cbd 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/average_precision.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/average_precision.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/f_beta.html b/api/_modules/cyclops/evaluate/metrics/functional/f_beta.html
index e5895a1f4..be40bc779 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/f_beta.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/f_beta.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/precision_recall.html b/api/_modules/cyclops/evaluate/metrics/functional/precision_recall.html
index 17acdc5a5..1aff6b0b2 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/precision_recall.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/precision_recall.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/precision_recall_curve.html b/api/_modules/cyclops/evaluate/metrics/functional/precision_recall_curve.html
index e1df790e2..cdae16824 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/precision_recall_curve.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/precision_recall_curve.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/roc.html b/api/_modules/cyclops/evaluate/metrics/functional/roc.html
index 6a4cb4c17..f99711902 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/roc.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/roc.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/sensitivity.html b/api/_modules/cyclops/evaluate/metrics/functional/sensitivity.html
index 3f2b091ae..f701a91ae 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/sensitivity.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/sensitivity.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/specificity.html b/api/_modules/cyclops/evaluate/metrics/functional/specificity.html
index 18a197f8b..f989d3b0d 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/specificity.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/specificity.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/functional/stat_scores.html b/api/_modules/cyclops/evaluate/metrics/functional/stat_scores.html
index eacd7344c..7892d8453 100644
--- a/api/_modules/cyclops/evaluate/metrics/functional/stat_scores.html
+++ b/api/_modules/cyclops/evaluate/metrics/functional/stat_scores.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/precision_recall.html b/api/_modules/cyclops/evaluate/metrics/precision_recall.html
index c5683fb92..93095c95d 100644
--- a/api/_modules/cyclops/evaluate/metrics/precision_recall.html
+++ b/api/_modules/cyclops/evaluate/metrics/precision_recall.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/precision_recall_curve.html b/api/_modules/cyclops/evaluate/metrics/precision_recall_curve.html
index b1c9ec079..ad56484f5 100644
--- a/api/_modules/cyclops/evaluate/metrics/precision_recall_curve.html
+++ b/api/_modules/cyclops/evaluate/metrics/precision_recall_curve.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/roc.html b/api/_modules/cyclops/evaluate/metrics/roc.html
index bea090e9f..09d59b291 100644
--- a/api/_modules/cyclops/evaluate/metrics/roc.html
+++ b/api/_modules/cyclops/evaluate/metrics/roc.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/sensitivity.html b/api/_modules/cyclops/evaluate/metrics/sensitivity.html
index 49c91bb63..4b28ba805 100644
--- a/api/_modules/cyclops/evaluate/metrics/sensitivity.html
+++ b/api/_modules/cyclops/evaluate/metrics/sensitivity.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/specificity.html b/api/_modules/cyclops/evaluate/metrics/specificity.html
index 3d23058cc..8a0978203 100644
--- a/api/_modules/cyclops/evaluate/metrics/specificity.html
+++ b/api/_modules/cyclops/evaluate/metrics/specificity.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/evaluate/metrics/stat_scores.html b/api/_modules/cyclops/evaluate/metrics/stat_scores.html
index 805644d43..aa5dddb5d 100644
--- a/api/_modules/cyclops/evaluate/metrics/stat_scores.html
+++ b/api/_modules/cyclops/evaluate/metrics/stat_scores.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/monitor/clinical_applicator.html b/api/_modules/cyclops/monitor/clinical_applicator.html
index ab036222c..c8039518b 100644
--- a/api/_modules/cyclops/monitor/clinical_applicator.html
+++ b/api/_modules/cyclops/monitor/clinical_applicator.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/monitor/synthetic_applicator.html b/api/_modules/cyclops/monitor/synthetic_applicator.html
index e95b0e2cf..b68a6f8b2 100644
--- a/api/_modules/cyclops/monitor/synthetic_applicator.html
+++ b/api/_modules/cyclops/monitor/synthetic_applicator.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/report/report.html b/api/_modules/cyclops/report/report.html
index d75400d58..94e37c02a 100644
--- a/api/_modules/cyclops/report/report.html
+++ b/api/_modules/cyclops/report/report.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/cyclops/tasks/classification.html b/api/_modules/cyclops/tasks/classification.html
index 63085a8d7..25b3ec041 100644
--- a/api/_modules/cyclops/tasks/classification.html
+++ b/api/_modules/cyclops/tasks/classification.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_modules/index.html b/api/_modules/index.html
index e1e7f9935..23be9d4b8 100644
--- a/api/_modules/index.html
+++ b/api/_modules/index.html
@@ -60,7 +60,6 @@
-
@@ -191,24 +190,17 @@
User Guide Toggle navigation of User Guide
Installation
-Evaluation Toggle navigation of Evaluation
-Breast Cancer Classification and Evaluation
+Evaluation Toggle navigation of Evaluation
-Model Report Toggle navigation of Model Report
Tutorials Toggle navigation of Tutorials
-Example use cases Toggle navigation of Example use cases
diff --git a/api/_sources/examples/metrics.ipynb.txt b/api/_sources/examples/metrics.ipynb.txt
deleted file mode 100644
index cf2b10ba4..000000000
--- a/api/_sources/examples/metrics.ipynb.txt
+++ /dev/null
@@ -1,351 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Breast Cancer Classification and Evaluation\n",
- "\n",
- "The Breast Cancer dataset is a well-suited example for demonstrating CyclOps features due to its two distinct classes (binary classification) and complete absence of missing values. This clean and organized structure makes it an ideal starting point for exploring CyclOps Evaluator."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "from datasets.arrow_dataset import Dataset\n",
- "from sklearn import datasets\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.svm import SVC\n",
- "\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate import evaluator\n",
- "from cyclops.evaluate.fairness import evaluate_fairness\n",
- "from cyclops.evaluate.metrics import BinaryAccuracy, create_metric\n",
- "from cyclops.evaluate.metrics.experimental import BinaryAUROC, BinaryAveragePrecision\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.report.plot.classification import ClassificationPlotter"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Loading the data\n",
- "breast_cancer_data = datasets.load_breast_cancer(as_frame=True)\n",
- "X, y = breast_cancer_data.data, breast_cancer_data.target"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Features\n",
- "Just taking a quick look at features and their stats..."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = breast_cancer_data.frame\n",
- "df.describe().T"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Splitting into train and test\n",
- "X_train, X_test, y_train, y_test = train_test_split(\n",
- " X,\n",
- " y,\n",
- " test_size=0.1,\n",
- " random_state=13,\n",
- ")\n",
- "\n",
- "# Use SVM classifier for binary classification\n",
- "svc = SVC(C=10, gamma=0.01, probability=True)\n",
- "svc.fit(X_train, y_train)\n",
- "\n",
- "# model predictions\n",
- "y_pred = svc.predict(X_test)\n",
- "y_pred_prob = svc.predict_proba(X_test)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now we can use CyclOps evaluation metrics to evaluate our model's performance. You can either use each metric individually by calling them, or define a ``MetricDict`` object.\n",
- "Here, we show both methods."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Individual Metrics\n",
- "In case you need only a single metric, you can create an object of the desired metric and call it on your ground truth and predictions:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "bin_acc_metric = BinaryAccuracy()\n",
- "bin_acc_metric(y_test.values, np.float64(y_pred))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Using ``MetricDict``\n",
- "You may define a collection of metrics in case you need more metrics. It also speeds up the metric calculation."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_roc_curve\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)\n",
- "metric_collection(y_test.values, np.float64(y_pred))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You may reset the metrics collection and add other metrics:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "metric_collection.reset()\n",
- "metric_collection.add_metrics(BinaryAveragePrecision(), BinaryAUROC())\n",
- "metric_collection(y_test.values, np.float64(y_pred))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Data Slicing\n",
- "\n",
- "In addition to overall metrics, it might be interesting to see how the model performs on certain subpopulation or subsets. We can define these subsets using ``SliceSpec`` objects."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"worst radius\": {\n",
- " \"min_value\": 14.0,\n",
- " \"max_value\": 15.0,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"worst radius\": {\n",
- " \"min_value\": 15.0,\n",
- " \"max_value\": 17.0,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"worst texture\": {\n",
- " \"min_value\": 23.1,\n",
- " \"max_value\": 28.7,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Intersectional slicing"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "When subpopulation slices are specified using the ``SliceSpec``, sometimes we wish create combinations of intersectional slices. We can use the ``intersections`` argument to specify this."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "slice_spec = SliceSpec(spec_list, intersections=2)\n",
- "slice_spec"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Preparing Result\n",
- "\n",
- "CyclOps Evaluator takes data as a HuggingFace Dataset object, so we combine predictions and features in a dataframe, and create a `Dataset` object:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Combine result and features for test data\n",
- "df = pd.concat([X_test, pd.DataFrame(y_test, columns=[\"target\"])], axis=1)\n",
- "df[\"preds\"] = y_pred\n",
- "df[\"preds_prob\"] = y_pred_prob[:, 1]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Create Dataset object\n",
- "breast_cancer_data = Dataset.from_pandas(df)\n",
- "breast_cancer_sliced_result = evaluator.evaluate(\n",
- " dataset=breast_cancer_data,\n",
- " metrics=metric_collection, # type: ignore[list-item]\n",
- " target_columns=\"target\",\n",
- " prediction_columns=\"preds_prob\",\n",
- " slice_spec=slice_spec,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can visualize the ``BinaryF1Score`` and ``BinaryPrecision`` for the different slices"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name in [\"BinaryF1Score\", \"BinaryPrecision\"]\n",
- " }\n",
- " for slice_name, slice_results in breast_cancer_sliced_result[\n",
- " \"model_for_preds_prob\"\n",
- " ].items()\n",
- "}\n",
- "# Plotting the metric values for all the slices.\n",
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Fairness Evaluator\n",
- "\n",
- "The Breast Cancer dataset may not be a very good example to apply fairness, but to demonstrate how you can use our fairness evaluator, we apply it to `mean texture` feature. It's recommended to use it on features with discrete values. For optimal results, the feature should have less than 50 unique categories."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fairness_result = evaluate_fairness(\n",
- " dataset=breast_cancer_data,\n",
- " metrics=\"binary_precision\", # type: ignore[list-item]\n",
- " groups=\"mean texture\",\n",
- " target_columns=\"target\",\n",
- " prediction_columns=\"preds_prob\",\n",
- ")\n",
- "fairness_result"
- ]
- },
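- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Since `mean texture` is continuous, a natural refinement (sketched below; the bin edges and labels are illustrative, not part of the original workflow) is to bin it into a few discrete groups before running the fairness evaluator:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hedged sketch: discretize `mean texture` so the fairness evaluator\n",
- "# operates on a small number of groups. Bin edges and labels are\n",
- "# illustrative, not taken from the original notebook.\n",
- "df[\"mean texture group\"] = pd.cut(\n",
- "    df[\"mean texture\"],\n",
- "    bins=[0, 18, 21, 24, np.inf],\n",
- "    labels=[\"low\", \"mid-low\", \"mid-high\", \"high\"],\n",
- ").astype(str)\n",
- "evaluate_fairness(\n",
- "    dataset=Dataset.from_pandas(df),\n",
- "    metrics=\"binary_precision\",\n",
- "    groups=\"mean texture group\",\n",
- "    target_columns=\"target\",\n",
- "    prediction_columns=\"preds_prob\",\n",
- ")"
- ]
- },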
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api/_sources/examples/report.ipynb.txt b/api/_sources/examples/report.ipynb.txt
deleted file mode 100644
index 6312ba6f5..000000000
--- a/api/_sources/examples/report.ipynb.txt
+++ /dev/null
@@ -1,900 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import copy\n",
- "import inspect\n",
- "import os\n",
- "import shutil\n",
- "from datetime import date\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import plotly.express as px\n",
- "from datasets import Dataset\n",
- "from kaggle.api.kaggle_api_extended import KaggleApi\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.preprocessing import MinMaxScaler\n",
- "from tqdm import tqdm\n",
- "\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate.metrics import create_metric\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.report import ModelCardReport\n",
- "from cyclops.report.plot.classification import ClassificationPlotter\n",
- "from cyclops.report.utils import flatten_results_dict"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Report Generation for Heart Failure Prediction\n",
- "Here's an example to demonstrate how we can generate a report as we proceed through all the steps to train and evaluate a model. For this purpose, we are going to use Kaggle's heart prediction failure dataset and gradually populate the report with information about dataset, model and results."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create Model Card Report\n",
- "First, we should create a `ModelCardReport` object to fill in the fields and sections after training."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Constants\n",
- "DATA_DIR = \"./data\"\n",
- "RANDOM_SEED = 21"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Loading\n",
- "Before starting, make sure to install the Kaggle API by running pip install kaggle. To use the Kaggle API, you need to sign up for a Kaggle account at https://www.kaggle.com. Then go to the ‘Account’ tab of your user profile (https://www.kaggle.com//account) and select ‘Create API Token’. This will trigger the download of kaggle.json, a file containing your API credentials. Place this file in the location ~/.kaggle/kaggle.json on your machine."
- ]
- },
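- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Optional: install the Kaggle API client if it isn't already available\n",
- "# (uncomment to run inside the notebook).\n",
- "# !pip install kaggle"
- ]
- },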
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "api = KaggleApi()\n",
- "api.authenticate()\n",
- "api.dataset_download_files(\n",
- " \"fedesoriano/heart-failure-prediction\",\n",
- " path=DATA_DIR,\n",
- " unzip=True,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = pd.read_csv(os.path.join(DATA_DIR, \"heart.csv\"))\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df.describe().T"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"Sex\")\n",
- "\n",
- "fig.update_layout(\n",
- " title=\"Sex Distribution\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Adding figures to report\n",
- "We can add figures and diagrams to report. We can define caption and the section of the report that this figure belongs to. Since we are exploring the distribution of different features in the dataset, we add it to `datasets` section:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Sex Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Age distribution figure\n",
- "We plot a histogram of ages similarly and add the figure to our report. This will appear under **Dataset** section:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.histogram(df, x=\"Age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Outcome distribution\n",
- "Plot outcome distribution and add it to report. Similarly, we also place it under **Dataset** section:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df[\"outcome\"] = df[\"HeartDisease\"].astype(\"int\")\n",
- "df = df.drop(columns=[\"HeartDisease\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"outcome\")\n",
- "fig.update_traces(textinfo=\"percent+label\")\n",
- "fig.update_layout(title_text=\"Outcome Distribution\")\n",
- "fig.update_traces(\n",
- " hovertemplate=\"Outcome: %{label} Count: \\\n",
- " %{value} Percent: %{percent}\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Outcome Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "class_counts = df[\"outcome\"].value_counts()\n",
- "class_ratio = class_counts[0] / class_counts[1]\n",
- "print(class_ratio, class_counts)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Preprocessing"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This dataset does not have any null values, so we can jump to feature scaling. The string data in the dataframe is in the form of object, we need to convert it back to string to work on it:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "string_col = df.select_dtypes(include=\"object\").columns\n",
- "df[string_col] = df[string_col].astype(\"string\")\n",
- "string_col = df.select_dtypes(\"string\").columns.to_list()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We are going to apply tree-based models to our data, so we use `LabelEncoder`:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "target = \"outcome\"\n",
- "\n",
- "df_processed = pd.get_dummies(df, columns=string_col, drop_first=False)\n",
- "df_processed.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "feature_cols = df_processed.columns.to_list()\n",
- "feature_cols.remove(target)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Before training, let's document dataset in the model card.\n",
- "This can be done using the log_dataset method, which takes the following arguments: \n",
- "\n",
- "- `description`: A description of the dataset. \n",
- "- `citation`: The citation for the dataset. \n",
- "- `link`: A link to a resource for the dataset. \n",
- "- `license_id`: The SPDX license identifier for the dataset. \n",
- "- `version`: The version of the dataset. \n",
- "- `features`: A list of features in the dataset. \n",
- "- `split`: The split of the dataset (train, test, validation, etc.). \n",
- "- `sensitive_features`: A list of sensitive features used to train/evaluate the model. \n",
- "- `sensitive_feature_justification`: A justification for the sensitive features used to train/evaluate the model.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_dataset(\n",
- " description=\"\"\"This dataset was created by combining different datasets\n",
- " already available independently but not combined before. In this dataset,\n",
- " 5 heart datasets are combined over 11 common features. Every dataset used\n",
- " can be found under the Index of heart disease datasets from UCI\n",
- " Machine Learning Repository on the following link:\n",
- " https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/.\"\"\",\n",
- " citation=inspect.cleandoc(\n",
- " \"\"\"\n",
- " @misc{fedesoriano,\n",
- " title={Heart Failure Prediction Dataset.},\n",
- " author={Fedesoriano, F},\n",
- " year={2021},\n",
- " publisher={Kaggle}\n",
- " }\n",
- " \"\"\",\n",
- " ),\n",
- " link=\"\"\"\n",
- " https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction\n",
- " \"\"\",\n",
- " license_id=\"CC0-1.0\",\n",
- " version=\"Version 1\",\n",
- " features=df.columns.to_list().remove(target),\n",
- " sensitive_features=[\"Sex\", \"Age\"],\n",
- " sensitive_feature_justification=\"Demographic information like age and gender \\\n",
- " often have a strong correlation with health outcomes. For example, older \\\n",
- " patients are more likely to have a higher risk of heart disease.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create HuggingFace Dataset\n",
- "We convert our processed Pandas dataframe into a Hugging Face dataset, for later evaluation by Cyclop metrics."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "dataset = Dataset.from_pandas(df_processed)\n",
- "dataset.cleanup_cache_files()\n",
- "print(dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Split dataframe into inputs and outputs\n",
- "X, y = df_processed[feature_cols], df_processed[target]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Training"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Splitting into train and test\n",
- "X_train, X_test, y_train, y_test = train_test_split(\n",
- " X,\n",
- " y,\n",
- " test_size=0.2,\n",
- " random_state=RANDOM_SEED,\n",
- ")\n",
- "classifier = LogisticRegression()\n",
- "scaler = MinMaxScaler()\n",
- "X_train = scaler.fit_transform(X_train)\n",
- "X_test = scaler.fit_transform(X_test)\n",
- "classifier.fit(X_train, y_train)\n",
- "y_pred = classifier.predict(X_test)\n",
- "y_pred_prob = classifier.predict_proba(X_test)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Evaluation\n",
- "As demonstrated in evaluation tutorial, we define a metric dict:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_roc_curve\",\n",
- " \"binary_auroc\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)\n",
- "metric_collection(y_test.values, np.float64(y_pred))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Slicing\n",
- "In addition to overall metrics, it might be useful to see how the model performs on certain subpopulation or subsets. We can define these subsets using SliceSpec objects."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"Age\": {\n",
- " \"min_value\": 30,\n",
- " \"max_value\": 50,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"Age\": {\n",
- " \"min_value\": 50,\n",
- " \"max_value\": 70,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Below, we are combining the raw features of the test data and the predictions so that we use them later for slice-specific evaluations."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Get positions of matching indices in df\n",
- "matching_positions = y_test.index.get_indexer(df.index)\n",
- "\n",
- "# Select rows from df using matching positions (valid positions are non-negative)\n",
- "df_test = df.iloc[matching_positions[matching_positions >= 0]]\n",
- "df_test[\"preds\"] = y_pred\n",
- "df_test[\"preds_prob\"] = y_pred_prob[:, 1]\n",
- "df_test.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Age distribution in test data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.histogram(df_test, x=\"Age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution in Test Data\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Logging metrics and results to report\n",
- "Here, we gather evalutaions and add them to the report.\n",
- "\n",
- "We can add a performance metric to the model card using the `log_performance_metric` method, which expects a dictionary where the keys are in the following format: `slice_name/metric_name`. For instance, `overall/accuracy` or `Age:[30 - 50)/BinaryPrecision`.\n",
- "\n",
- "We first need to process the evaluation results to get the metrics in the right format."
- ]
- },
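- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Illustrative shape of the flattened results dictionary; the metric\n",
- "# values here are hypothetical, not computed from this model.\n",
- "example_results = {\n",
- "    \"overall/BinaryAccuracy\": 0.85,\n",
- "    \"Age:[30 - 50)/BinaryPrecision\": 0.80,\n",
- "}"
- ]
- },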
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from cyclops.evaluate import evaluator\n",
- "\n",
- "\n",
- "# Create Dataset object\n",
- "heart_failure_data = Dataset.from_pandas(df_test)\n",
- "\n",
- "result = evaluator.evaluate(\n",
- " dataset=heart_failure_data,\n",
- " metrics=metric_collection, # type: ignore[list-item]\n",
- " target_columns=target,\n",
- " prediction_columns=\"preds_prob\",\n",
- " slice_spec=slice_spec,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results_flat = flatten_results_dict(\n",
- " results=result, remove_metrics=[\"BinaryROC\"], model_name=\"model_for_preds_prob\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "result"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results_flat"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We first need to process the evaluation results to get the metrics in the right format. The descriptions dictionary will appear as you hover on metrics in the report, so feel free to change them as it's appropriate for your usage."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also use the `ClassificationPlotter`(as demonstrated in Evaluation example) to plot the performance metrics and the add the figure to the model card using the log_plotly_figure method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the ROC curves and AUROC results for all the slices\n",
- "model_name = \"model_for_preds_prob\"\n",
- "roc_curves = {\n",
- " slice_name: slice_results[\"BinaryROC\"]\n",
- " for slice_name, slice_results in result[model_name].items()\n",
- "}\n",
- "aurocs = {\n",
- " slice_name: slice_results[\"BinaryAUROC\"]\n",
- " for slice_name, slice_results in result[model_name].items()\n",
- "}\n",
- "\n",
- "# plotting the ROC curves for all the slices\n",
- "roc_plot = plotter.roc_curve_comparison(roc_curves, aurocs=aurocs)\n",
- "report.log_plotly_figure(\n",
- " fig=roc_plot,\n",
- " caption=\"ROC Curve for All Patients\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "roc_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the overall classification metric values.\n",
- "overall_performance = {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in result[model_name][\"overall\"].items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- "}\n",
- "# Plotting the overall classification metric values.\n",
- "overall_performance_plot = plotter.metrics_value(\n",
- " overall_performance,\n",
- " title=\"Overall Performance\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=overall_performance_plot,\n",
- " caption=\"Overall Performance\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "overall_performance_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- " }\n",
- " for slice_name, slice_results in result[model_name].items()\n",
- "}\n",
- "# Plotting the metric values for all the slices.\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "report.log_plotly_figure(\n",
- " fig=slice_metrics_plot,\n",
- " caption=\"Slice Metric Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Report Generation\n",
- "\n",
- "### ModelCard\n",
- "First, let's document the model details section. A **ModelCard** has several **Sections** and each **Section** includes multiple **Fields**. Model details could be one of the sections in our model card, and it has the following fields by default:\n",
- "\n",
- "- **`description`**: A high-level description of the model and its usage for a general audience. \n",
- "- **`version`**: The version of the model. \n",
- "- **`owners`**: The individuals or organizations that own the model. \n",
- "- **`license`**: The license under which the model is made available. \n",
- "- **`citation`**: The citation for the model. \n",
- "- **`references`**: Links to resources that are relevant to the model. \n",
- "- **`path`**: The path to where the model is stored. \n",
- "- **`regulatory_requirements`**: The regulatory requirements that are relevant to the model.\n",
- "\n",
- "We can add additional fields to the model details section by passing a dictionary to the `log_from_dict` method and specifying the section name as `model_details`. You can also use the `log_descriptor` method to add a new field object with a description attribute to any section of the model card."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"Heart Failure Prediction Model\",\n",
- " \"description\": \"The model was trained on the Kaggle Heart Failure \\\n",
- " Prediction Dataset to predict risk of heart failure.\",\n",
- " },\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "\n",
- "report.log_version(\n",
- " version_str=\"0.0.1\",\n",
- " date=str(date.today()),\n",
- " description=\"Initial Release\",\n",
- ")\n",
- "report.log_owner(\n",
- " name=\"CyclOps Team\",\n",
- " contact=\"vectorinstitute.github.io/cyclops/\",\n",
- " email=\"cyclops@vectorinstitute.ai\",\n",
- ")\n",
- "report.log_license(identifier=\"Apache-2.0\")\n",
- "report.log_reference(\n",
- " link=\"https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html\", # noqa: E501\n",
- ")"
- ]
- },
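- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As a quick illustration of `log_descriptor` (a hedged sketch; the field name and description below are made up for this example):"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hypothetical custom field; the name and text are illustrative only.\n",
- "report.log_descriptor(\n",
- "    name=\"validation_approach\",\n",
- "    description=\"Evaluated on a 20% held-out test split.\",\n",
- "    section_name=\"model_details\",\n",
- ")"
- ]
- },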
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Considerations\n",
- "\n",
- "Next, let’s populate the considerations section, which includes the following fields by default: \n",
- "- **`users`**: The intended users of the model. \n",
- "- **`use_cases`**: The use cases for the model. These could be primary, downstream or out-of-scope use cases. \n",
- "- **`fairness_assessment`**: A description of the benefits and harms of the model for different groups as well as the steps taken to mitigate the harms. \n",
- "- **`ethical_considerations`**: The risks associated with using the model and the steps taken to mitigate them. This can be populated using the log_risk method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"users\": [\n",
- " {\"description\": \"Hospitals\"},\n",
- " {\"description\": \"Clinicians\"},\n",
- " ],\n",
- " },\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_user(description=\"ML Engineers\")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of heart failure.\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of pathologies and conditions other\\\n",
- " than heart failure.\",\n",
- " kind=\"out-of-scope\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"sex, age\",\n",
- " benefit=\"Improved health outcomes for patients.\",\n",
- " harm=\"Biased predictions for patients in certain groups (e.g. older patients) \\\n",
- " may lead to worse health outcomes.\",\n",
- " mitigation_strategy=\"We will monitor the performance of the model on these groups \\\n",
- " and retrain the model if the performance drops below a certain threshold.\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"The model may be used to make decisions that affect the health of patients.\",\n",
- " mitigation_strategy=\"The model should be continuously monitored for performance \\\n",
- " and retrained if the performance drops below a certain threshold.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Exporting report\n",
- "Once the model card is populated, you can generate the report using the `export` method. The report is generated in the form of an HTML file. A JSON file containing the model card data will also be generated along with the HTML file. By default, the files will be saved in a folder named `cyclops_reports` in the current working directory. You can change the path by passing a `output_dir` argument when instantiating the `ModelCardReport` class."
- ]
- },
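- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hedged sketch (not run here): write reports to a custom folder by\n",
- "# passing output_dir at construction; the path below is illustrative.\n",
- "# report = ModelCardReport(output_dir=\"./my_reports\")"
- ]
- },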
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "np.random.seed(42)\n",
- "\n",
- "synthetic_timestamps = pd.date_range(\n",
- " start=\"1/1/2020\", periods=10, freq=\"D\"\n",
- ").values.astype(str)\n",
- "\n",
- "\n",
- "report._model_card.overview = None\n",
- "report_path = report.export(\n",
- " output_filename=\"heart_failure_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[0],\n",
- " last_n_evals=3,\n",
- ")\n",
- "\n",
- "shutil.copy(f\"{report_path}\", \".\")\n",
- "metric_save = None\n",
- "for i in tqdm(range(len(synthetic_timestamps[1:]))):\n",
- " if i == 3:\n",
- " report._model_card.quantitative_analysis.performance_metrics.append(\n",
- " metric_save,\n",
- " )\n",
- " report._model_card.overview = None\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " metric.value = np.clip(\n",
- " metric.value + np.random.normal(0, 0.1),\n",
- " 0,\n",
- " 1,\n",
- " )\n",
- " metric.tests[0].passed = bool(metric.value >= 0.7)\n",
- " if i == 2:\n",
- " metrics = []\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " if metric.type == \"BinaryAccuracy\" and metric.slice == \"Age:[30 - 50)\":\n",
- " metric_save = copy.deepcopy(metric)\n",
- " else:\n",
- " metrics.append(metric)\n",
- " report._model_card.quantitative_analysis.performance_metrics = metrics\n",
- " report_path = report.export(\n",
- " output_filename=\"heart_failure_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[i + 1],\n",
- " last_n_evals=3,\n",
- " )\n",
- " shutil.copy(f\"{report_path}\", \".\")\n",
- "shutil.rmtree(\"./cyclops_report\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You're now able to view the report [report](./heart_failure_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "cyclops",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/api/_sources/tutorials/diabetes_130/readmission_prediction.ipynb.txt b/api/_sources/tutorials/diabetes_130/readmission_prediction.ipynb.txt
deleted file mode 100644
index 4e50a97d1..000000000
--- a/api/_sources/tutorials/diabetes_130/readmission_prediction.ipynb.txt
+++ /dev/null
@@ -1,1417 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Readmission Prediction\n",
- "\n",
- "This notebook showcases readmission prediction on the [Diabetes 130-US Hospitals for Years 1999-2008](https://archive.ics.uci.edu/dataset/296/diabetes+130-us+hospitals+for+years+1999-2008) using CyclOps. The task is formulated as a binary classification task, where we predict the probability of early readmission of the patient within 30 days of discharge."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Install libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "!pip install pycyclops\n",
- "!pip install ucimlrepo"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "\"\"\"Readmission prediction.\"\"\"\n",
- "\n",
- "# ruff: noqa: E402\n",
- "\n",
- "import copy\n",
- "import inspect\n",
- "from datetime import date\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import plotly.express as px\n",
- "from datasets import Dataset\n",
- "from datasets.features import ClassLabel\n",
- "from sklearn.compose import ColumnTransformer\n",
- "from sklearn.impute import SimpleImputer\n",
- "from sklearn.pipeline import Pipeline\n",
- "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
- "from ucimlrepo import fetch_ucirepo\n",
- "\n",
- "from cyclops.data.df.feature import TabularFeatures\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate.fairness import FairnessConfig # noqa: E402\n",
- "from cyclops.evaluate.metrics import create_metric\n",
- "from cyclops.evaluate.metrics.experimental.functional import (\n",
- " binary_npv,\n",
- " binary_ppv,\n",
- " binary_roc,\n",
- ")\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.models.catalog import create_model\n",
- "from cyclops.report import ModelCardReport\n",
- "from cyclops.report.plot.classification import ClassificationPlotter\n",
- "from cyclops.report.utils import flatten_results_dict\n",
- "from cyclops.tasks import BinaryTabularClassificationTask"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CyclOps offers a package for documentation of the model through a model report. The `ModelCardReport` class is used to populate and generate the model report as an HTML file. The model report has the following sections:\n",
- "\n",
- "- Overview: Provides a high level overview of how the model is doing (a quick glance of important metrics), and how it is doing over time (performance over several metrics and subgroups over time).\n",
- "- Datasets: High level statistics of the training data, including changes in distribution over time.\n",
- "- Quantitative Analysis: This section contains additional detailed performance metrics of the model for different sets of the data and subpopulations.\n",
- "- Fairness Analysis: This section contains the fairness metrics of the model.\n",
- "- Model Details: This section contains descriptive metadata about the model such as the owners, version, license, etc.\n",
- "- Model Parameters: This section contains the technical details of the model such as the model architecture, training parameters, etc.\n",
- "- Considerations: This section contains descriptions of the considerations involved in developing and using the model such as the intended use, limitations, etc.\n",
- "\n",
- "We will use this to document the model development process as we go along and generate the model report at the end.\n",
- "\n",
- "`The model report tool is a work in progress and is subject to change.`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Constants"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "RANDOM_SEED = 85\n",
- "NAN_THRESHOLD = 0.75\n",
- "TRAIN_SIZE = 0.8\n",
- "EVAL_NUM = 3"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Loading"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "diabetes_130_data = fetch_ucirepo(id=296)\n",
- "features = diabetes_130_data[\"data\"][\"features\"]\n",
- "targets = diabetes_130_data[\"data\"][\"targets\"]\n",
- "metadata = diabetes_130_data[\"metadata\"]\n",
- "variables = diabetes_130_data[\"variables\"]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "metadata"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "def transform_label(value):\n",
- " \"\"\"Transform string labels of readmission into 0/1 binary labels.\n",
- "\n",
- " Parameters\n",
- " ----------\n",
- " value: str\n",
- " Input value\n",
- "\n",
- " Returns\n",
- " -------\n",
- " int\n",
- " 0 if not readmitted or if greater than 30 days, 1 if less than 30 days\n",
- "\n",
- " \"\"\"\n",
- " if value in [\"NO\", \">30\"]:\n",
- " return 0\n",
- " if value == \"<30\":\n",
- " return 1\n",
- "\n",
- " raise ValueError(\"Unexpected value for readmission!\")\n",
- "\n",
- "\n",
- "df = features\n",
- "targets[\"readmitted\"] = targets[\"readmitted\"].apply(transform_label)\n",
- "df[\"readmitted\"] = targets"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Choose a small subset for modelling"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = df[0:1000]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Remove features that are NaNs or have just a single unique value"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "features_to_remove = []\n",
- "for col in df:\n",
- " if len(df[col].value_counts()) <= 1:\n",
- " features_to_remove.append(col)\n",
- "df = df.drop(columns=features_to_remove)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Sex values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"gender\")\n",
- "\n",
- "fig.update_layout(\n",
- " title=\"Gender Distribution\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Gender Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Age distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.histogram(df, x=\"age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Age Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Outcome distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "df[\"outcome\"] = df[\"readmitted\"].astype(\"int\")\n",
- "df = df.drop(columns=[\"readmitted\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"outcome\")\n",
- "fig.update_traces(textinfo=\"percent+label\")\n",
- "fig.update_layout(title_text=\"Outcome Distribution\")\n",
- "fig.update_traces(\n",
- " hovertemplate=\"Outcome: %{label} Count: \\\n",
- " %{value} Percent: %{percent}\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Outcome Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "class_counts = df[\"outcome\"].value_counts()\n",
- "class_ratio = class_counts[0] / class_counts[1]\n",
- "print(class_ratio, class_counts)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "From the features in the dataset, we select all of them to train the model!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "features_list = list(df.columns)\n",
- "features_list.remove(\"outcome\")\n",
- "features_list = sorted(features_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Identifying feature types\n",
- "\n",
- "Cyclops `TabularFeatures` class helps to identify feature types, an essential step before preprocessing the data. Understanding feature types (numerical/categorical/binary) allows us to apply appropriate preprocessing steps for each type."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "tab_features = TabularFeatures(\n",
- " data=df.reset_index(),\n",
- " features=features_list,\n",
- " by=\"index\",\n",
- " targets=\"outcome\",\n",
- ")\n",
- "print(tab_features.types)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Creating data preprocessors\n",
- "\n",
- "We create a data preprocessor using sklearn's ColumnTransformer. This helps in applying different preprocessing steps to different columns in the dataframe. For instance, binary features might be processed differently from numeric features."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"mean\")), (\"scaler\", MinMaxScaler())],\n",
- ")\n",
- "\n",
- "binary_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"most_frequent\"))],\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_features = sorted((tab_features.features_by_type(\"numeric\")))\n",
- "numeric_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in numeric_features\n",
- "]\n",
- "print(numeric_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "binary_features = sorted(tab_features.features_by_type(\"binary\"))\n",
- "binary_features.remove(\"outcome\")\n",
- "ordinal_features = sorted(\n",
- " tab_features.features_by_type(\"ordinal\")\n",
- " + [\"medical_specialty\", \"diag_1\", \"diag_2\", \"diag_3\"]\n",
- ")\n",
- "binary_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in binary_features\n",
- "]\n",
- "ordinal_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in ordinal_features\n",
- "]\n",
- "print(binary_features, ordinal_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "preprocessor = ColumnTransformer(\n",
- " transformers=[\n",
- " (\"num\", numeric_transformer, numeric_indices),\n",
- " (\n",
- " \"onehot\",\n",
- " OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False),\n",
- " binary_indices + ordinal_indices,\n",
- " ),\n",
- " ],\n",
- " remainder=\"passthrough\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's document the dataset in the model card. This can be done using the `log_dataset` method, which takes the following arguments:\n",
- "- description: A description of the dataset.\n",
- "- citation: The citation for the dataset.\n",
- "- link: A link to a resource for the dataset.\n",
- "- license_id: The SPDX license identifier for the dataset.\n",
- "- version: The version of the dataset.\n",
- "- features: A list of features in the dataset.\n",
- "- split: The split of the dataset (train, test, validation, etc.).\n",
- "- sensitive_features: A list of sensitive features used to train/evaluate the model.\n",
- "- sensitive_feature_justification: A justification for the sensitive features used to train/evaluate the model."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_dataset(\n",
- " description=metadata[\"abstract\"],\n",
- " citation=inspect.cleandoc(\n",
- " \"\"\"\n",
- " @article{strack2014impact,\n",
- " title={Impact of HbA1c measurement on hospital readmission rates: analysis of 70,000 clinical database patient records},\n",
- " author={Strack, Beata and DeShazo, Jonathan P and Gennings, Chris and Olmo, Juan L and Ventura, Sebastian and Cios, Krzysztof J and Clore, John N and others},\n",
- " journal={BioMed research international},\n",
- " volume={2014},\n",
- " year={2014},\n",
- " publisher={Hindawi}\n",
- " }\n",
- " \"\"\",\n",
- " ),\n",
- " link=metadata[\"repository_url\"],\n",
- " license_id=\"CC0-1.0\",\n",
- " version=\"Version 1\",\n",
- " features=features_list,\n",
- " sensitive_features=[\"gender\", \"age\", \"race\"],\n",
- " sensitive_feature_justification=\"Demographic information like age and gender \\\n",
- " often have a strong correlation with health outcomes. For example, older \\\n",
- " patients are more likely to have a higher risk of readmission.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Creating Hugging Face Dataset\n",
- "\n",
- "We convert our processed Pandas dataframe into a Hugging Face dataset, a powerful and easy-to-use data format which is also compatible with CyclOps models and evaluator modules. The dataset is then split to train and test sets."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "dataset = Dataset.from_pandas(df)\n",
- "dataset.cleanup_cache_files()\n",
- "print(dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "dataset = dataset.cast_column(\"outcome\", ClassLabel(num_classes=2))\n",
- "dataset = dataset.train_test_split(\n",
- " train_size=TRAIN_SIZE,\n",
- " stratify_by_column=\"outcome\",\n",
- " seed=RANDOM_SEED,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Model Creation\n",
- "\n",
- "CyclOps model registry allows for straightforward creation and selection of models. This registry maintains a list of pre-configured models, which can be instantiated with a single line of code. Here we use a SGD classifier to fit a logisitic regression model. The model configurations can be passed to `create_model` based on the sklearn parameters for SGDClassifier."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model_name = \"xgb_classifier\"\n",
- "model = create_model(model_name, random_state=123)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Task Creation\n",
- "\n",
- "We use Cyclops tasks to define our model's task (in this case, readmission prediction), train the model, make predictions, and evaluate performance. Cyclops task classes encapsulate the entire ML pipeline into a single, cohesive structure, making the process smooth and easy to manage."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "readmission_prediction_task = BinaryTabularClassificationTask(\n",
- " {model_name: model},\n",
- " task_features=features_list,\n",
- " task_target=\"outcome\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "readmission_prediction_task.list_models()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Training\n",
- "\n",
- "If `best_model_params` is passed to the `train` method, the best model will be selected after the hyperparameter search. The parameters in `best_model_params` indicate the values to create the parameters grid.\n",
- "\n",
- "Note that the data preprocessor needs to be passed to the tasks methods if the Hugging Face dataset is not already preprocessed. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "best_model_params = {\n",
- " \"n_estimators\": [100, 250, 500],\n",
- " \"learning_rate\": [0.1, 0.01],\n",
- " \"max_depth\": [2, 5],\n",
- " \"reg_lambda\": [0, 1, 10],\n",
- " \"colsample_bytree\": [0.7, 0.8, 1],\n",
- " \"gamma\": [0, 1, 2, 10],\n",
- " \"method\": \"random\",\n",
- " \"scale_pos_weight\": [int(class_ratio)],\n",
- "}\n",
- "readmission_prediction_task.train(\n",
- " dataset[\"train\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " best_model_params=best_model_params,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_params = readmission_prediction_task.list_models_params()[model_name]\n",
- "print(model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the model parameters to the report.**\n",
- "\n",
- "We can add model parameters to the model card using the `log_model_parameters` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_model_parameters(params=model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prediction\n",
- "\n",
- "The prediction output can be either the whole Hugging Face dataset with the prediction columns added to it or the single column containing the predicted values."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "y_pred = readmission_prediction_task.predict(\n",
- " dataset[\"test\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " proba=True,\n",
- " only_predictions=True,\n",
- ")\n",
- "prediction_df = pd.DataFrame(\n",
- " {\n",
- " \"y_prob\": [y_pred_i[1] for y_pred_i in y_pred],\n",
- " \"y_true\": dataset[\"test\"][\"outcome\"],\n",
- " \"gender\": dataset[\"test\"][\"gender\"],\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Evaluation\n",
- "\n",
- "Evaluation is done using various evaluation metrics that provide different perspectives on the model's predictive abilities i.e. standard performance metrics and fairness metrics.\n",
- "\n",
- "The standard performance metrics can be created using the `MetricDict` object."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_auroc\",\n",
- " \"binary_average_precision\",\n",
- " \"binary_roc_curve\",\n",
- " \"binary_precision_recall_curve\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In addition to overall metrics, it might be interesting to see how the model performs on certain subpopulations. We can define these subpopulations using `SliceSpec` objects. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"age\": {\n",
- " \"value\": \"[50-60)\",\n",
- " },\n",
- " },\n",
- " {\n",
- " \"age\": {\n",
- " \"value\": \"[60-70)\",\n",
- " },\n",
- " },\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A `MetricDict` can also be defined for the fairness metrics."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "specificity = create_metric(metric_name=\"binary_specificity\", experimental=True)\n",
- "sensitivity = create_metric(metric_name=\"binary_sensitivity\", experimental=True)\n",
- "\n",
- "fpr = -specificity + 1\n",
- "fnr = -sensitivity + 1\n",
- "\n",
- "ber = (fpr + fnr) / 2\n",
- "\n",
- "fairness_metric_collection = MetricDict(\n",
- " {\n",
- " \"Sensitivity\": sensitivity,\n",
- " \"Specificity\": specificity,\n",
- " \"BER\": ber,\n",
- " },\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The FairnessConfig helps in setting up and evaluating the fairness of the model predictions."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fairness_config = FairnessConfig(\n",
- " metrics=fairness_metric_collection,\n",
- " dataset=None, # dataset is passed from the evaluator\n",
- " target_columns=None, # target columns are passed from the evaluator\n",
- " groups=[\"age\"],\n",
- " group_base_values={\"age\": \"[40-50)\"},\n",
- " thresholds=[0.5],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The evaluate methods outputs the evaluation results and the Hugging Face dataset with the predictions added to it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "results, dataset_with_preds = readmission_prediction_task.evaluate(\n",
- " dataset=dataset[\"test\"],\n",
- " metrics=metric_collection,\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=slice_spec,\n",
- " batch_size=-1,\n",
- " fairness_config=fairness_config,\n",
- " override_fairness_metrics=False,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results_female, _ = readmission_prediction_task.evaluate(\n",
- " dataset=dataset[\"test\"],\n",
- " metrics=MetricDict(\n",
- " {\n",
- " \"BinaryAccuracy\": create_metric(\n",
- " metric_name=\"binary_accuracy\",\n",
- " experimental=True,\n",
- " ),\n",
- " },\n",
- " ),\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=SliceSpec([{\"gender\": {\"value\": \"Female\"}}], include_overall=False),\n",
- " batch_size=-1,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the performance metrics to the report.**\n",
- "\n",
- "We can add a performance metric to the model card using the `log_performance_metric` method, which expects a dictionary where the keys are in the following format: `slice_name/metric_name`. For instance, `overall/accuracy`. \n",
- "\n",
- "We first need to process the evaluation results to get the metrics in the right format."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_name = f\"model_for_preds.{model_name}\"\n",
- "results_flat = flatten_results_dict(\n",
- " results=results,\n",
- " remove_metrics=[\"BinaryROC\", \"BinaryPrecisionRecallCurve\"],\n",
- " model_name=model_name,\n",
- ")\n",
- "results_female_flat = flatten_results_dict(\n",
- " results=results_female,\n",
- " model_name=model_name,\n",
- ")\n",
- "# ruff: noqa: W505\n",
- "for name, metric in results_female_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )\n",
- "\n",
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also use the `ClassificationPlotter` to plot the performance metrics and the add the figure to the model card using the `log_plotly_figure` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the ROC curves and AUROC results for all the slices\n",
- "roc_curves = {\n",
- " slice_name: slice_results[\"BinaryROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "aurocs = {\n",
- " slice_name: slice_results[\"BinaryAUROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "roc_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the ROC curves for all the slices\n",
- "roc_plot = plotter.roc_curve_comparison(roc_curves, aurocs=aurocs)\n",
- "report.log_plotly_figure(\n",
- " fig=roc_plot,\n",
- " caption=\"ROC Curve for Female Patients\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "roc_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the precision-recall curves and average precision results for all the slices\n",
- "pr_curves = {\n",
- " slice_name: slice_results[\"BinaryPrecisionRecallCurve\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "average_precisions = {\n",
- " slice_name: slice_results[\"BinaryAveragePrecision\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "pr_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the precision-recall curves for all the slices\n",
- "pr_plot = plotter.precision_recall_curve_comparison(\n",
- " pr_curves,\n",
- " auprcs=average_precisions,\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=pr_plot,\n",
- " caption=\"Precision-Recall Curve Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "pr_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the overall classification metric values.\n",
- "overall_performance = {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in results[model_name][\"overall\"].items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the overall classification metric values.\n",
- "overall_performance_plot = plotter.metrics_value(\n",
- " overall_performance,\n",
- " title=\"Overall Performance\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=overall_performance_plot,\n",
- " caption=\"Overall Performance\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "overall_performance_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- " }\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "report.log_plotly_figure(\n",
- " fig=slice_metrics_plot,\n",
- " caption=\"Slice Metric Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "# ROC curve components\n",
- "pred_probs = np.array(dataset_with_preds[\"preds.xgb_classifier\"])\n",
- "true_labels = np.array(dataset_with_preds[\"outcome\"])\n",
- "roc_curve = binary_roc(true_labels, pred_probs)\n",
- "ppv = np.zeros_like(roc_curve.thresholds)\n",
- "npv = np.zeros_like(roc_curve.thresholds)\n",
- "\n",
- "# Calculate PPV and NPV for each threshold\n",
- "for i, threshold in enumerate(roc_curve.thresholds):\n",
- " # Calculate PPV and NPV\n",
- " ppv[i] = binary_ppv(true_labels, pred_probs, threshold=threshold)\n",
- " npv[i] = binary_npv(true_labels, pred_probs, threshold=threshold)\n",
- "runway_plot = plotter.threshperf(roc_curve, ppv, npv, pred_probs)\n",
- "report.log_plotly_figure(\n",
- " fig=runway_plot,\n",
- " caption=\"Threshold-Performance plot\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "runway_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also plot the calibration curve of the model on the test set"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "calibration_plot = plotter.calibration(\n",
- " prediction_df, y_true_col=\"y_true\", y_prob_col=\"y_prob\", group_col=\"gender\"\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=calibration_plot,\n",
- " caption=\"Calibration plot\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "calibration_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Reformatting the fairness metrics\n",
- "fairness_results = copy.deepcopy(results[\"fairness\"])\n",
- "fairness_metrics = {}\n",
- "# remove the group size from the fairness results and add it to the slice name\n",
- "for slice_name, slice_results in fairness_results.items():\n",
- " group_size = slice_results.pop(\"Group Size\")\n",
- " fairness_metrics[f\"{slice_name} (Size={group_size})\"] = slice_results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the fairness metrics\n",
- "fairness_plot = plotter.metrics_comparison_scatter(\n",
- " fairness_metrics,\n",
- " title=\"Fairness Metrics\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=fairness_plot,\n",
- " caption=\"Fairness Metrics\",\n",
- " section_name=\"fairness analysis\",\n",
- ")\n",
- "fairness_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Report Generation\n",
- "\n",
- "Before generating the model card, let us document some of the details of the model and some considerations involved in developing and using the model.\n",
- "\n",
- "\n",
- "Let's start with populating the model details section, which includes the following fields by default:\n",
- "- description: A high-level description of the model and its usage for a general audience.\n",
- "- version: The version of the model.\n",
- "- owners: The individuals or organizations that own the model.\n",
- "- license: The license under which the model is made available.\n",
- "- citation: The citation for the model.\n",
- "- references: Links to resources that are relevant to the model.\n",
- "- path: The path to where the model is stored.\n",
- "- regulatory_requirements: The regulatory requirements that are relevant to the model.\n",
- "\n",
- "We can add additional fields to the model details section by passing a dictionary to the `log_from_dict` method and specifying the section name as `model_details`. You can also use the `log_descriptor` method to add a new field object with a `description` attribute to any section of the model card."
- ]
- },
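- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For example, a minimal sketch of `log_descriptor` (hypothetical field name and description; we assume it takes a name, a description, and a target section, per the description above):\n",
- "\n",
- "```python\n",
- "# Assumption: this adds a custom descriptor field to the model details section.\n",
- "report.log_descriptor(\n",
- " name=\"clinical_scope\",\n",
- " description=\"Developed and evaluated on adult inpatient encounters only.\",\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "```"
- ]
- },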
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"Readmission Prediction Model\",\n",
- " \"description\": \"The model was trained on the Diabetes 130-US Hospitals for Years 1999-2008 \\\n",
- " dataset to predict risk of readmission within 30 days of discharge.\",\n",
- " },\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "\n",
- "report.log_version(\n",
- " version_str=\"0.0.1\",\n",
- " date=str(date.today()),\n",
- " description=\"Initial Release\",\n",
- ")\n",
- "report.log_owner(\n",
- " name=\"CyclOps Team\",\n",
- " contact=\"vectorinstitute.github.io/cyclops/\",\n",
- " email=\"cyclops@vectorinstitute.ai\",\n",
- ")\n",
- "report.log_license(identifier=\"Apache-2.0\")\n",
- "report.log_reference(\n",
- " link=\"https://xgboost.readthedocs.io/en/stable/python/python_api.html\", # noqa: E501\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, let's populate the considerations section, which includes the following fields by default:\n",
- "- users: The intended users of the model.\n",
- "- use_cases: The use cases for the model. These could be primary, downstream or out-of-scope use cases.\n",
- "- fairness_assessment: A description of the benefits and harms of the model for different groups as well as the steps taken to mitigate the harms.\n",
- "- ethical_considerations: The risks associated with using the model and the steps taken to mitigate them. This can be populated using the `log_risk` method.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"users\": [\n",
- " {\"description\": \"Hospitals\"},\n",
- " {\"description\": \"Clinicians\"},\n",
- " ],\n",
- " },\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_user(description=\"ML Engineers\")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of readmission.\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of pathologies and conditions other\\\n",
- " than risk of readmission.\",\n",
- " kind=\"out-of-scope\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"sex, age\",\n",
- " benefit=\"Improved health outcomes for patients.\",\n",
- " harm=\"Biased predictions for patients in certain groups (e.g. older patients) \\\n",
- " may lead to worse health outcomes.\",\n",
- " mitigation_strategy=\"We will monitor the performance of the model on these groups \\\n",
- " and retrain the model if the performance drops below a certain threshold.\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"The model may be used to make decisions that affect the health of patients.\",\n",
- " mitigation_strategy=\"The model should be continuously monitored for performance \\\n",
- " and retrained if the performance drops below a certain threshold.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Once the model card is populated, you can generate the report using the `export` method. The report is generated in the form of an HTML file. A JSON file containing the model card data will also be generated along with the HTML file. By default, the files will be saved in a folder named `cyclops_reports` in the current working directory. You can change the path by passing a `output_dir` argument when instantiating the `ModelCardReport` class."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "synthetic_timestamps = pd.date_range(\n",
- " start=\"20/6/2024\", periods=3, freq=\"W\"\n",
- ").values.astype(str)\n",
- "report_path = report.export(\n",
- " output_filename=\"readmission_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[EVAL_NUM - 1],\n",
- " last_n_evals=3,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "tags": []
- },
- "source": [
- "You can view the generated HTML [report](./heart_failure_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api/_sources/tutorials/kaggle/heart_failure_prediction.ipynb.txt b/api/_sources/tutorials/kaggle/heart_failure_prediction.ipynb.txt
deleted file mode 100644
index 1f987623f..000000000
--- a/api/_sources/tutorials/kaggle/heart_failure_prediction.ipynb.txt
+++ /dev/null
@@ -1,1398 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Heart Failure Prediction\n",
- "\n",
- "This notebook showcases heart failure prediction on a [popular Kaggle dataset](https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction) using CyclOps. The task is formulated as a binary classification task, where we predict the probability that a patient will have heart failure."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "\"\"\"Heart failure prediction.\"\"\"\n",
- "\n",
- "import copy\n",
- "import inspect\n",
- "import shutil\n",
- "from datetime import date\n",
- "\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import plotly.express as px\n",
- "from datasets import Dataset\n",
- "from datasets.features import ClassLabel\n",
- "from kaggle.api.kaggle_api_extended import KaggleApi\n",
- "from sklearn.compose import ColumnTransformer\n",
- "from sklearn.impute import SimpleImputer\n",
- "from sklearn.pipeline import Pipeline\n",
- "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
- "\n",
- "from cyclops.data.df.feature import TabularFeatures\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate.fairness import FairnessConfig # noqa: E402\n",
- "from cyclops.evaluate.metrics import create_metric\n",
- "from cyclops.evaluate.metrics.experimental.functional import (\n",
- " binary_npv,\n",
- " binary_ppv,\n",
- " binary_roc,\n",
- ")\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.models.catalog import create_model\n",
- "from cyclops.report import ModelCardReport\n",
- "from cyclops.report.plot.classification import ClassificationPlotter\n",
- "from cyclops.report.utils import flatten_results_dict\n",
- "from cyclops.tasks import BinaryTabularClassificationTask\n",
- "from cyclops.utils.file import join, load_dataframe"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CyclOps offers a package for documentation of the model through a model report. The `ModelCardReport` class is used to populate and generate the model report as an HTML file. The model report has the following sections:\n",
- "\n",
- "- Overview: Provides a high level overview of how the model is doing (a quick glance of important metrics), and how it is doing over time (performance over several metrics and subgroups over time).\n",
- "- Datasets: High level statistics of the training data, including changes in distribution over time.\n",
- "- Quantitative Analysis: This section contains additional detailed performance metrics of the model for different sets of the data and subpopulations.\n",
- "- Fairness Analysis: This section contains the fairness metrics of the model.\n",
- "- Model Details: This section contains descriptive metadata about the model such as the owners, version, license, etc.\n",
- "- Model Parameters: This section contains the technical details of the model such as the model architecture, training parameters, etc.\n",
- "- Considerations: This section contains descriptions of the considerations involved in developing and using the model such as the intended use, limitations, etc.\n",
- "\n",
- "We will use this to document the model development process as we go along and generate the model report at the end.\n",
- "\n",
- "`The model report tool is a work in progress and is subject to change.`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Constants"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "DATA_DIR = \"./data\"\n",
- "RANDOM_SEED = 85\n",
- "NAN_THRESHOLD = 0.75\n",
- "TRAIN_SIZE = 0.8"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Loading\n",
- "\n",
- "Before starting, make sure to install the Kaggle API by running `pip install kaggle`. To use the Kaggle API, you need to sign up for a Kaggle account at https://www.kaggle.com. Then go to the 'Account' tab of your user profile (`https://www.kaggle.com//account`) and select 'Create API Token'. This will trigger the download of kaggle.json, a file containing your API credentials. Place this file in the location `~/.kaggle/kaggle.json` on your machine."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "api = KaggleApi()\n",
- "api.authenticate()\n",
- "api.dataset_download_files(\n",
- " \"fedesoriano/heart-failure-prediction\",\n",
- " path=DATA_DIR,\n",
- " unzip=True,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "df = load_dataframe(join(DATA_DIR, \"heart.csv\"), file_format=\"csv\")\n",
- "df = df.reset_index()\n",
- "df[\"ID\"] = df.index\n",
- "print(df)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Sex values"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"Sex\")\n",
- "\n",
- "fig.update_layout(\n",
- " title=\"Sex Distribution\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Sex Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Age distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.histogram(df, x=\"Age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Age Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Outcome distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "df[\"outcome\"] = df[\"HeartDisease\"].astype(\"int\")\n",
- "df = df.drop(columns=[\"HeartDisease\"])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.pie(df, names=\"outcome\")\n",
- "fig.update_traces(textinfo=\"percent+label\")\n",
- "fig.update_layout(title_text=\"Outcome Distribution\")\n",
- "fig.update_traces(\n",
- " hovertemplate=\"Outcome: %{label} Count: \\\n",
- " %{value} Percent: %{percent}\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Outcome Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "class_counts = df[\"outcome\"].value_counts()\n",
- "class_ratio = class_counts[0] / class_counts[1]\n",
- "print(class_ratio, class_counts)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "print(df.columns)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "From all the features in the dataset, we select 20 of them which was reported by [Li et al.](https://pubmed.ncbi.nlm.nih.gov/34301649/) to be the most important features in this classification task. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "features_list = [\n",
- " \"Age\",\n",
- " \"Sex\",\n",
- " \"ChestPainType\",\n",
- " \"RestingBP\",\n",
- " \"Cholesterol\",\n",
- " \"FastingBS\",\n",
- " \"RestingECG\",\n",
- " \"MaxHR\",\n",
- " \"ExerciseAngina\",\n",
- " \"Oldpeak\",\n",
- " \"ST_Slope\",\n",
- "]\n",
- "\n",
- "features_list = sorted(features_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Identifying feature types\n",
- "\n",
- "Cyclops `TabularFeatures` class helps to identify feature types, an essential step before preprocessing the data. Understanding feature types (numerical/categorical/binary) allows us to apply appropriate preprocessing steps for each type."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "tab_features = TabularFeatures(\n",
- " data=df.reset_index(),\n",
- " features=features_list,\n",
- " by=\"ID\",\n",
- " targets=\"outcome\",\n",
- ")\n",
- "print(tab_features.types)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Creating data preprocessors\n",
- "\n",
- "We create a data preprocessor using sklearn's ColumnTransformer. This helps in applying different preprocessing steps to different columns in the dataframe. For instance, binary features might be processed differently from numeric features."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"mean\")), (\"scaler\", MinMaxScaler())],\n",
- ")\n",
- "\n",
- "binary_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"most_frequent\"))],\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_features = sorted((tab_features.features_by_type(\"numeric\")))\n",
- "numeric_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in numeric_features\n",
- "]\n",
- "print(numeric_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "binary_features = sorted(tab_features.features_by_type(\"binary\"))\n",
- "binary_features.remove(\"outcome\")\n",
- "ordinal_features = sorted(tab_features.features_by_type(\"ordinal\"))\n",
- "binary_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in binary_features\n",
- "]\n",
- "ordinal_indices = [\n",
- " df[features_list].columns.get_loc(column) for column in ordinal_features\n",
- "]\n",
- "print(binary_features, ordinal_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "preprocessor = ColumnTransformer(\n",
- " transformers=[\n",
- " (\"num\", numeric_transformer, numeric_indices),\n",
- " (\n",
- " \"onehot\",\n",
- " OneHotEncoder(handle_unknown=\"ignore\"),\n",
- " binary_indices + ordinal_indices,\n",
- " ),\n",
- " ],\n",
- " remainder=\"passthrough\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's document the dataset in the model card. This can be done using the `log_dataset` method, which takes the following arguments:\n",
- "- description: A description of the dataset.\n",
- "- citation: The citation for the dataset.\n",
- "- link: A link to a resource for the dataset.\n",
- "- license_id: The SPDX license identifier for the dataset.\n",
- "- version: The version of the dataset.\n",
- "- features: A list of features in the dataset.\n",
- "- split: The split of the dataset (train, test, validation, etc.).\n",
- "- sensitive_features: A list of sensitive features used to train/evaluate the model.\n",
- "- sensitive_feature_justification: A justification for the sensitive features used to train/evaluate the model."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_dataset(\n",
- " description=\"\"\"This dataset was created by combining different datasets\n",
- " already available independently but not combined before. In this dataset,\n",
- " 5 heart datasets are combined over 11 common features. Every dataset used\n",
- " can be found under the Index of heart disease datasets from UCI\n",
- " Machine Learning Repository on the following link:\n",
- " https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/.\"\"\",\n",
- " citation=inspect.cleandoc(\n",
- " \"\"\"\n",
- " @misc{fedesoriano,\n",
- " title={Heart Failure Prediction Dataset.},\n",
- " author={Fedesoriano, F},\n",
- " year={2021},\n",
- " publisher={Kaggle}\n",
- " }\n",
- " \"\"\",\n",
- " ),\n",
- " link=\"\"\"\n",
- " https://www.kaggle.com/datasets/fedesoriano/heart-failure-prediction\n",
- " \"\"\",\n",
- " license_id=\"CC0-1.0\",\n",
- " version=\"Version 1\",\n",
- " features=features_list,\n",
- " sensitive_features=[\"Sex\", \"Age\"],\n",
- " sensitive_feature_justification=\"Demographic information like age and gender \\\n",
- " often have a strong correlation with health outcomes. For example, older \\\n",
- " patients are more likely to have a higher risk of heart disease.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Creating Hugging Face Dataset\n",
- "\n",
- "We convert our processed Pandas dataframe into a Hugging Face dataset, a powerful and easy-to-use data format which is also compatible with CyclOps models and evaluator modules. The dataset is then split to train and test sets."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "dataset = Dataset.from_pandas(df)\n",
- "dataset.cleanup_cache_files()\n",
- "print(dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "dataset = dataset.cast_column(\"outcome\", ClassLabel(num_classes=2))\n",
- "dataset = dataset.train_test_split(\n",
- " train_size=TRAIN_SIZE,\n",
- " stratify_by_column=\"outcome\",\n",
- " seed=RANDOM_SEED,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Model Creation\n",
- "\n",
- "CyclOps model registry allows for straightforward creation and selection of models. This registry maintains a list of pre-configured models, which can be instantiated with a single line of code. Here we use a SGD classifier to fit a logisitic regression model. The model configurations can be passed to `create_model` based on the sklearn parameters for SGDClassifier."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model_name = \"sgd_classifier\"\n",
- "model = create_model(model_name, random_state=123, verbose=0, class_weight=\"balanced\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Task Creation\n",
- "\n",
- "We use Cyclops tasks to define our model's task (in this case, heart failure prediction), train the model, make predictions, and evaluate performance. Cyclops task classes encapsulate the entire ML pipeline into a single, cohesive structure, making the process smooth and easy to manage."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "heart_failure_prediction_task = BinaryTabularClassificationTask(\n",
- " {model_name: model},\n",
- " task_features=features_list,\n",
- " task_target=\"outcome\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "heart_failure_prediction_task.list_models()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Training\n",
- "\n",
- "If `best_model_params` is passed to the `train` method, the best model will be selected after the hyperparameter search. The parameters in `best_model_params` indicate the values to create the parameters grid.\n",
- "\n",
- "Note that the data preprocessor needs to be passed to the tasks methods if the Hugging Face dataset is not already preprocessed. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "best_model_params = {\n",
- " \"alpha\": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],\n",
- " \"learning_rate\": [\"constant\", \"optimal\", \"invscaling\", \"adaptive\"],\n",
- " \"eta0\": [0.1, 0.01, 0.001, 0.0001],\n",
- " \"metric\": \"roc_auc\",\n",
- " \"method\": \"grid\",\n",
- "}\n",
- "\n",
- "heart_failure_prediction_task.train(\n",
- " dataset[\"train\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " best_model_params=best_model_params,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_params = heart_failure_prediction_task.list_models_params()[model_name]\n",
- "print(model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the model parameters to the report.**\n",
- "\n",
- "We can add model parameters to the model card using the `log_model_parameters` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_model_parameters(params=model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prediction\n",
- "\n",
- "The prediction output can be either the whole Hugging Face dataset with the prediction columns added to it or the single column containing the predicted values."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "y_pred = heart_failure_prediction_task.predict(\n",
- " dataset[\"test\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " proba=True,\n",
- " only_predictions=True,\n",
- ")\n",
- "prediction_df = pd.DataFrame(\n",
- " {\n",
- " \"y_prob\": [y_pred_i[1] for y_pred_i in y_pred],\n",
- " \"y_true\": dataset[\"test\"][\"outcome\"],\n",
- " \"Sex\": dataset[\"test\"][\"Sex\"],\n",
- " }\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Evaluation\n",
- "\n",
- "Evaluation is done using various evaluation metrics that provide different perspectives on the model's predictive abilities i.e. standard performance metrics and fairness metrics.\n",
- "\n",
- "The standard performance metrics can be created using the `MetricDict` object."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_auroc\",\n",
- " \"binary_average_precision\",\n",
- " \"binary_roc_curve\",\n",
- " \"binary_precision_recall_curve\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In addition to overall metrics, it might be interesting to see how the model performs on certain subpopulations. We can define these subpopulations using `SliceSpec` objects. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"Age\": {\n",
- " \"min_value\": 30,\n",
- " \"max_value\": 50,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"Age\": {\n",
- " \"min_value\": 50,\n",
- " \"max_value\": 70,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\"Sex\": {\"value\": \"M\"}},\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A `MetricDict` can also be defined for the fairness metrics."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "specificity = create_metric(metric_name=\"binary_specificity\", experimental=True)\n",
- "sensitivity = create_metric(metric_name=\"binary_sensitivity\", experimental=True)\n",
- "\n",
- "fpr = -specificity + 1\n",
- "fnr = -sensitivity + 1\n",
- "\n",
- "ber = (fpr + fnr) / 2\n",
- "\n",
- "fairness_metric_collection = MetricDict(\n",
- " {\n",
- " \"Sensitivity\": sensitivity,\n",
- " \"Specificity\": specificity,\n",
- " \"BER\": ber,\n",
- " },\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The FairnessConfig helps in setting up and evaluating the fairness of the model predictions."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fairness_config = FairnessConfig(\n",
- " metrics=fairness_metric_collection,\n",
- " dataset=None, # dataset is passed from the evaluator\n",
- " target_columns=None, # target columns are passed from the evaluator\n",
- " groups=[\"Age\"],\n",
- " group_bins={\"Age\": [40, 50]},\n",
- " group_base_values={\"Age\": 40},\n",
- " thresholds=[0.5],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The evaluate methods outputs the evaluation results and the Hugging Face dataset with the predictions added to it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "results, dataset_with_preds = heart_failure_prediction_task.evaluate(\n",
- " dataset=dataset[\"test\"],\n",
- " metrics=metric_collection,\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=slice_spec,\n",
- " batch_size=-1,\n",
- " fairness_config=fairness_config,\n",
- " override_fairness_metrics=False,\n",
- ")"
- ]
- },
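- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We also compute binary accuracy separately on the female and male slices, to be logged to the report alongside the main evaluation results."
- ]
- },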
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results_female, _ = heart_failure_prediction_task.evaluate(\n",
- " dataset=dataset[\"test\"],\n",
- " metrics=MetricDict(\n",
- " {\n",
- " \"BinaryAccuracy\": create_metric(\n",
- " metric_name=\"binary_accuracy\",\n",
- " experimental=True,\n",
- " ),\n",
- " },\n",
- " ),\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=SliceSpec(\n",
- " [{\"Sex\": {\"value\": \"F\"}}, {\"Sex\": {\"value\": \"M\"}}], include_overall=False\n",
- " ),\n",
- " batch_size=-1,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the performance metrics to the report.**\n",
- "\n",
- "We can add a performance metric to the model card using the `log_performance_metric` method, which expects a dictionary where the keys are in the following format: `slice_name/metric_name`. For instance, `overall/accuracy`. \n",
- "\n",
- "We first need to process the evaluation results to get the metrics in the right format."
- ]
- },
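- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A minimal sketch of the flattened format (the slice names and metric values below are hypothetical):"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hypothetical example of the flattened results format: each key pairs a\n",
- "# slice name with a metric name, separated by \"/\".\n",
- "example_results_flat = {\n",
- "    \"overall/BinaryAccuracy\": 0.85,\n",
- "    \"Sex:M/BinaryF1Score\": 0.78,\n",
- "}"
- ]
- },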
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_name = f\"model_for_preds.{model_name}\"\n",
- "results_flat = flatten_results_dict(\n",
- " results=results,\n",
- " remove_metrics=[\"BinaryROC\", \"BinaryPrecisionRecallCurve\"],\n",
- " model_name=model_name,\n",
- ")\n",
- "results_female_flat = flatten_results_dict(\n",
- " results=results_female,\n",
- " model_name=model_name,\n",
- ")\n",
- "# ruff: noqa: W505\n",
- "for name, metric in results_female_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )\n",
- "\n",
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also use the `ClassificationPlotter` to plot the performance metrics and the add the figure to the model card using the `log_plotly_figure` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the ROC curves and AUROC results for all the slices\n",
- "roc_curves = {\n",
- " slice_name: slice_results[\"BinaryROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "aurocs = {\n",
- " slice_name: slice_results[\"BinaryAUROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "roc_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the ROC curves for all the slices\n",
- "roc_plot = plotter.roc_curve_comparison(roc_curves, aurocs=aurocs)\n",
- "report.log_plotly_figure(\n",
- " fig=roc_plot,\n",
- " caption=\"ROC Curve for Female Patients\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "roc_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the precision-recall curves and average precision results for all the slices\n",
- "pr_curves = {\n",
- " slice_name: slice_results[\"BinaryPrecisionRecallCurve\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "average_precisions = {\n",
- " slice_name: slice_results[\"BinaryAveragePrecision\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "pr_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the precision-recall curves for all the slices\n",
- "pr_plot = plotter.precision_recall_curve_comparison(\n",
- " pr_curves,\n",
- " auprcs=average_precisions,\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=pr_plot,\n",
- " caption=\"Precision-Recall Curve Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "pr_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the overall classification metric values.\n",
- "overall_performance = {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in results[model_name][\"overall\"].items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the overall classification metric values.\n",
- "overall_performance_plot = plotter.metrics_value(\n",
- " overall_performance,\n",
- " title=\"Overall Performance\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=overall_performance_plot,\n",
- " caption=\"Overall Performance\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "overall_performance_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- " }\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "report.log_plotly_figure(\n",
- " fig=slice_metrics_plot,\n",
- " caption=\"Slice Metric Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "# ROC curve components\n",
- "pred_probs = np.array(dataset_with_preds[\"preds.sgd_classifier\"])\n",
- "true_labels = np.array(dataset_with_preds[\"outcome\"])\n",
- "roc_curve = binary_roc(true_labels, pred_probs)\n",
- "ppv = np.zeros_like(roc_curve.thresholds)\n",
- "npv = np.zeros_like(roc_curve.thresholds)\n",
- "\n",
- "# Calculate PPV and NPV for each threshold\n",
- "for i, threshold in enumerate(roc_curve.thresholds):\n",
- " # Calculate PPV and NPV\n",
- " ppv[i] = binary_ppv(true_labels, pred_probs, threshold=threshold)\n",
- " npv[i] = binary_npv(true_labels, pred_probs, threshold=threshold)\n",
- "runway_plot = plotter.threshperf(roc_curve, ppv, npv, pred_probs)\n",
- "report.log_plotly_figure(\n",
- " fig=runway_plot,\n",
- " caption=\"Threshold-Performance plot\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "runway_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also plot the calibration curve of the model on the test set"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "calibration_plot = plotter.calibration(\n",
- " prediction_df, y_true_col=\"y_true\", y_prob_col=\"y_prob\", group_col=\"Sex\"\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=calibration_plot,\n",
- " caption=\"Calibration plot\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "calibration_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Reformatting the fairness metrics\n",
- "fairness_results = copy.deepcopy(results[\"fairness\"])\n",
- "fairness_metrics = {}\n",
- "# remove the group size from the fairness results and add it to the slice name\n",
- "for slice_name, slice_results in fairness_results.items():\n",
- " group_size = slice_results.pop(\"Group Size\")\n",
- " fairness_metrics[f\"{slice_name} (Size={group_size})\"] = slice_results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the fairness metrics\n",
- "fairness_plot = plotter.metrics_comparison_scatter(\n",
- " fairness_metrics,\n",
- " title=\"Fairness Metrics\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=fairness_plot,\n",
- " caption=\"Fairness Metrics\",\n",
- " section_name=\"fairness analysis\",\n",
- ")\n",
- "fairness_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Report Generation\n",
- "\n",
- "Before generating the model card, let us document some of the details of the model and some considerations involved in developing and using the model.\n",
- "\n",
- "\n",
- "Let's start with populating the model details section, which includes the following fields by default:\n",
- "- description: A high-level description of the model and its usage for a general audience.\n",
- "- version: The version of the model.\n",
- "- owners: The individuals or organizations that own the model.\n",
- "- license: The license under which the model is made available.\n",
- "- citation: The citation for the model.\n",
- "- references: Links to resources that are relevant to the model.\n",
- "- path: The path to where the model is stored.\n",
- "- regulatory_requirements: The regulatory requirements that are relevant to the model.\n",
- "\n",
- "We can add additional fields to the model details section by passing a dictionary to the `log_from_dict` method and specifying the section name as `model_details`. You can also use the `log_descriptor` method to add a new field object with a `description` attribute to any section of the model card."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"Heart Failure Prediction Model\",\n",
- " \"description\": \"The model was trained on the Kaggle Heart Failure \\\n",
- " Prediction Dataset to predict risk of heart failure.\",\n",
- " },\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "\n",
- "report.log_version(\n",
- " version_str=\"0.0.1\",\n",
- " date=str(date.today()),\n",
- " description=\"Initial Release\",\n",
- ")\n",
- "report.log_owner(\n",
- " name=\"CyclOps Team\",\n",
- " contact=\"vectorinstitute.github.io/cyclops/\",\n",
- " email=\"cyclops@vectorinstitute.ai\",\n",
- ")\n",
- "report.log_license(identifier=\"Apache-2.0\")\n",
- "report.log_reference(\n",
- " link=\"https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html\", # noqa: E501\n",
- ")"
- ]
- },
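- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As a minimal sketch of the `log_descriptor` method mentioned above (assuming it accepts a field name, description, and target section; the field name and text here are hypothetical):"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hypothetical extra field added to the model details section;\n",
- "# the name and description are illustrative only.\n",
- "report.log_descriptor(\n",
- "    name=\"validation\",\n",
- "    description=\"The model was validated on a held-out test split.\",\n",
- "    section_name=\"model_details\",\n",
- ")"
- ]
- },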
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, let's populate the considerations section, which includes the following fields by default:\n",
- "- users: The intended users of the model.\n",
- "- use_cases: The use cases for the model. These could be primary, downstream or out-of-scope use cases.\n",
- "- fairness_assessment: A description of the benefits and harms of the model for different groups as well as the steps taken to mitigate the harms.\n",
- "- ethical_considerations: The risks associated with using the model and the steps taken to mitigate them. This can be populated using the `log_risk` method.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"users\": [\n",
- " {\"description\": \"Hospitals\"},\n",
- " {\"description\": \"Clinicians\"},\n",
- " ],\n",
- " },\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_user(description=\"ML Engineers\")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of heart failure.\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_use_case(\n",
- " description=\"Predicting risk of pathologies and conditions other\\\n",
- " than heart failure.\",\n",
- " kind=\"out-of-scope\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"sex, age\",\n",
- " benefit=\"Improved health outcomes for patients.\",\n",
- " harm=\"Biased predictions for patients in certain groups (e.g. older patients) \\\n",
- " may lead to worse health outcomes.\",\n",
- " mitigation_strategy=\"We will monitor the performance of the model on these groups \\\n",
- " and retrain the model if the performance drops below a certain threshold.\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"The model may be used to make decisions that affect the health of patients.\",\n",
- " mitigation_strategy=\"The model should be continuously monitored for performance \\\n",
- " and retrained if the performance drops below a certain threshold.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Once the model card is populated, you can generate the report using the `export` method. The report is generated in the form of an HTML file. A JSON file containing the model card data will also be generated along with the HTML file. By default, the files will be saved in a folder named `cyclops_reports` in the current working directory. You can change the path by passing a `output_dir` argument when instantiating the `ModelCardReport` class."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "np.random.seed(42)\n",
- "\n",
- "synthetic_timestamps = pd.date_range(\n",
- " start=\"1/1/2020\", periods=10, freq=\"D\"\n",
- ").values.astype(str)\n",
- "\n",
- "\n",
- "report._model_card.overview = None\n",
- "report_path = report.export(\n",
- " output_filename=\"heart_failure_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[0],\n",
- " last_n_evals=3,\n",
- ")\n",
- "\n",
- "shutil.copy(f\"{report_path}\", \".\")\n",
- "metric_save = None\n",
- "for i in range(len(synthetic_timestamps[1:])):\n",
- " if i == 3:\n",
- " report._model_card.quantitative_analysis.performance_metrics.append(\n",
- " metric_save,\n",
- " )\n",
- " report._model_card.overview = None\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " metric.value = np.clip(\n",
- " metric.value + np.random.normal(0, 0.1),\n",
- " 0,\n",
- " 1,\n",
- " )\n",
- " metric.tests[0].passed = bool(metric.value >= 0.7)\n",
- " if i == 2:\n",
- " metrics = []\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " if metric.type == \"BinaryAccuracy\" and metric.slice == \"Age:[30 - 50)\":\n",
- " metric_save = copy.deepcopy(metric)\n",
- " else:\n",
- " metrics.append(metric)\n",
- " report._model_card.quantitative_analysis.performance_metrics = metrics\n",
- " report_path = report.export(\n",
- " output_filename=\"heart_failure_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[i + 1],\n",
- " )\n",
- " shutil.copy(f\"{report_path}\", \".\")\n",
- "shutil.rmtree(\"./cyclops_report\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "tags": []
- },
- "source": [
- "You can view the generated HTML [report](./heart_failure_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api/_sources/tutorials/mimiciv/mortality_prediction.ipynb.txt b/api/_sources/tutorials/mimiciv/mortality_prediction.ipynb.txt
deleted file mode 100644
index 5ad66b0c6..000000000
--- a/api/_sources/tutorials/mimiciv/mortality_prediction.ipynb.txt
+++ /dev/null
@@ -1,1325 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Mortality Prediction\n",
- "\n",
- "This notebook showcases mortality prediction on the [MIMICIV](https://physionet.org/content/mimiciv/2.0) dataset using CyclOps. The task is formulated as a binary classification task, whether the patient will die within the next N days. The prediction can be made after M number of days after admission. For example, if N = 14 and M = 1, we are predicting risk of patient mortality within 14 days of admission after considering 24 hours of data after admission."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\"\"\"Mortality Prediction.\"\"\"\n",
- "\n",
- "import copy\n",
- "import shutil\n",
- "from datetime import date\n",
- "\n",
- "import cycquery.ops as qo\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "import plotly.express as px\n",
- "import plotly.graph_objects as go\n",
- "from cycquery import MIMICIVQuerier\n",
- "from datasets import Dataset\n",
- "from datasets.features import ClassLabel\n",
- "from imblearn.over_sampling import SMOTE\n",
- "from imblearn.pipeline import Pipeline as ImbPipeline\n",
- "from sklearn.compose import ColumnTransformer\n",
- "from sklearn.impute import SimpleImputer\n",
- "from sklearn.pipeline import Pipeline\n",
- "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
- "\n",
- "from cyclops.data.aggregate import RESTRICT_TIMESTAMP, Aggregator\n",
- "from cyclops.data.clean import normalize_names\n",
- "from cyclops.data.df.feature import TabularFeatures\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate.fairness import FairnessConfig # noqa: E402\n",
- "from cyclops.evaluate.metrics import create_metric\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.models.catalog import create_model\n",
- "from cyclops.report import ModelCardReport\n",
- "from cyclops.report.plot.classification import ClassificationPlotter\n",
- "from cyclops.report.utils import flatten_results_dict\n",
- "from cyclops.tasks import BinaryTabularClassificationTask\n",
- "from cyclops.utils.common import add_years_approximate"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "CyclOps offers a package for documentation of the model through a model report. The `ModelCardReport` class is used to populate and generate the model report as an HTML file. The model report has the following sections:\n",
- "\n",
- "- Overview: Provides a high level overview of how the model is doing (a quick glance of important metrics), and how it is doing over time (performance over several metrics and subgroups over time).\n",
- "- Datasets: High level statistics of the training data, including changes in distribution over time.\n",
- "- Quantitative Analysis: This section contains additional detailed performance metrics of the model for different sets of the data and subpopulations.\n",
- "- Fairness Analysis: This section contains the fairness metrics of the model.\n",
- "- Model Details: This section contains descriptive metadata about the model such as the owners, version, license, etc.\n",
- "- Model Parameters: This section contains the technical details of the model such as the model architecture, training parameters, etc.\n",
- "- Considerations: This section contains descriptions of the considerations involved in developing and using the model such as the intended use, limitations, etc.\n",
- "\n",
- "We will use this to document the model development process as we go along and generate the model report at the end.\n",
- "\n",
- "`The model report tool is a work in progress and is subject to change.`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Constants"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "M = 1\n",
- "N = 14\n",
- "NAN_THRESHOLD = 0.25\n",
- "TRAIN_SIZE = 0.8\n",
- "RANDOM_SEED = 12"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Querying & Processing\n",
- "\n",
- "### Compute mortality (labels)\n",
- "\n",
- "1. Get encounters\n",
- "2. Filter out encounters less than M days\n",
- "3. Set label = 1 for encounters where deathtime is within N days after admission\n",
- "4. Get lab events\n",
- "5. Aggregate them by computing mean, merge with encounter data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "querier = MIMICIVQuerier(\n",
- " dbms=\"postgresql\",\n",
- " port=5432,\n",
- " host=\"localhost\",\n",
- " database=\"mimiciv-2.0\",\n",
- " user=\"postgres\",\n",
- " password=\"pwd\",\n",
- ")\n",
- "\n",
- "\n",
- "def get_encounters():\n",
- " \"\"\"Get encounters data.\"\"\"\n",
- " patients = querier.patients()\n",
- " encounters = querier.mimiciv_hosp.admissions()\n",
- " drop_op = qo.Drop(\n",
- " [\"language\", \"marital_status\", \"edregtime\", \"edouttime\"],\n",
- " )\n",
- " encounters = encounters.ops(drop_op)\n",
- " patient_encounters = patients.join(encounters, on=\"subject_id\")\n",
- " patient_encounters = patient_encounters.run()\n",
- " patient_encounters[\"age\"] = (\n",
- " patient_encounters[\"admittime\"].dt.year\n",
- " - patient_encounters[\"anchor_year\"]\n",
- " + patient_encounters[\"anchor_age\"]\n",
- " )\n",
- " for col in [\"admittime\", \"dischtime\", \"deathtime\"]:\n",
- " patient_encounters[col] = add_years_approximate(\n",
- " patient_encounters[col],\n",
- " patient_encounters[\"anchor_year_difference\"],\n",
- " )\n",
- "\n",
- " return patient_encounters[\n",
- " [\n",
- " \"hadm_id\",\n",
- " \"admittime\",\n",
- " \"dischtime\",\n",
- " \"deathtime\",\n",
- " \"anchor_age\",\n",
- " \"age\",\n",
- " \"gender\",\n",
- " \"anchor_year_difference\",\n",
- " \"admission_location\",\n",
- " \"admission_type\",\n",
- " \"insurance\",\n",
- " \"hospital_expire_flag\",\n",
- " ]\n",
- " ]\n",
- "\n",
- "\n",
- "def compute_mortality_outcome(patient_encounters):\n",
- " \"\"\"Compute mortality outcome.\"\"\"\n",
- " # Drop encounters ending in death which don't have a death timestamp\n",
- " invalid = (patient_encounters[\"hospital_expire_flag\"] == 1) & (\n",
- " patient_encounters[\"deathtime\"].isna()\n",
- " )\n",
- " patient_encounters = patient_encounters[~invalid]\n",
- " print(f\"Encounters with death flag but no death timestamp: {invalid.sum()}\")\n",
- " # Drop encounters which are shorter than M days\n",
- " invalid = (\n",
- " patient_encounters[\"dischtime\"] - patient_encounters[\"admittime\"]\n",
- " ).dt.days < M\n",
- " patient_encounters = patient_encounters[~invalid]\n",
- " print(f\"Encounters shorter than {M} days: {invalid.sum()}\")\n",
- " # Death timestamp is within (<=) N days of admission\n",
- " valid = (\n",
- " patient_encounters[\"deathtime\"] - patient_encounters[\"admittime\"]\n",
- " ).dt.days <= N\n",
- " print(f\"Encounters with death timestamp within {N} days: {valid.sum()}\")\n",
- " # (Died in hospital) & (Death timestamp is defined)\n",
- " print(len(patient_encounters))\n",
- " patient_encounters[\"mortality_outcome\"] = pd.Series(\n",
- " [0] * len(patient_encounters),\n",
- " index=patient_encounters.index,\n",
- " dtype=\"int64[pyarrow]\",\n",
- " )\n",
- " patient_encounters.loc[valid, \"mortality_outcome\"] = 1\n",
- " print(\n",
- " f\"Encounters with mortality outcome for the model: {patient_encounters['mortality_outcome'].sum()}\",\n",
- " )\n",
- "\n",
- " return patient_encounters\n",
- "\n",
- "\n",
- "def get_labevents(patient_encounters):\n",
- " \"\"\"Get labevents data.\"\"\"\n",
- " labevents = querier.labevents().run(index_col=\"hadm_id\", batch_mode=True)\n",
- "\n",
- " def process_labevents(labevents, patient_encounters):\n",
- " \"\"\"Process labevents before aggregation.\"\"\"\n",
- " # Reverse deidentified dating\n",
- " labevents = pd.merge(\n",
- " patient_encounters[\n",
- " [\n",
- " \"hadm_id\",\n",
- " \"anchor_year_difference\",\n",
- " ]\n",
- " ],\n",
- " labevents,\n",
- " on=\"hadm_id\",\n",
- " )\n",
- " labevents[\"charttime\"] = add_years_approximate(\n",
- " labevents[\"charttime\"],\n",
- " labevents[\"anchor_year_difference\"],\n",
- " )\n",
- " labevents = labevents.drop(\"anchor_year_difference\", axis=1)\n",
- " # Pre-processing\n",
- " labevents[\"label\"] = normalize_names(labevents[\"label\"])\n",
- " labevents[\"category\"] = normalize_names(labevents[\"category\"])\n",
- "\n",
- " return labevents\n",
- "\n",
- " start_timestamps = (\n",
- " patient_encounters[[\"hadm_id\", \"admittime\"]]\n",
- " .set_index(\"hadm_id\")\n",
- " .rename({\"admittime\": RESTRICT_TIMESTAMP}, axis=1)\n",
- " )\n",
- " mean_aggregator = Aggregator(\n",
- " aggfuncs={\n",
- " \"valuenum\": \"mean\",\n",
- " },\n",
- " window_duration=M * 24,\n",
- " window_start_time=start_timestamps,\n",
- " timestamp_col=\"charttime\",\n",
- " time_by=\"hadm_id\",\n",
- " agg_by=[\"hadm_id\", \"label\"],\n",
- " )\n",
- " means_df = pd.DataFrame()\n",
- " for batch_num, labevents_batch in enumerate(labevents):\n",
- " labevents_batch = process_labevents( # noqa: PLW2901\n",
- " labevents_batch,\n",
- " patient_encounters,\n",
- " )\n",
- " means = mean_aggregator.fit_transform(\n",
- " labevents_batch,\n",
- " )\n",
- " means = means.reset_index()\n",
- " means = means.pivot(index=\"hadm_id\", columns=\"label\", values=\"valuenum\")\n",
- " means = means.add_prefix(\"lab_\")\n",
- " means = pd.merge(\n",
- " patient_encounters[\n",
- " [\n",
- " \"hadm_id\",\n",
- " \"mortality_outcome\",\n",
- " \"age\",\n",
- " \"gender\",\n",
- " \"admission_location\",\n",
- " ]\n",
- " ],\n",
- " means,\n",
- " on=\"hadm_id\",\n",
- " )\n",
- " means_df = pd.concat([means_df, means])\n",
- " if batch_num == 2:\n",
- " break\n",
- " print(\"Processing batch {}\".format(batch_num + 1))\n",
- "\n",
- " return means_df\n",
- "\n",
- "\n",
- "def run_query():\n",
- " \"\"\"Run query.\"\"\"\n",
- " cohort = get_encounters()\n",
- " cohort = compute_mortality_outcome(cohort)\n",
- "\n",
- " return get_labevents(cohort)\n",
- "\n",
- "\n",
- "cohort = run_query()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Inspection and Preprocessing\n",
- "\n",
- "### Drop NaNs based on the `NAN_THRESHOLD`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "null_counts = cohort.isnull().sum()[cohort.isnull().sum() > 0]\n",
- "fig = go.Figure(data=[go.Bar(x=null_counts.index, y=null_counts.values)])\n",
- "\n",
- "fig.update_layout(\n",
- " title=\"Number of Null Values per Column\",\n",
- " xaxis_title=\"Columns\",\n",
- " yaxis_title=\"Number of Null Values\",\n",
- " height=600,\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**\n",
- "\n",
- "We can use the log_plotly_figure method to add the figure to a section of the report. One can specify whether the figure should be interactive or not by setting the `interactive` parameter to `True` or `False` respectively. The default value is `True`. This\n",
- "also affects the final size of the report. If the figure is interactive, the size of the report will be larger than if the figure is not interactive. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Number of Null Values per Column\",\n",
- " section_name=\"datasets\",\n",
- " interactive=True,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "thresh_nan = int(NAN_THRESHOLD * len(cohort))\n",
- "cohort = cohort.dropna(axis=1, thresh=thresh_nan)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Outcome distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.pie(cohort, names=\"mortality_outcome\")\n",
- "fig.update_traces(textinfo=\"percent+label\")\n",
- "fig.update_layout(title_text=\"Outcome Distribution\")\n",
- "fig.update_traces(\n",
- " hovertemplate=\"Outcome: %{label} Count: \\\n",
- " %{value} Percent: %{percent}\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Outcome Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# The data is heavily unbalanced.\n",
- "class_counts = cohort[\"mortality_outcome\"].value_counts()\n",
- "class_ratio = class_counts[0] / class_counts[1]\n",
- "print(class_ratio, class_counts)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Gender distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.pie(cohort, names=\"gender\")\n",
- "fig.update_layout(\n",
- " title=\"Gender Distribution\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Gender Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Age distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.histogram(cohort, x=\"age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Age Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Identifying feature types\n",
- "\n",
- "Cyclops `TabularFeatures` class helps to identify feature types, an essential step before preprocessing the data. Understanding feature types (numerical/categorical/binary) allows us to apply appropriate preprocessing steps for each type."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "features_list = set(cohort.columns.tolist()) - {\"hadm_id\", \"mortality_outcome\"}\n",
- "features_list = sorted(features_list)\n",
- "tab_features = TabularFeatures(\n",
- " data=cohort.reset_index(),\n",
- " features=features_list,\n",
- " by=\"hadm_id\",\n",
- " targets=\"mortality_outcome\",\n",
- ")\n",
- "print(tab_features.types)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Creating data preprocessors\n",
- "\n",
- "We create a data preprocessor using sklearn's ColumnTransformer. This helps in applying different preprocessing steps to different columns in the dataframe. For instance, binary features might be processed differently from numeric features."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "numeric_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"mean\")), (\"scaler\", MinMaxScaler())],\n",
- ")\n",
- "binary_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"most_frequent\"))],\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "numeric_features = sorted((tab_features.features_by_type(\"numeric\")))\n",
- "numeric_indices = [\n",
- " cohort[features_list].columns.get_loc(column) for column in numeric_features\n",
- "]\n",
- "print(numeric_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "binary_features = sorted(tab_features.features_by_type(\"binary\"))\n",
- "ordinal_features = sorted(tab_features.features_by_type(\"ordinal\"))\n",
- "binary_features.remove(\"mortality_outcome\")\n",
- "binary_indices = [\n",
- " cohort[features_list].columns.get_loc(column) for column in binary_features\n",
- "]\n",
- "ordinal_indices = [\n",
- " cohort[features_list].columns.get_loc(column) for column in ordinal_features\n",
- "]\n",
- "print(binary_features, ordinal_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "preprocessor = ColumnTransformer(\n",
- " transformers=[\n",
- " (\"num\", numeric_transformer, numeric_indices),\n",
- " (\n",
- " \"onehot\",\n",
- " OneHotEncoder(handle_unknown=\"ignore\"),\n",
- " binary_indices + ordinal_indices,\n",
- " ),\n",
- " ],\n",
- " remainder=\"passthrough\",\n",
- ")\n",
- "preprocessor_pipeline = [\n",
- " (\"preprocessor\", preprocessor),\n",
- " (\"oversampling\", SMOTE(random_state=RANDOM_SEED)),\n",
- "]\n",
- "preprocessor_pipeline = ImbPipeline(preprocessor_pipeline)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Creating Hugging Face Dataset\n",
- "\n",
- "We convert our processed Pandas dataframe into a Hugging Face dataset, a powerful and easy-to-use data format which is also compatible with CyclOps models and evaluator modules. The dataset is then split to train and test sets."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cohort = cohort.drop(columns=[\"hadm_id\"])\n",
- "dataset = Dataset.from_pandas(cohort)\n",
- "dataset.cleanup_cache_files()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "dataset = dataset.cast_column(\"mortality_outcome\", ClassLabel(num_classes=2))\n",
- "dataset = dataset.train_test_split(\n",
- " train_size=TRAIN_SIZE,\n",
- " stratify_by_column=\"mortality_outcome\",\n",
- " seed=RANDOM_SEED,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Model Creation\n",
- "\n",
- "CyclOps model registry allows for straightforward creation and selection of models. This registry maintains a list of pre-configured models, which can be instantiated with a single line of code. Here we use a XGBoost classifier to fit a logisitic regression model. The model configurations can be passed to `create_model` based on the parameters for XGBClassifier."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_name = \"xgb_classifier\"\n",
- "model = create_model(model_name, random_state=123)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Task Creation\n",
- "\n",
- "We use Cyclops tasks to define our model's task (in this case, BinaryTabularClassificationTask), train the model, make predictions, and evaluate performance. Cyclops task classes encapsulate the entire ML pipeline into a single, cohesive structure, making the process smooth and easy to manage."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "mortality_task = BinaryTabularClassificationTask(\n",
- " {model_name: model},\n",
- " task_features=features_list,\n",
- " task_target=\"mortality_outcome\",\n",
- ")\n",
- "mortality_task.list_models()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Training\n",
- "\n",
- "If `best_model_params` is passed to the `train` method, the best model will be selected after the hyperparameter search. The parameters in `best_model_params` indicate the values to create the parameters grid.\n",
- "\n",
- "Note that the data preprocessor needs to be passed to the tasks methods if the Hugging Face dataset is not already preprocessed. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "best_model_params = {\n",
- " \"n_estimators\": [100, 250, 500],\n",
- " \"learning_rate\": [0.1, 0.01],\n",
- " \"max_depth\": [2, 5],\n",
- " \"reg_lambda\": [0, 1, 10],\n",
- " \"colsample_bytree\": [0.7, 0.8, 1],\n",
- " \"gamma\": [0, 1, 2, 10],\n",
- " \"method\": \"random\",\n",
- "}\n",
- "mortality_task.train(\n",
- " dataset[\"train\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor_pipeline,\n",
- " best_model_params=best_model_params,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_params = mortality_task.list_models_params()[model_name]\n",
- "print(model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the model parameters to the report.**\n",
- "\n",
- "We can add model parameters to the model card using the `log_model_parameters` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_model_parameters(params=model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prediction\n",
- "\n",
- "The prediction output can be either the whole Hugging Face dataset with the prediction columns added to it or the single column containing the predicted values."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "y_pred = mortality_task.predict(\n",
- " dataset[\"test\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " proba=False,\n",
- " only_predictions=True,\n",
- ")\n",
- "print(len(y_pred))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Evaluation\n",
- "\n",
- "Evaluation is done using various evaluation metrics that provide different perspectives on the model's predictive abilities i.e. standard performance metrics and fairness metrics.\n",
- "\n",
- "The standard performance metrics can be created using the `MetricDict` object."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_auroc\",\n",
- " \"binary_average_precision\",\n",
- " \"binary_roc_curve\",\n",
- " \"binary_precision_recall_curve\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In addition to overall metrics, it might be interesting to see how the model performs on certain subpopulations. We can define these subpopulations using `SliceSpec` objects. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"age\": {\n",
- " \"min_value\": 20,\n",
- " \"max_value\": 50,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"age\": {\n",
- " \"min_value\": 50,\n",
- " \"max_value\": 80,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\"gender\": {\"value\": \"M\"}},\n",
- " {\"gender\": {\"value\": \"F\"}},\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A `MetricDict` can also be defined for the fairness metrics."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "specificity = create_metric(metric_name=\"binary_specificity\", experimental=True)\n",
- "sensitivity = create_metric(metric_name=\"binary_sensitivity\", experimental=True)\n",
- "fpr = -specificity + 1 # __rsub__ is not implemented for metrics\n",
- "fnr = -sensitivity + 1\n",
- "ber = (fpr + fnr) / 2\n",
- "fairness_metric_collection = MetricDict(\n",
- " {\n",
- " \"Sensitivity\": sensitivity,\n",
- " \"Specificity\": specificity,\n",
- " \"BER\": ber,\n",
- " },\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The FairnessConfig helps in setting up and evaluating the fairness of the model predictions."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "fairness_config = FairnessConfig(\n",
- " metrics=fairness_metric_collection,\n",
- " dataset=None, # dataset is passed from the evaluator\n",
- " target_columns=None, # target columns are passed from the evaluator\n",
- " groups=[\"gender\", \"age\"],\n",
- " group_bins={\"age\": [20, 40]},\n",
- " group_base_values={\"age\": 40, \"gender\": \"M\"},\n",
- " thresholds=[0.5],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The evaluate methods outputs the evaluation results and the Hugging Face dataset with the predictions added to it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "results, dataset_with_preds = mortality_task.evaluate(\n",
- " dataset[\"test\"],\n",
- " metric_collection,\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=slice_spec,\n",
- " batch_size=-1,\n",
- " fairness_config=fairness_config,\n",
- " override_fairness_metrics=False,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Log the performance metrics to the report.**\n",
- "\n",
- "We can add a performance metric to the model card using the `log_performance_metric` method, which expects a dictionary where the keys are in the following format: `slice_name/metric_name`. For instance, `overall/accuracy`. \n",
- "\n",
- "We first need to process the evaluation results to get the metrics in the right format."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model_name = f\"model_for_preds.{model_name}\"\n",
- "results_flat = flatten_results_dict(\n",
- " results=results,\n",
- " remove_metrics=[\"BinaryROC\", \"BinaryPrecisionRecallCurve\"],\n",
- " model_name=model_name,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# ruff: noqa: W505\n",
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryAveragePrecision\": \"The area under the precision-recall curve (AUPRC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also use the `ClassificationPlotter` to plot the performance metrics and the add the figure to the model card using the `log_plotly_figure` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the ROC curves and AUROC results for all the slices\n",
- "roc_curves = {\n",
- " slice_name: slice_results[\"BinaryROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "aurocs = {\n",
- " slice_name: slice_results[\"BinaryAUROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "roc_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# extracting the precision-recall curves and average precision results for all the slices\n",
- "pr_curves = {\n",
- " slice_name: slice_results[\"BinaryPrecisionRecallCurve\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "average_precisions = {\n",
- " slice_name: slice_results[\"BinaryAveragePrecision\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "pr_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the ROC curves for all the slices\n",
- "roc_plot = plotter.roc_curve_comparison(roc_curves, aurocs=aurocs)\n",
- "report.log_plotly_figure(\n",
- " fig=roc_plot,\n",
- " caption=\"ROC Curve Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "roc_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plotting the precision-recall curves for all the slices\n",
- "pr_plot = plotter.precision_recall_curve_comparison(\n",
- " pr_curves,\n",
- " auprcs=average_precisions,\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=pr_plot,\n",
- " caption=\"Precision-Recall Curve Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "pr_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the overall classification metric values.\n",
- "overall_performance = {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in results[model_name][\"overall\"].items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the overall classification metric values.\n",
- "overall_performance_plot = plotter.metrics_value(\n",
- " overall_performance,\n",
- " title=\"Overall Performance\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=overall_performance_plot,\n",
- " caption=\"Overall Performance\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "overall_performance_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\"]\n",
- " }\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "report.log_plotly_figure(\n",
- " fig=slice_metrics_plot,\n",
- " caption=\"Slice Metric Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Reformatting the fairness metrics\n",
- "fairness_results = copy.deepcopy(results[\"fairness\"])\n",
- "fairness_metrics = {}\n",
- "# remove the group size from the fairness results and add it to the slice name\n",
- "for slice_name, slice_results in fairness_results.items():\n",
- " group_size = slice_results.pop(\"Group Size\")\n",
- " fairness_metrics[f\"{slice_name} (Size={group_size})\"] = slice_results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the fairness metrics\n",
- "fairness_plot = plotter.metrics_comparison_scatter(\n",
- " fairness_metrics,\n",
- " title=\"Fairness Metrics\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=fairness_plot,\n",
- " caption=\"Fairness Metrics\",\n",
- " section_name=\"fairness analysis\",\n",
- ")\n",
- "fairness_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Report Generation\n",
- "\n",
- "Before generating the model card, let us document some of the details of the model and some considerations involved in developing and using the model.\n",
- "\n",
- "\n",
- "Let's start with populating the model details section, which includes the following fields by default:\n",
- "- description: A high-level description of the model and its usage for a general audience.\n",
- "- version: The version of the model.\n",
- "- owners: The individuals or organizations that own the model.\n",
- "- license: The license under which the model is made available.\n",
- "- citation: The citation for the model.\n",
- "- references: Links to resources that are relevant to the model.\n",
- "- path: The path to where the model is stored.\n",
- "- regulatory_requirements: The regulatory requirements that are relevant to the model.\n",
- "\n",
- "We can add additional fields to the model details section by passing a dictionary to the `log_from_dict` method and specifying the section name as `model_details`. You can also use the `log_descriptor` method to add a new field object with a `description` attribute to any section of the model card."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"Mortality Prediction Model\",\n",
- " \"description\": \"The model was trained on the MIMICIV dataset \\\n",
- " to predict risk of in-hospital mortality.\",\n",
- " },\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "report.log_version(\n",
- " version_str=\"0.0.1\",\n",
- " date=str(date.today()),\n",
- " description=\"Initial Release\",\n",
- ")\n",
- "report.log_owner(\n",
- " name=\"CyclOps Team\",\n",
- " contact=\"vectorinstitute.github.io/cyclops/\",\n",
- " email=\"cyclops@vectorinstitute.ai\",\n",
- ")\n",
- "report.log_license(identifier=\"Apache-2.0\")\n",
- "report.log_reference(\n",
- " link=\"https://xgboost.readthedocs.io/en/stable/python/python_api.html\", # noqa: E501\n",
- ")"
- ]
- },
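- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As a quick illustration of the `log_descriptor` method mentioned above, the next cell adds a custom field to the model details section. The field name `clinical_context` is a hypothetical example; any field name can be used."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Add a custom descriptor field to the model details section.\n",
- "# The field name here is a hypothetical example.\n",
- "report.log_descriptor(\n",
- "    name=\"clinical_context\",\n",
- "    description=\"Intended for use with adult in-patient encounters.\",\n",
- "    section_name=\"model_details\",\n",
- ")"
- ]
- },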
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, let's populate the considerations section, which includes the following fields by default:\n",
- "- users: The intended users of the model.\n",
- "- use_cases: The use cases for the model. These could be primary, downstream or out-of-scope use cases.\n",
- "- fairness_assessment: A description of the benefits and harms of the model for different groups as well as the steps taken to mitigate the harms.\n",
- "- ethical_considerations: The risks associated with using the model and the steps taken to mitigate them. This can be populated using the `log_risk` method.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"users\": [\n",
- " {\"description\": \"Hospitals\"},\n",
- " {\"description\": \"Clinicians\"},\n",
- " ],\n",
- " },\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_user(description=\"ML Engineers\")\n",
- "report.log_use_case(\n",
- " description=\"Predicting prolonged length of stay\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"sex, age\",\n",
- " benefit=\"Improved health outcomes for patients.\",\n",
- " harm=\"Biased predictions for patients in certain groups (e.g. older patients) \\\n",
- " may lead to worse health outcomes.\",\n",
- " mitigation_strategy=\"We will monitor the performance of the model on these groups \\\n",
- " and retrain the model if the performance drops below a certain threshold.\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"The model may be used to make decisions that affect the health of patients.\",\n",
- " mitigation_strategy=\"The model should be continuously monitored for performance \\\n",
- " and retrained if the performance drops below a certain threshold.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Once the model card is populated, you can generate the report using the `export` method. The report is generated in the form of an HTML file. A JSON file containing the model card data will also be generated along with the HTML file. By default, the files will be saved in a folder named `cyclops_reports` in the current working directory. You can change the path by passing a `output_dir` argument when instantiating the `ModelCardReport` class."
- ]
- },
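- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For instance, a report could be written to a custom folder as sketched below; the directory name is a hypothetical example."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hypothetical example: save generated reports under ./my_reports instead of\n",
- "# the default output directory.\n",
- "custom_report = ModelCardReport(output_dir=\"my_reports\")"
- ]
- },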
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "synthetic_timestamps = [\n",
- " \"2021-09-01\",\n",
- " \"2021-10-01\",\n",
- " \"2021-11-01\",\n",
- " \"2021-12-01\",\n",
- " \"2022-01-01\",\n",
- "]\n",
- "report._model_card.overview = None\n",
- "report_path = report.export(\n",
- " output_filename=\"mortality_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[0],\n",
- ")\n",
- "shutil.copy(f\"{report_path}\", \".\")\n",
- "for i in range(4):\n",
- " report._model_card.overview = None\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " metric.value = np.clip(\n",
- " metric.value + np.random.normal(0, 0.1),\n",
- " 0,\n",
- " 1,\n",
- " )\n",
- " metric.tests[0].passed = bool(metric.value >= 0.7)\n",
- " report_path = report.export(\n",
- " output_filename=\"mortality_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[i + 1],\n",
- " )\n",
- " shutil.copy(f\"{report_path}\", \".\")\n",
- "shutil.rmtree(\"./cyclops_report\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You can view the generated HTML [report](./mortality_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/api/_sources/tutorials/nihcxr/cxr_classification.ipynb.txt b/api/_sources/tutorials/nihcxr/cxr_classification.ipynb.txt
deleted file mode 100644
index e2cbed78e..000000000
--- a/api/_sources/tutorials/nihcxr/cxr_classification.ipynb.txt
+++ /dev/null
@@ -1,611 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "ea16a542",
- "metadata": {},
- "source": [
- "# Chest X-Ray Disease Classification\n",
- "\n",
- "This notebook shows chest x-ray classification on the [NIH dataset](https://www.nih.gov/news-events/news-releases/nih-clinical-center-provides-one-largest-publicly-available-chest-x-ray-datasets-scientific-community) using a pretrained model from the TorchXRayVision library and CyclOps to generate a model card."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fb698a85",
- "metadata": {},
- "source": [
- "### Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "fc1eb72a",
- "metadata": {},
- "outputs": [],
- "source": [
- "\"\"\"Chest X-ray Disease Classification.\"\"\"\n",
- "\n",
- "import shutil\n",
- "from functools import partial\n",
- "\n",
- "import numpy as np\n",
- "import plotly.express as px\n",
- "from torchvision.transforms import Compose\n",
- "from torchxrayvision.models import DenseNet\n",
- "\n",
- "from cyclops.data.loader import load_nihcxr\n",
- "from cyclops.data.slicer import (\n",
- " SliceSpec,\n",
- " filter_value, # noqa: E402\n",
- ")\n",
- "from cyclops.data.transforms import Lambdad, Resized\n",
- "from cyclops.data.utils import apply_transforms\n",
- "from cyclops.evaluate import evaluator\n",
- "from cyclops.evaluate.metrics.factory import create_metric\n",
- "from cyclops.models.wrappers import PTModel\n",
- "from cyclops.report import ModelCardReport"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ac67f0e7",
- "metadata": {},
- "source": [
- "## Generate Historical Reports"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "25c2a16f",
- "metadata": {
- "nbsphinx": "hidden"
- },
- "outputs": [],
- "source": [
- "\"\"\"Generate historical reports with validation data\n",
- "for comparison with periodic report on test data.\"\"\"\n",
- "\n",
- "!python3 generate_nihcxr_report.py --synthetic_timestamp \"2023-10-19\" --seed 43\n",
- "!python3 generate_nihcxr_report.py --synthetic_timestamp \"2023-10-16\" --seed 44\n",
- "!python3 generate_nihcxr_report.py --synthetic_timestamp \"2023-10-22\" --seed 45\n",
- "!python3 generate_nihcxr_report.py --synthetic_timestamp \"2023-10-30\" --seed 46"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "198896c7",
- "metadata": {},
- "source": [
- "CyclOps offers a package for documentation of the model through a model report. The `ModelCardReport` class is used to populate and generate the model report as an HTML file. The model report has the following sections:\n",
- "\n",
- "- Overview: Provides a high level overview of how the model is doing (a quick glance of important metrics), and how it is doing over time (performance over several metrics and subgroups over time).\n",
- "- Datasets: High level statistics of the training data, including changes in distribution over time.\n",
- "- Quantitative Analysis: This section contains additional detailed performance metrics of the model for different sets of the data and subpopulations.\n",
- "- Fairness Analysis: This section contains the fairness metrics of the model.\n",
- "- Model Details: This section contains descriptive metadata about the model such as the owners, version, license, etc.\n",
- "- Model Parameters: This section contains the technical details of the model such as the model architecture, training parameters, etc.\n",
- "- Considerations: This section contains descriptions of the considerations involved in developing and using the model such as the intended use, limitations, etc.\n",
- "\n",
- "We will use this to document the model development process as we go along and generate the model report at the end.\n",
- "\n",
- "`The model report tool is a work in progress and is subject to change.`"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0ee175a9",
- "metadata": {},
- "source": [
- "## Initialize Periodic Report"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "03edf1c0",
- "metadata": {},
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4f8e8915",
- "metadata": {},
- "source": [
- "### Load Dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6514120e",
- "metadata": {},
- "outputs": [],
- "source": [
- "data_dir = \"/mnt/data/clinical_datasets/NIHCXR\"\n",
- "nih_ds = load_nihcxr(data_dir)[\"test\"]\n",
- "nih_ds = nih_ds.select(range(1000))\n",
- "\n",
- "transforms = Compose(\n",
- " [\n",
- " Resized(\n",
- " keys=(\"image\",),\n",
- " spatial_size=(224, 224),\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " Lambdad(\n",
- " keys=(\"image\",),\n",
- " func=lambda x: ((2 * (x / 255.0)) - 1.0) * 1024,\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " Lambdad(\n",
- " keys=(\"image\",),\n",
- " func=lambda x: x[0][np.newaxis, :] if x.shape[0] != 1 else x,\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " ],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "c7c9172a",
- "metadata": {},
- "source": [
- "## Model Creation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "5f624ed4",
- "metadata": {},
- "outputs": [],
- "source": [
- "model = PTModel(DenseNet(weights=\"densenet121-res224-nih\"))\n",
- "\n",
- "model.initialize()\n",
- "nih_ds = model.predict(\n",
- " nih_ds,\n",
- " feature_columns=[\"image\"],\n",
- " transforms=partial(apply_transforms, transforms=transforms),\n",
- " model_name=\"densenet\",\n",
- ")\n",
- "\n",
- "# remove any rows with No Finding == 1\n",
- "nih_ds = nih_ds.filter(\n",
- " partial(filter_value, column_name=\"No Finding\", value=1, negate=True),\n",
- " batched=True,\n",
- ")\n",
- "\n",
- "# remove the No Finding column and adjust the predictions to account for it\n",
- "nih_ds = nih_ds.map(\n",
- " lambda x: {\n",
- " \"predictions.densenet\": x[\"predictions.densenet\"][:14],\n",
- " },\n",
- " remove_columns=[\"No Finding\"],\n",
- ")\n",
- "print(nih_ds.features)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9743a047",
- "metadata": {},
- "source": [
- "### Multilabel AUROC by Pathology and Sex"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bff27cc1",
- "metadata": {},
- "outputs": [],
- "source": [
- "pathologies = model.model.pathologies[:14]\n",
- "\n",
- "# define the slices\n",
- "slices = [\n",
- " {\"Patient Gender\": {\"value\": \"M\"}},\n",
- " {\"Patient Gender\": {\"value\": \"F\"}},\n",
- "]\n",
- "\n",
- "\n",
- "num_labels = len(pathologies)\n",
- "ppv = create_metric(\n",
- " metric_name=\"multilabel_ppv\",\n",
- " experimental=True,\n",
- " num_labels=num_labels,\n",
- " average=None,\n",
- ")\n",
- "\n",
- "npv = create_metric(\n",
- " metric_name=\"multilabel_npv\",\n",
- " experimental=True,\n",
- " num_labels=num_labels,\n",
- " average=None,\n",
- ")\n",
- "\n",
- "specificity = create_metric(\n",
- " metric_name=\"multilabel_specificity\",\n",
- " experimental=True,\n",
- " num_labels=num_labels,\n",
- " average=None,\n",
- ")\n",
- "\n",
- "sensitivity = create_metric(\n",
- " metric_name=\"multilabel_sensitivity\",\n",
- " experimental=True,\n",
- " num_labels=num_labels,\n",
- " average=None,\n",
- ")\n",
- "\n",
- "# create the slice functions\n",
- "slice_spec = SliceSpec(spec_list=slices)\n",
- "\n",
- "nih_eval_results_gender = evaluator.evaluate(\n",
- " dataset=nih_ds,\n",
- " metrics=[ppv, npv, sensitivity, specificity],\n",
- " target_columns=pathologies,\n",
- " prediction_columns=\"predictions.densenet\",\n",
- " ignore_columns=\"image\",\n",
- " slice_spec=slice_spec,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "88448ced",
- "metadata": {},
- "source": [
- "### Multilabel AUROC by Pathology and Age"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8c38ef9e",
- "metadata": {},
- "outputs": [],
- "source": [
- "# define the slices\n",
- "slices = [\n",
- " {\"Patient Age\": {\"min_value\": 19, \"max_value\": 35}},\n",
- " {\"Patient Age\": {\"min_value\": 35, \"max_value\": 65}},\n",
- " {\"Patient Age\": {\"min_value\": 65, \"max_value\": 100}},\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 19, \"max_value\": 35},\n",
- " \"Patient Gender\": {\"value\": \"M\"},\n",
- " },\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 19, \"max_value\": 35},\n",
- " \"Patient Gender\": {\"value\": \"F\"},\n",
- " },\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 35, \"max_value\": 65},\n",
- " \"Patient Gender\": {\"value\": \"M\"},\n",
- " },\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 35, \"max_value\": 65},\n",
- " \"Patient Gender\": {\"value\": \"F\"},\n",
- " },\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 65, \"max_value\": 100},\n",
- " \"Patient Gender\": {\"value\": \"M\"},\n",
- " },\n",
- " {\n",
- " \"Patient Age\": {\"min_value\": 65, \"max_value\": 100},\n",
- " \"Patient Gender\": {\"value\": \"F\"},\n",
- " },\n",
- "]\n",
- "\n",
- "# create the slice functions\n",
- "slice_spec = SliceSpec(spec_list=slices)\n",
- "\n",
- "nih_eval_results_age = evaluator.evaluate(\n",
- " dataset=nih_ds,\n",
- " metrics=[ppv, npv, sensitivity, specificity],\n",
- " target_columns=pathologies,\n",
- " prediction_columns=\"predictions.densenet\",\n",
- " ignore_columns=\"image\",\n",
- " slice_spec=slice_spec,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3e674b7a",
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.pie(\n",
- " values=[nih_ds[\"Patient Gender\"].count(\"M\"), nih_ds[\"Patient Gender\"].count(\"F\")],\n",
- " names=[\"Male\", \"Female\"],\n",
- ")\n",
- "\n",
- "fig.update_layout(\n",
- " title=\"Gender Distribution\",\n",
- ")\n",
- "\n",
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Gender Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a144dc9a",
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.histogram(nih_ds[\"Patient Age\"])\n",
- "fig.update_traces(showlegend=False)\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "\n",
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Age Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d2ff6506",
- "metadata": {},
- "outputs": [],
- "source": [
- "fig = px.bar(x=pathologies, y=[np.array(nih_ds[p]).sum() for p in pathologies])\n",
- "fig.update_layout(\n",
- " title=\"Pathology Distribution\",\n",
- " xaxis_title=\"Pathology\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- " # change size of plot\n",
- ")\n",
- "\n",
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Pathology Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")\n",
- "\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e5eec4e5",
- "metadata": {},
- "source": [
- "### Log Performance Metrics as Tests w/ Thresholds"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ec627997",
- "metadata": {},
- "outputs": [],
- "source": [
- "results_flat = {}\n",
- "for slice_, metrics in nih_eval_results_age[\"model_for_predictions.densenet\"].items():\n",
- " for name, metric in metrics.items():\n",
- " results_flat[f\"{slice_}/{name}\"] = metric.mean()\n",
- " for itr, m in enumerate(metric):\n",
- " if slice_ == \"overall\":\n",
- " results_flat[f\"pathology:{pathologies[itr]}/{name}\"] = m\n",
- " else:\n",
- " results_flat[f\"{slice_}&pathology:{pathologies[itr]}/{name}\"] = m\n",
- "for slice_, metrics in nih_eval_results_gender[\n",
- " \"model_for_predictions.densenet\"\n",
- "].items():\n",
- " for name, metric in metrics.items():\n",
- " results_flat[f\"{slice_}/{name}\"] = metric.mean()\n",
- " for itr, m in enumerate(metric):\n",
- " if slice_ == \"overall\":\n",
- " results_flat[f\"pathology:{pathologies[itr]}/{name}\"] = m\n",
- " else:\n",
- " results_flat[f\"{slice_}&pathology:{pathologies[itr]}/{name}\"] = m\n",
- "\n",
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " descriptions = {\n",
- " \"MultilabelPPV\": \"The proportion of correctly predicted positive instances among all instances predicted as positive. Also known as precision.\",\n",
- " \"MultilabelNPV\": \"The proportion of correctly predicted negative instances among all instances predicted as negative.\",\n",
- " \"MultilabelSensitivity\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"MultilabelSpecificity\": \"The proportion of actual negative instances that are correctly predicted.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist() if isinstance(metric, np.generic) else metric,\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ff0664e9",
- "metadata": {},
- "source": [
- "## Populate Model Card Fields"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c9d6fe83",
- "metadata": {},
- "outputs": [],
- "source": [
- "# model details for NIH Chest X-Ray model\n",
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"NIH Chest X-Ray Multi-label Classification Model\",\n",
- " \"description\": \"This model is a DenseNet121 model trained on the NIH Chest \\\n",
- " X-Ray dataset, which contains 112,120 frontal-view X-ray images of 30,805 \\\n",
- " unique patients with the fourteen text-mined disease labels from the \\\n",
- " associated radiological reports. The labels are Atelectasis, Cardiomegaly, \\\n",
- " Effusion, Infiltration, Mass, Nodule, Pneumonia, Pneumothorax, \\\n",
- " Consolidation, Edema, Emphysema, Fibrosis, Pleural Thickening, and Hernia. \\\n",
- " The model was trained on 80% of the data and evaluated on the remaining \\\n",
- " 20%.\",\n",
- " \"references\": [{\"link\": \"https://arxiv.org/abs/2111.00595\"}],\n",
- " },\n",
- " section_name=\"Model Details\",\n",
- ")\n",
- "\n",
- "report.log_citation(\n",
- " citation=\"\"\"@inproceedings{Cohen2022xrv,\n",
- " title = {{TorchXRayVision: A library of chest X-ray datasets and models}},\n",
- " author = {Cohen, Joseph Paul and Viviano, Joseph D. and Bertin, \\\n",
- " Paul and Morrison,Paul and Torabian, Parsa and Guarrera, \\\n",
- " Matteo and Lungren, Matthew P and Chaudhari,\\\n",
- " Akshay and Brooks, Rupert and Hashir, \\\n",
- " Mohammad and Bertrand, Hadrien},\n",
- " booktitle = {Medical Imaging with Deep Learning},\n",
- " url = {https://github.com/mlmed/torchxrayvision},\n",
- " arxivId = {2111.00595},\n",
- " year = {2022}\n",
- " }\"\"\",\n",
- ")\n",
- "\n",
- "report.log_citation(\n",
- " citation=\"\"\"@inproceedings{cohen2020limits,\n",
- " title={On the limits of cross-domain generalization\\\n",
- " in automated X-ray prediction},\n",
- " author={Cohen, Joseph Paul and Hashir, Mohammad and Brooks, \\\n",
- " Rupert and Bertrand, Hadrien},\n",
- " booktitle={Medical Imaging with Deep Learning},\n",
- " year={2020},\n",
- " url={https://arxiv.org/abs/2002.02497}\n",
- " }\"\"\",\n",
- ")\n",
- "\n",
- "report.log_owner(\n",
- " name=\"Machine Learning and Medicine Lab\",\n",
- " contact=\"mlmed.org\",\n",
- " email=\"joseph@josephpcohen.com\",\n",
- ")\n",
- "\n",
- "# considerations\n",
- "report.log_user(description=\"Radiologists\")\n",
- "report.log_user(description=\"Data Scientists\")\n",
- "\n",
- "report.log_use_case(\n",
- " description=\"The model can be used to predict the presence of 14 pathologies \\\n",
- " in chest X-ray images.\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_descriptor(\n",
- " name=\"limitations\",\n",
- " description=\"The limitations of this model include its inability to detect \\\n",
- " pathologies that are not included in the 14 labels of the NIH \\\n",
- " Chest X-Ray dataset. Additionally, the model may not perform \\\n",
- " well on images that are of poor quality or that contain \\\n",
- " artifacts. Finally, the model may not generalize well to\\\n",
- " populations that are not well-represented in the training \\\n",
- " data, such as patients from different geographic regions or \\\n",
- " with different demographics.\",\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_descriptor(\n",
- " name=\"tradeoffs\",\n",
- " description=\"The model can help radiologists to detect pathologies in \\\n",
- " chest X-ray images, but it may not generalize well to populations \\\n",
- " that are not well-represented in the training data.\",\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"One ethical risk of the model is that it may not generalize well to \\\n",
- " populations that are not well-represented in the training data,\\\n",
- " such as patients from different geographic regions \\\n",
- " or with different demographics. \",\n",
- " mitigation_strategy=\"A mitigation strategy for this risk is to ensure \\\n",
- " that the training data is diverse and representative of the population \\\n",
- " that the model will be used on. Additionally, the model should be \\\n",
- " regularly evaluated and updated to ensure that it continues to \\\n",
- " perform well on diverse populations. Finally, the model should \\\n",
- " be used in conjunction with human expertise to ensure that \\\n",
- " any biases or limitations are identified and addressed.\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"Patients with rare pathologies\",\n",
- " benefit=\"The model can help radiologists to detect pathologies in \\\n",
- " chest X-ray images.\",\n",
- " harm=\"The model may not generalize well to populations that are not \\\n",
- " well-represented in the training data.\",\n",
- " mitigation_strategy=\"A mitigation strategy for this risk is to ensure that \\\n",
- " the training data is diverse and representative of the population.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ae088e5b",
- "metadata": {},
- "outputs": [],
- "source": [
- "report_path = report.export(\n",
- " output_filename=\"nihcxr_report_periodic.html\",\n",
- " synthetic_timestamp=\"2023-11-06\",\n",
- ")\n",
- "shutil.copy(f\"{report_path}\", \".\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d7545d39",
- "metadata": {},
- "source": [
- "You can view the generated HTML [report](./nihcxr_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/api/_sources/tutorials/nihcxr/monitor_api.ipynb.txt b/api/_sources/tutorials/nihcxr/monitor_api.ipynb.txt
deleted file mode 100644
index 53e9ebcd3..000000000
--- a/api/_sources/tutorials/nihcxr/monitor_api.ipynb.txt
+++ /dev/null
@@ -1,301 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "6b2520a8-d4ad-4941-8ea7-71fdd631225f",
- "metadata": {},
- "source": [
- "# NIHCXR Clinical Drift Experiments Tutorial"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "275fdb05",
- "metadata": {},
- "source": [
- "## Import Libraries and Load NIHCXR Dataset"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8aa3302d",
- "metadata": {},
- "outputs": [],
- "source": [
- "\"\"\"NIHCXR Clinical Drift Experiments Tutorial.\"\"\"\n",
- "\n",
- "\n",
- "import random\n",
- "\n",
- "import numpy as np\n",
- "import torch\n",
- "from torchvision.transforms import Compose\n",
- "from torchxrayvision.models import DenseNet\n",
- "\n",
- "from cyclops.data.loader import load_nihcxr\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.data.transforms import Lambdad, Resized\n",
- "from cyclops.monitor import ClinicalShiftApplicator, Detector, Reductor, TSTester\n",
- "from cyclops.monitor.plotter import plot_drift_experiment, plot_drift_timeseries\n",
- "\n",
- "\n",
- "nih_ds = load_nihcxr(\"/mnt/data/clinical_datasets/NIHCXR\")[\"test\"]\n",
- "\n",
- "random.seed(42)\n",
- "np.random.seed(42)\n",
- "torch.manual_seed(42)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ebcc72bb",
- "metadata": {},
- "source": [
- "## Example 1. Generate Source/Target Dataset for Experiments (1-2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e11920db",
- "metadata": {},
- "outputs": [],
- "source": [
- "shifter = ClinicalShiftApplicator(\n",
- " \"sex\",\n",
- " source=None,\n",
- " target=\"F\",\n",
- " shift_id=\"Patient Gender\",\n",
- ")\n",
- "source_ds, target_ds = shifter.apply_shift(nih_ds, num_proc=6)\n",
- "\n",
- "transforms = Compose(\n",
- " [\n",
- " Resized(\n",
- " spatial_size=(224, 224),\n",
- " keys=(\"image\",),\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " Lambdad(\n",
- " func=lambda x: ((2 * (x / 255.0)) - 1.0) * 1024,\n",
- " keys=(\"image\",),\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " Lambdad(\n",
- " func=lambda x: x[0][np.newaxis, :] if x.shape[0] != 1 else x,\n",
- " keys=(\"image\",),\n",
- " allow_missing_keys=True,\n",
- " ),\n",
- " ],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ab403dc5",
- "metadata": {},
- "source": [
- "## Example 2. Sensitivity test experiment with 3 dimensionality reduction techniques\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "54a3523a",
- "metadata": {},
- "outputs": [],
- "source": [
- "model = DenseNet(weights=\"densenet121-res224-all\")\n",
- "dr_methods = {\n",
- " \"BBSE\": \"bbse-soft\",\n",
- " \"BBSE + TXRV-AE\": \"bbse-soft+txrv-ae\",\n",
- " \"TXRV-AE\": \"txrv-ae\",\n",
- "}\n",
- "results = {}\n",
- "\n",
- "for name, dr_method in dr_methods.items():\n",
- " if name == \"TXRV-AE\":\n",
- " reductor = Reductor(\n",
- " dr_method=dr_method,\n",
- " transforms=transforms,\n",
- " feature_columns=[\"image\"],\n",
- " )\n",
- " else:\n",
- " reductor = Reductor(\n",
- " dr_method=dr_method,\n",
- " model=model,\n",
- " transforms=transforms,\n",
- " feature_columns=[\"image\"],\n",
- " )\n",
- " detector = Detector(\n",
- " \"sensitivity_test\",\n",
- " reductor=reductor,\n",
- " tester=TSTester(tester_method=\"ks\"),\n",
- " source_sample_size=50,\n",
- " target_sample_size=[10, 25, 50],\n",
- " num_runs=1,\n",
- " )\n",
- " result = detector.detect_shift(source_ds, target_ds)\n",
- " results[name] = result\n",
- "plot_drift_experiment(results)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ec20a728",
- "metadata": {},
- "source": [
- "## Example 3. Sensitivity test experiment with models trained on different datasets"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "40b5a90f",
- "metadata": {},
- "outputs": [],
- "source": [
- "models = {\n",
- " \"MODEL: NIH\": \"densenet121-res224-nih\",\n",
- " \"MODEL: CHEXPERT\": \"densenet121-res224-chex\",\n",
- " \"MODEL: PADCHEST\": \"densenet121-res224-pc\",\n",
- "}\n",
- "results = {}\n",
- "\n",
- "for model_name, model in models.items():\n",
- " detector = Detector(\n",
- " \"sensitivity_test\",\n",
- " reductor=Reductor(\n",
- " dr_method=\"bbse-soft\",\n",
- " model=DenseNet(weights=model),\n",
- " transforms=transforms,\n",
- " feature_columns=[\"image\"],\n",
- " ),\n",
- " tester=TSTester(tester_method=\"ks\"),\n",
- " source_sample_size=50,\n",
- " target_sample_size=[10, 25, 50],\n",
- " num_runs=1,\n",
- " )\n",
- " results[model_name] = detector.detect_shift(source_ds, target_ds)\n",
- "plot_drift_experiment(results)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0e8ebcf1",
- "metadata": {},
- "source": [
- "## Example 4. Sensitivity test experiment with different clinical shifts"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9ba03fac",
- "metadata": {},
- "outputs": [],
- "source": [
- "model = DenseNet(weights=\"densenet121-res224-all\")\n",
- "source_slice = None\n",
- "target_slices = {\n",
- " \"SEX: MALE\": SliceSpec(spec_list=[{\"Patient Gender\": {\"value\": \"M\"}}]),\n",
- " \"SEX: FEMALE\": SliceSpec(spec_list=[{\"Patient Gender\": {\"value\": \"F\"}}]),\n",
- " \"AGE: 18-35\": SliceSpec(\n",
- " spec_list=[{\"Patient Age\": {\"min_value\": 18, \"max_value\": 35}}],\n",
- " ),\n",
- " \"AGE: 35-65\": SliceSpec(\n",
- " spec_list=[{\"Patient Age\": {\"min_value\": 35, \"max_value\": 65}}],\n",
- " ),\n",
- "}\n",
- "results = {}\n",
- "\n",
- "for name, target_slice in target_slices.items():\n",
- " source_slice = None\n",
- " shifter = ClinicalShiftApplicator(\n",
- " \"custom\",\n",
- " source=source_slice,\n",
- " target=target_slice,\n",
- " )\n",
- " ds_source, ds_target = shifter.apply_shift(nih_ds, num_proc=6)\n",
- "\n",
- " detector = Detector(\n",
- " \"sensitivity_test\",\n",
- " reductor=Reductor(\n",
- " dr_method=\"bbse-soft\",\n",
- " model=model,\n",
- " transforms=transforms,\n",
- " feature_columns=[\"image\"],\n",
- " ),\n",
- " tester=TSTester(tester_method=\"ks\"),\n",
- " source_sample_size=50,\n",
- " target_sample_size=[10, 25, 50],\n",
- " num_runs=1,\n",
- " )\n",
- " results[name] = detector.detect_shift(ds_source, ds_target)\n",
- "plot_drift_experiment(results)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7571bc9f",
- "metadata": {},
- "source": [
- "## Example 5. Rolling window experiment with synthetic timestamps using biweekly window"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "77e4b383",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model = DenseNet(weights=\"densenet121-res224-all\")\n",
- "detector = Detector(\n",
- " \"rolling_window_drift\",\n",
- " reductor=Reductor(\n",
- " dr_method=\"bbse-soft\",\n",
- " model=model,\n",
- " transforms=transforms,\n",
- " feature_columns=[\"image\"],\n",
- " ),\n",
- " tester=TSTester(tester_method=\"ks\"),\n",
- " source_sample_size=50,\n",
- " target_sample_size=10,\n",
- " timestamp_column=\"timestamp\",\n",
- " window_size=\"4W\",\n",
- ")\n",
- "\n",
- "results = detector.detect_shift(source_ds, target_ds)\n",
- "plot_drift_timeseries(results)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.12"
- },
- "nbsphinx": {
- "execute": "never"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/api/_sources/tutorials/synthea/los_prediction.ipynb.txt b/api/_sources/tutorials/synthea/los_prediction.ipynb.txt
deleted file mode 100644
index 53d4e4304..000000000
--- a/api/_sources/tutorials/synthea/los_prediction.ipynb.txt
+++ /dev/null
@@ -1,1506 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "e337389b-1cfe-4796-a846-b4e1ba5690d6",
- "metadata": {
- "tags": []
- },
- "source": [
- "# Prolonged Length of Stay Prediction\n",
- "\n",
- "This notebook showcases length of stay prediction on the [Synthea](https://github.com/synthetichealth/synthea) dataset using CyclOps. The task is formulated as a binary classification task, where we predict the probability that a patient will stay 7 days or longer.\n",
- "\n",
- "To generate the synthetic patient data:\n",
- "\n",
- "1. Generate synthea data using their releases. We used [v3.0.0](https://github.com/synthetichealth/synthea/releases/tag/v3.0.0).\n",
- "2. Follow instructions provided in [ETL-Synthea](https://github.com/OHDSI/ETL-Synthea) to load the CSV data into a postgres database."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "12c18656-7f16-4230-85d0-944563d6a13e",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Import Libraries"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "53009e6b",
- "metadata": {},
- "outputs": [],
- "source": [
- "\"\"\"Prolonged Length of Stay Prediction.\"\"\"\n",
- "\n",
- "import copy\n",
- "import shutil\n",
- "from datetime import date\n",
- "\n",
- "import cycquery.ops as qo\n",
- "import numpy as np\n",
- "import plotly.express as px\n",
- "import plotly.graph_objects as go\n",
- "from cycquery import DatasetQuerier\n",
- "from datasets import Dataset\n",
- "from datasets.features import ClassLabel\n",
- "from sklearn.compose import ColumnTransformer\n",
- "from sklearn.impute import SimpleImputer\n",
- "from sklearn.pipeline import Pipeline\n",
- "from sklearn.preprocessing import MinMaxScaler, OneHotEncoder\n",
- "\n",
- "from cyclops.data.df.feature import TabularFeatures\n",
- "from cyclops.data.slicer import SliceSpec\n",
- "from cyclops.evaluate.fairness import FairnessConfig # noqa: E402\n",
- "from cyclops.evaluate.metrics import create_metric\n",
- "from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict\n",
- "from cyclops.models.catalog import create_model\n",
- "from cyclops.report import ModelCardReport\n",
- "from cyclops.report.plot.classification import ClassificationPlotter\n",
- "from cyclops.report.utils import flatten_results_dict\n",
- "from cyclops.tasks import BinaryTabularClassificationTask"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a0c35352-ccef-47c9-8d1a-3062c62adb97",
- "metadata": {},
- "source": [
- "CyclOps offers a package for documentation of the model through a model report. The `ModelCardReport` class is used to populate and generate the model report as an HTML file. The model report has the following sections:\n",
- "\n",
- "- Overview: Provides a high level overview of how the model is doing (a quick glance of important metrics), and how it is doing over time (performance over several metrics and subgroups over time).\n",
- "- Datasets: High level statistics of the training data, including changes in distribution over time.\n",
- "- Quantitative Analysis: This section contains additional detailed performance metrics of the model for different sets of the data and subpopulations.\n",
- "- Fairness Analysis: This section contains the fairness metrics of the model.\n",
- "- Model Details: This section contains descriptive metadata about the model such as the owners, version, license, etc.\n",
- "- Model Parameters: This section contains the technical details of the model such as the model architecture, training parameters, etc.\n",
- "- Considerations: This section contains descriptions of the considerations involved in developing and using the model such as the intended use, limitations, etc.\n",
- "\n",
- "We will use this to document the model development process as we go along and generate the model report at the end.\n",
- "\n",
- "`The model report tool is a work in progress and is subject to change.`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "afae58a8-5708-4e05-8695-25ba3ce1a71f",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "report = ModelCardReport()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "28becf40-dc5f-4a1d-a1c9-5d6a41f797aa",
- "metadata": {},
- "source": [
- "## Constants"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "739b109a-011b-4e6e-a3de-964edeffddbd",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "NAN_THRESHOLD = 0.25\n",
- "NUM_DAYS = 7\n",
- "TRAIN_SIZE = 0.8\n",
- "RANDOM_SEED = 85"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "52d5aa23-6ae5-4ab6-824f-b68c47f471ae",
- "metadata": {},
- "source": [
- "## Data Querying\n",
- "\n",
- "### Compute length of stay (labels)\n",
- "\n",
- "1. Get encounters, compute length of stay.\n",
- "2. Filter out encounters less than 2 days and greater than 20 days."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "e497df9f-0f3d-4e9c-845c-539627a37f67",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "querier = DatasetQuerier(\n",
- " dbms=\"postgresql\",\n",
- " port=5432,\n",
- " host=\"localhost\",\n",
- " database=\"synthea_demo\",\n",
- " user=\"postgres\",\n",
- " password=\"pwd\",\n",
- ")\n",
- "\n",
- "\n",
- "def get_encounters():\n",
- " \"\"\"Get encounters data.\"\"\"\n",
- " patients = querier.native.patients()\n",
- " ops = qo.Sequential(\n",
- " qo.Rename({\"id\": \"patient_id\"}),\n",
- " qo.Keep([\"patient_id\", \"birthdate\", \"gender\", \"race\", \"ethnicity\"]),\n",
- " )\n",
- " patients = patients.ops(ops)\n",
- " encounters = querier.native.encounters()\n",
- " patient_encounters = encounters.join(\n",
- " patients,\n",
- " on=(\"patient\", \"patient_id\"),\n",
- " isouter=True,\n",
- " )\n",
- " ops = qo.Sequential(\n",
- " qo.Rename({\"id\": \"encounter_id\"}),\n",
- " qo.ExtractTimestampComponent(\"start\", \"year\", \"start_year\"),\n",
- " qo.ExtractTimestampComponent(\"birthdate\", \"year\", \"birthdate_year\"),\n",
- " qo.AddColumn(\n",
- " \"start_year\",\n",
- " \"birthdate_year\",\n",
- " new_col_labels=\"age\",\n",
- " negative=True,\n",
- " ),\n",
- " qo.AddColumn(\"stop\", \"start\", new_col_labels=\"los\", negative=True),\n",
- " qo.ConditionGreaterThan(\"los\", 1),\n",
- " qo.ConditionLessThan(\"los\", 21),\n",
- " qo.Keep(\n",
- " [\n",
- " \"encounter_id\",\n",
- " \"los\",\n",
- " \"age\",\n",
- " \"gender\",\n",
- " ],\n",
- " ),\n",
- " )\n",
- " return patient_encounters.ops(ops)\n",
- "\n",
- "\n",
- "def get_observations(cohort):\n",
- " \"\"\"Get observations data.\"\"\"\n",
- " observations = querier.native.observations()\n",
- " ops = qo.Sequential(\n",
- " qo.ConditionIn(\n",
- " \"category\",\n",
- " [\n",
- " \"laboratory\",\n",
- " \"vital-signs\",\n",
- " ],\n",
- " ),\n",
- " qo.ConditionEquals(\"type\", \"numeric\"),\n",
- " )\n",
- " observations = observations.ops(ops)\n",
- " cohort = cohort.join(\n",
- " observations,\n",
- " on=(\"encounter_id\", \"encounter\"),\n",
- " isouter=True,\n",
- " )\n",
- " groupby_op = qo.GroupByAggregate(\n",
- " \"encounter_id\",\n",
- " {\"description\": (\"count\", \"n_obs\")},\n",
- " )\n",
- " observations = cohort.run()\n",
- " observations_count = cohort.ops(groupby_op).run()\n",
- " observations_stats = observations.pivot_table(\n",
- " index=\"encounter_id\",\n",
- " columns=\"description\",\n",
- " values=\"value\",\n",
- " aggfunc=\"max\",\n",
- " ).add_prefix(\"obs_\")\n",
- "\n",
- " return [observations_count, observations_stats]\n",
- "\n",
- "\n",
- "def get_medications(cohort):\n",
- " \"\"\"Get medications data.\"\"\"\n",
- " medications = querier.native.medications()\n",
- " cohort = cohort.join(\n",
- " medications,\n",
- " on=(\"encounter_id\", \"encounter\"),\n",
- " )\n",
- " groupby_op = qo.GroupByAggregate(\n",
- " \"encounter_id\",\n",
- " {\"description\": (\"count\", \"n_meds\")},\n",
- " )\n",
- "\n",
- " return cohort.ops(groupby_op).run()\n",
- "\n",
- "\n",
- "def get_procedures(cohort):\n",
- " \"\"\"Get procedures data.\"\"\"\n",
- " procedures = querier.native.procedures()\n",
- " cohort = cohort.join(\n",
- " procedures,\n",
- " on=(\"encounter_id\", \"encounter\"),\n",
- " )\n",
- " groupby_op = qo.GroupByAggregate(\n",
- " \"encounter_id\",\n",
- " {\"description\": (\"count\", \"n_procedures\")},\n",
- " )\n",
- "\n",
- " return cohort.ops(groupby_op).run()\n",
- "\n",
- "\n",
- "def run_query():\n",
- " \"\"\"Run query pipeline.\"\"\"\n",
- " cohort_query = get_encounters()\n",
- " to_merge = []\n",
- " observations = get_observations(cohort_query)\n",
- " to_merge.extend(observations)\n",
- " medications = get_medications(cohort_query)\n",
- " to_merge.append(medications)\n",
- " procedures = get_procedures(cohort_query)\n",
- " to_merge.append(procedures)\n",
- " cohort = cohort_query.run()\n",
- " for to_merge_df in to_merge:\n",
- " cohort = cohort.merge(\n",
- " to_merge_df,\n",
- " on=\"encounter_id\",\n",
- " how=\"left\",\n",
- " )\n",
- "\n",
- " return cohort\n",
- "\n",
- "\n",
- "cohort = run_query()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "389796a5-5abb-4c8f-bcb9-229d0e3e2108",
- "metadata": {},
- "source": [
- "## Data Inspection and Preprocessing\n",
- "\n",
- "### Drop NaNs based on the `NAN_THRESHOLD`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c576ee51-e825-4970-86e8-3e5f221f145c",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "null_counts = cohort.isnull().sum()[cohort.isnull().sum() > 0]\n",
- "fig = go.Figure(data=[go.Bar(x=null_counts.index, y=null_counts.values)])\n",
- "fig.update_layout(\n",
- " title=\"Number of Null Values per Column\",\n",
- " xaxis_title=\"Columns\",\n",
- " yaxis_title=\"Number of Null Values\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b77e5839-8c2f-4599-a511-2c3737e20772",
- "metadata": {},
- "source": [
- "**Add the figure to the report**\n",
- "\n",
- "We can use the log_plotly_figure method to add the figure to a section of the report. One can specify whether the figure should be interactive or not by setting the `interactive` parameter to `True` or `False` respectively. The default value is `True`. This\n",
- "also affects the final size of the report. If the figure is interactive, the size of the report will be larger than if the figure is not interactive. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "190bcac3-2eda-4acc-bf1b-81cfd39a10b7",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Number of Null Values per Column\",\n",
- " section_name=\"datasets\",\n",
- " interactive=True,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "32d67235-d839-4b70-bbd4-5126d8c6da49",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "thresh_nan = int(NAN_THRESHOLD * len(cohort))\n",
- "cohort = cohort.dropna(axis=1, thresh=thresh_nan)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dc5b45cb-2406-4330-b2fc-3b4823ff0c17",
- "metadata": {},
- "source": [
- "### Length of stay distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f4cd2841-a992-4a89-936c-19bb90e6234d",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "length_of_stay = cohort[\"los\"]\n",
- "length_of_stay_counts = list(length_of_stay.value_counts().values)\n",
- "length_of_stay_keys = list(length_of_stay.value_counts().keys())\n",
- "cohort[\"outcome\"] = cohort[\"los\"] < NUM_DAYS\n",
- "fig = go.Figure(data=[go.Bar(x=length_of_stay_keys, y=length_of_stay_counts)])\n",
- "fig.update_layout(\n",
- " title=\"Length of stay\",\n",
- " xaxis_title=\"Days\",\n",
- " yaxis_title=\"Number of encounters\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "dc629738",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "57de20af",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Length of stay distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "05156094-56e8-49c5-8e3c-478a1797db62",
- "metadata": {},
- "source": [
- "### Outcome distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ae75c8a1-9316-4e47-ad45-7291a7b783bb",
- "metadata": {},
- "outputs": [],
- "source": [
- "cohort[\"outcome\"] = cohort[\"outcome\"].astype(\"int\")\n",
- "fig = px.pie(cohort, names=\"outcome\")\n",
- "fig.update_traces(textinfo=\"percent+label\")\n",
- "fig.update_layout(title_text=\"Outcome Distribution\")\n",
- "fig.update_traces(\n",
- " hovertemplate=\"Outcome: %{label} Count: \\\n",
- " %{value} Percent: %{percent}\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "742ba610-b754-458d-a77f-fc9469dcadab",
- "metadata": {
- "tags": []
- },
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "794e085d-2dae-4dba-ade8-ab8cdc982af3",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Outcome Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bab6a390-95f4-4876-a63d-210c95262960",
- "metadata": {},
- "outputs": [],
- "source": [
- "class_counts = cohort[\"outcome\"].value_counts()\n",
- "class_ratio = class_counts[0] / class_counts[1]\n",
- "print(class_ratio)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e48376c2-a437-41f4-96fa-ea75f182f7b7",
- "metadata": {},
- "source": [
- "### Gender distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d7ef4a36-3f9f-490a-befd-8e5fe41596ee",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.pie(cohort, names=\"gender\")\n",
- "fig.update_layout(\n",
- " title=\"Gender Distribution\",\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "f7b3673d-8df6-4aa8-9fff-f623f4d800ff",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8bd741fb-d0fe-4add-8d66-6b8baf271395",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Gender Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "da0e832a-5763-42dc-a2e0-d91fef955ea5",
- "metadata": {
- "tags": []
- },
- "source": [
- "### Age distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ddb2b6cf-ee4d-4f83-8e56-6adcbe91f854",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fig = px.histogram(cohort, x=\"age\")\n",
- "fig.update_layout(\n",
- " title=\"Age Distribution\",\n",
- " xaxis_title=\"Age\",\n",
- " yaxis_title=\"Count\",\n",
- " bargap=0.2,\n",
- ")\n",
- "fig.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "53519265-e826-459e-a6d6-de9c4cde41e7",
- "metadata": {},
- "source": [
- "**Add the figure to the report**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f3c4c351-8eb5-41cd-8a50-006cfcfb13c0",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "report.log_plotly_figure(\n",
- " fig=fig,\n",
- " caption=\"Age Distribution\",\n",
- " section_name=\"datasets\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "483c9bb5-57bf-4a2c-960f-35f7e76eff1d",
- "metadata": {},
- "source": [
- "### Identifying feature types\n",
- "\n",
- "Cyclops `TabularFeatures` class helps to identify feature types, an essential step before preprocessing the data. Understanding feature types (numerical/categorical/binary) allows us to apply appropriate preprocessing steps for each type."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2fb987af-f056-4886-84fa-4d4e1106e9b9",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "features_list = [\n",
- " \"age\",\n",
- " \"gender\",\n",
- " \"n_obs\",\n",
- " \"n_meds\",\n",
- " \"n_procedures\",\n",
- " \"obs_Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma\",\n",
- " \"obs_Albumin [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma\",\n",
- " \"obs_Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma\",\n",
- " \"obs_Bilirubin.total [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Body Weight\",\n",
- " \"obs_Calcium [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Carbon dioxide total [Moles/volume] in Serum or Plasma\",\n",
- " \"obs_Chloride [Moles/volume] in Serum or Plasma\",\n",
- " \"obs_Creatinine [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Diastolic Blood Pressure\",\n",
- " \"obs_Erythrocyte distribution width [Ratio] by Automated count\",\n",
- " \"obs_Erythrocytes [#/volume] in Blood by Automated count\",\n",
- " \"obs_Ferritin [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Glomerular filtration rate/1.73 sq M.predicted\",\n",
- " \"obs_Glucose [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Hematocrit [Volume Fraction] of Blood by Automated count\",\n",
- " \"obs_Hemoglobin [Mass/volume] in Blood\",\n",
- " \"obs_Leukocytes [#/volume] in Blood by Automated count\",\n",
- " \"obs_MCH [Entitic mass] by Automated count\",\n",
- " \"obs_MCHC [Mass/volume] by Automated count\",\n",
- " \"obs_MCV [Entitic volume] by Automated count\",\n",
- " \"obs_Oxygen saturation in Arterial blood\",\n",
- " \"obs_Platelets [#/volume] in Blood by Automated count\",\n",
- " \"obs_Potassium [Moles/volume] in Serum or Plasma\",\n",
- " \"obs_Protein [Mass/volume] in Serum or Plasma\",\n",
- " \"obs_Sodium [Moles/volume] in Serum or Plasma\",\n",
- " \"obs_Systolic Blood Pressure\",\n",
- " \"obs_Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method\", # noqa: E501\n",
- " \"obs_Urea nitrogen [Mass/volume] in Serum or Plasma\",\n",
- "]\n",
- "features_list = sorted(features_list)\n",
- "tab_features = TabularFeatures(\n",
- " data=cohort.reset_index(),\n",
- " features=features_list,\n",
- " by=\"encounter_id\",\n",
- " targets=\"outcome\",\n",
- ")\n",
- "print(tab_features.types)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "a2738074-00be-46fa-999f-77f85add9469",
- "metadata": {},
- "source": [
- "### Creating data preprocessors\n",
- "\n",
- "We create a data preprocessor using sklearn's ColumnTransformer. This helps in applying different preprocessing steps to different columns in the dataframe. For instance, binary features might be processed differently from numeric features."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2bb80cfb-2e48-4ecf-a325-0026dad4aef7",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"mean\")), (\"scaler\", MinMaxScaler())],\n",
- ")\n",
- "\n",
- "binary_transformer = Pipeline(\n",
- " steps=[(\"imputer\", SimpleImputer(strategy=\"most_frequent\"))],\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "31ec3441-7dba-4b9a-9256-ad6b0293829d",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "numeric_features = sorted((tab_features.features_by_type(\"numeric\")))\n",
- "numeric_indices = [\n",
- " cohort[features_list].columns.get_loc(column) for column in numeric_features\n",
- "]\n",
- "print(numeric_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f2fb5345-17f9-44ac-b944-079324a098e4",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "binary_features = sorted(tab_features.features_by_type(\"binary\"))\n",
- "binary_features.remove(\"outcome\")\n",
- "binary_indices = [\n",
- " cohort[features_list].columns.get_loc(column) for column in binary_features\n",
- "]\n",
- "print(binary_features)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "868fa57c-f12a-4821-a85e-31c6c95d6e76",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "preprocessor = ColumnTransformer(\n",
- " transformers=[\n",
- " (\"num\", numeric_transformer, numeric_indices),\n",
- " (\"onehot\", OneHotEncoder(handle_unknown=\"ignore\"), binary_indices),\n",
- " ],\n",
- " remainder=\"passthrough\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "4a178f34-3883-43c2-8009-28063619df2c",
- "metadata": {},
- "source": [
- "## Creating Hugging Face Dataset\n",
- "\n",
- "We convert our processed Pandas dataframe into a Hugging Face dataset, a powerful and easy-to-use data format which is also compatible with CyclOps models and evaluator modules. The dataset is then split to train and test sets."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "87b72a66-fc46-4e7a-a612-1f66de811c0d",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "cohort = cohort.drop(columns=[\"encounter_id\", \"los\"])\n",
- "dataset = Dataset.from_pandas(cohort)\n",
- "dataset.cleanup_cache_files()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c6370e7a-c559-431f-9ebc-0b35bcdfb029",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "dataset = dataset.cast_column(\"outcome\", ClassLabel(num_classes=2))\n",
- "dataset = dataset.train_test_split(\n",
- " train_size=TRAIN_SIZE,\n",
- " stratify_by_column=\"outcome\",\n",
- " seed=RANDOM_SEED,\n",
- ")"
- ]
- },
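- {
- "cell_type": "markdown",
- "id": "3f1a2b3c-4d5e-6f70-8a9b-0c1d2e3f4a5b",
- "metadata": {},
- "source": [
- "As a quick sanity check (a hedged addition, not part of the original pipeline), we can inspect the resulting splits and confirm that `outcome` was cast to a `ClassLabel`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4a5b6c7d-8e9f-0a1b-2c3d-4e5f6a7b8c9d",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Inspect the train/test split sizes and the cast label feature.\n",
- "print(dataset)\n",
- "print(dataset[\"train\"].features[\"outcome\"])"
- ]
- },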
- {
- "cell_type": "markdown",
- "id": "71007875-2e2c-460f-a956-bcd419cb2017",
- "metadata": {},
- "source": [
- "## Model Creation\n",
- "\n",
- "CyclOps model registry allows for straightforward creation and selection of models. This registry maintains a list of pre-configured models, which can be instantiated with a single line of code. Here we use a XGBoost classifier to fit a logisitic regression model. The model configurations can be passed to `create_model` based on the parameters for XGBClassifier."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "42e8d147-545c-47a7-9a91-b06b27a37d47",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model_name = \"xgb_classifier\"\n",
- "model = create_model(model_name, random_state=123)"
- ]
- },
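- {
- "cell_type": "markdown",
- "id": "5b6c7d8e-9f0a-1b2c-3d4e-5f6a7b8c9d0e",
- "metadata": {},
- "source": [
- "As noted above, `XGBClassifier` keyword arguments can be passed through `create_model`. A hedged illustration follows; the parameter values here are arbitrary, not tuned."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6c7d8e9f-0a1b-2c3d-4e5f-6a7b8c9d0e1f",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Illustrative only: forward XGBoost hyperparameters via create_model.\n",
- "model_tuned = create_model(\n",
- " \"xgb_classifier\",\n",
- " random_state=123,\n",
- " learning_rate=0.05,\n",
- " max_depth=3,\n",
- ")"
- ]
- },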
- {
- "cell_type": "markdown",
- "id": "ff3ae174-59c0-41df-954c-f6e05f64c8dd",
- "metadata": {},
- "source": [
- "## Task Creation\n",
- "\n",
- "We use Cyclops tasks to define our model's task (in this case, BinaryTabularClassificationTask), train the model, make predictions, and evaluate performance. Cyclops task classes encapsulate the entire ML pipeline into a single, cohesive structure, making the process smooth and easy to manage."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "01d36851-292a-4849-98e0-9183c2dec87a",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "los_task = BinaryTabularClassificationTask(\n",
- " {model_name: model},\n",
- " task_features=features_list,\n",
- " task_target=\"outcome\",\n",
- ")\n",
- "los_task.list_models()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "cfb95f1e-ae78-469c-88d8-750d9c36a349",
- "metadata": {},
- "source": [
- "## Training\n",
- "\n",
- "If `best_model_params` is passed to the `train` method, the best model will be selected after the hyperparameter search. The parameters in `best_model_params` indicate the values to create the parameters grid.\n",
- "\n",
- "Note that the data preprocessor needs to be passed to the tasks methods if the Hugging Face dataset is not already preprocessed. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "572b0fe0-5891-4aca-9e63-8d556851caec",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "best_model_params = {\n",
- " \"n_estimators\": [100, 250, 500],\n",
- " \"learning_rate\": [0.1, 0.01],\n",
- " \"max_depth\": [2, 5],\n",
- " \"reg_lambda\": [0, 1, 10],\n",
- " \"colsample_bytree\": [0.7, 0.8, 1],\n",
- " \"gamma\": [0, 1, 2, 10],\n",
- " \"method\": \"random\",\n",
- "}\n",
- "los_task.train(\n",
- " dataset[\"train\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " best_model_params=best_model_params,\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9b687a58-a6e5-46f7-bd9e-106376912086",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model_params = los_task.list_models_params()[model_name]\n",
- "print(model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7e699187-2650-41ab-aba8-42bf9c635819",
- "metadata": {},
- "source": [
- "**Log the model parameters to the report.**\n",
- "\n",
- "We can add model parameters to the model card using the `log_model_parameters` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4cfdfe29-c813-45cd-9ae7-2959daa0cd8f",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_model_parameters(params=model_params)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d772526e-f595-4f4a-8026-7ab39523b3df",
- "metadata": {},
- "source": [
- "## Prediction\n",
- "\n",
- "The prediction output can be either the whole Hugging Face dataset with the prediction columns added to it or the single column containing the predicted values."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c3fee35c-902e-4c54-89b7-c42c1e935e1b",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "y_pred = los_task.predict(\n",
- " dataset[\"test\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " proba=False,\n",
- " only_predictions=True,\n",
- ")\n",
- "print(len(y_pred))"
- ]
- },
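- {
- "cell_type": "markdown",
- "id": "7d8e9f0a-1b2c-3d4e-5f6a-7b8c9d0e1f2a",
- "metadata": {},
- "source": [
- "A hedged sketch of the other output mode, assuming the same `predict` signature: with `proba=True` and `only_predictions=False`, the whole test split should be returned with a prediction column added (the exact column name depends on the task's prediction-column prefix)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8e9f0a1b-2c3d-4e5f-6a7b-8c9d0e1f2a3b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Sketch: return the full test split with predicted probabilities added.\n",
- "dataset_with_probs = los_task.predict(\n",
- " dataset[\"test\"],\n",
- " model_name=model_name,\n",
- " transforms=preprocessor,\n",
- " proba=True,\n",
- " only_predictions=False,\n",
- ")\n",
- "print(dataset_with_probs.column_names)"
- ]
- },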
- {
- "cell_type": "markdown",
- "id": "f2eb06bb-9c85-4597-a07d-f3f16c841c56",
- "metadata": {},
- "source": [
- "## Evaluation\n",
- "\n",
- "Evaluation is done using various evaluation metrics that provide different perspectives on the model's predictive abilities i.e. standard performance metrics and fairness metrics.\n",
- "\n",
- "The standard performance metrics can be created using the `MetricDict` object."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d3d5b101-17a8-4610-ae17-e04cf962d2d1",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "metric_names = [\n",
- " \"binary_accuracy\",\n",
- " \"binary_precision\",\n",
- " \"binary_recall\",\n",
- " \"binary_f1_score\",\n",
- " \"binary_auroc\",\n",
- " \"binary_roc_curve\",\n",
- " \"binary_precision_recall_curve\",\n",
- " \"binary_confusion_matrix\",\n",
- "]\n",
- "metrics = [\n",
- " create_metric(metric_name, experimental=True) for metric_name in metric_names\n",
- "]\n",
- "metric_collection = MetricDict(metrics)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "8cdf9238-dfc8-4eb2-a5c1-e0d273c75ee3",
- "metadata": {},
- "source": [
- "In addition to overall metrics, it might be interesting to see how the model performs on certain subpopulations. We can define these subpopulations using `SliceSpec` objects. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "06d72639-db3b-4e91-be9d-3bf9dbb0f0e8",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "spec_list = [\n",
- " {\n",
- " \"age\": {\n",
- " \"min_value\": 20,\n",
- " \"max_value\": 50,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\n",
- " \"age\": {\n",
- " \"min_value\": 50,\n",
- " \"max_value\": 80,\n",
- " \"min_inclusive\": True,\n",
- " \"max_inclusive\": False,\n",
- " },\n",
- " },\n",
- " {\"gender\": {\"value\": \"M\"}},\n",
- " {\"gender\": {\"value\": \"F\"}},\n",
- "]\n",
- "slice_spec = SliceSpec(spec_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "67bd7806-c480-4c47-8e33-6612c2ede93e",
- "metadata": {},
- "source": [
- "A `MetricDict` can also be defined for the fairness metrics."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "24877057-f737-43bf-a61d-3dd4e45e0381",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "specificity = create_metric(metric_name=\"binary_specificity\", experimental=True)\n",
- "sensitivity = create_metric(metric_name=\"binary_sensitivity\", experimental=True)\n",
- "fpr = (\n",
- " -specificity + 1\n",
- ") # rsub is not supported due to limitations in the array API standard\n",
- "fnr = -sensitivity + 1\n",
- "ber = (fpr + fnr) / 2\n",
- "fairness_metric_collection = MetricDict(\n",
- " {\n",
- " \"Sensitivity\": sensitivity,\n",
- " \"Specificity\": specificity,\n",
- " \"BER\": ber,\n",
- " },\n",
- ")"
- ]
- },
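- {
- "cell_type": "markdown",
- "id": "9f0a1b2c-3d4e-5f6a-7b8c-9d0e1f2a3b4c",
- "metadata": {},
- "source": [
- "As a quick check of the arithmetic above: since $FPR = 1 - specificity$ and $FNR = 1 - sensitivity$, the composed metric computes the balanced error rate\n",
- "\n",
- "$$BER = \\frac{FPR + FNR}{2} = 1 - \\frac{sensitivity + specificity}{2}.$$"
- ]
- },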
- {
- "cell_type": "markdown",
- "id": "7b7f01c5-9598-4dcb-8697-ebc03a060d1c",
- "metadata": {},
- "source": [
- "The FairnessConfig helps in setting up and evaluating the fairness of the model predictions."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "945fdb5c-1b55-42f8-9644-e2bf6efd7011",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "fairness_config = FairnessConfig(\n",
- " metrics=fairness_metric_collection,\n",
- " dataset=None, # dataset is passed from the evaluator\n",
- " target_columns=None, # target columns are passed from the evaluator\n",
- " groups=[\"gender\", \"age\"],\n",
- " group_bins={\"age\": [20, 40]},\n",
- " group_base_values={\"age\": 40, \"gender\": \"M\"},\n",
- " thresholds=[0.5],\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "3e7d0840-f686-40c5-bb35-2ea37734553c",
- "metadata": {},
- "source": [
- "The evaluate methods outputs the evaluation results and the Hugging Face dataset with the predictions added to it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3797fe8c-886e-426f-8ea9-ad5afad3b65b",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "results, dataset_with_preds = los_task.evaluate(\n",
- " dataset[\"test\"],\n",
- " metric_collection,\n",
- " model_names=model_name,\n",
- " transforms=preprocessor,\n",
- " prediction_column_prefix=\"preds\",\n",
- " slice_spec=slice_spec,\n",
- " batch_size=-1,\n",
- " fairness_config=fairness_config,\n",
- " override_fairness_metrics=False,\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7d2d1d75-f7d8-44d3-a782-2aba9a4fbac0",
- "metadata": {
- "tags": []
- },
- "source": [
- "**Log the performance metrics to the report.**\n",
- "\n",
- "We can add a performance metric to the model card using the `log_performance_metric` method, which expects a dictionary where the keys are in the following format: `slice_name/metric_name`. For instance, `overall/accuracy`. \n",
- "\n",
- "We first need to process the evaluation results to get the metrics in the right format."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d322a86f-1f7c-42f6-8a97-8a18ea8622e2",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "model_name = f\"model_for_preds.{model_name}\"\n",
- "results_flat = flatten_results_dict(\n",
- " results=results,\n",
- " remove_metrics=[\"BinaryROC\", \"BinaryPrecisionRecallCurve\"],\n",
- " model_name=model_name,\n",
- ")"
- ]
- },
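- {
- "cell_type": "markdown",
- "id": "0a1b2c3d-4e5f-6a7b-8c9d-0e1f2a3b4c5d",
- "metadata": {},
- "source": [
- "Before logging, we can peek at a few flattened entries to confirm the `slice_name/metric_name` key format; the exact slice names depend on the `SliceSpec` defined earlier."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1b2c3d4e-5f6a-7b8c-9d0e-1f2a3b4c5d6e",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Print a handful of flattened results to verify the key format.\n",
- "for key in list(results_flat)[:5]:\n",
- " print(key, \"->\", results_flat[key])"
- ]
- },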
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d33a171c-02ef-4bc9-a3bf-87320c7c83d6",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "for name, metric in results_flat.items():\n",
- " split, name = name.split(\"/\") # noqa: PLW2901\n",
- " if name == \"BinaryConfusionMatrix\":\n",
- " continue\n",
- " descriptions = {\n",
- " \"BinaryPrecision\": \"The proportion of predicted positive instances that are correctly predicted.\",\n",
- " \"BinaryRecall\": \"The proportion of actual positive instances that are correctly predicted. Also known as recall or true positive rate.\",\n",
- " \"BinaryAccuracy\": \"The proportion of all instances that are correctly predicted.\",\n",
- " \"BinaryAUROC\": \"The area under the receiver operating characteristic curve (AUROC) is a measure of the performance of a binary classification model.\",\n",
- " \"BinaryF1Score\": \"The harmonic mean of precision and recall.\",\n",
- " }\n",
- " report.log_quantitative_analysis(\n",
- " \"performance\",\n",
- " name=name,\n",
- " value=metric.tolist(),\n",
- " description=descriptions[name],\n",
- " metric_slice=split,\n",
- " pass_fail_thresholds=0.7,\n",
- " pass_fail_threshold_fns=lambda x, threshold: bool(x >= threshold),\n",
- " )"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0ee464d3-7246-4c6f-ac2d-8cca2a985f22",
- "metadata": {},
- "source": [
- "We can also use the `ClassificationPlotter` to plot the performance metrics and the add the figure to the model card using the `log_plotly_figure` method."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "c1acc80b-9dce-4f10-9641-dba86cae0a50",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "plotter = ClassificationPlotter(task_type=\"binary\", class_names=[\"0\", \"1\"])\n",
- "plotter.set_template(\"plotly_white\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ba3a525b-63d5-4d35-b248-8fa67dc5a2c2",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# extracting the ROC curves and AUROC results for all the slices\n",
- "roc_curves = {\n",
- " slice_name: slice_results[\"BinaryROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "aurocs = {\n",
- " slice_name: slice_results[\"BinaryAUROC\"]\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}\n",
- "roc_curves.keys()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ba032f62",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plot confusion matrix\n",
- "confusion_matrix = results[model_name][\"overall\"][\"BinaryConfusionMatrix\"]\n",
- "conf_plot = plotter.confusion_matrix(\n",
- " confusion_matrix,\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=conf_plot,\n",
- " caption=\"Confusion Matrix\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "conf_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9774f0ef-89a9-4c65-9021-635a90122676",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# plotting the ROC curves for all the slices\n",
- "roc_plot = plotter.roc_curve_comparison(roc_curves, aurocs=aurocs)\n",
- "report.log_plotly_figure(\n",
- " fig=roc_plot,\n",
- " caption=\"ROC Curve for Female Patients\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "roc_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6d31a984-bd7f-419d-95dd-be4f397e853a",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the overall classification metric values.\n",
- "overall_performance = {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in results[model_name][\"overall\"].items()\n",
- " if metric_name\n",
- " not in [\"BinaryROC\", \"BinaryPrecisionRecallCurve\", \"BinaryConfusionMatrix\"]\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a264f7a3-db58-4cbf-a67b-63a9ffe46b98",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the overall classification metric values.\n",
- "overall_performance_plot = plotter.metrics_value(\n",
- " overall_performance,\n",
- " title=\"Overall Performance\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=overall_performance_plot,\n",
- " caption=\"Overall Performance\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "overall_performance_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a463b995-e2a3-4e2e-8339-7caa5d885482",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Extracting the metric values for all the slices.\n",
- "slice_metrics = {\n",
- " slice_name: {\n",
- " metric_name: metric_value\n",
- " for metric_name, metric_value in slice_results.items()\n",
- " if metric_name\n",
- " not in [\"BinaryROCCurve\", \"BinaryPrecisionRecallCurve\", \"BinaryConfusionMatrix\"]\n",
- " }\n",
- " for slice_name, slice_results in results[model_name].items()\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a2f5b559-ec42-47f8-bf73-4326e595271b",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the metric values for all the slices.\n",
- "slice_metrics_plot = plotter.metrics_comparison_bar(slice_metrics)\n",
- "report.log_plotly_figure(\n",
- " fig=slice_metrics_plot,\n",
- " caption=\"Slice Metric Comparison\",\n",
- " section_name=\"quantitative analysis\",\n",
- ")\n",
- "slice_metrics_plot.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "22439e46-02a1-4ec6-83d5-9ab5ed9e57b0",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Reformatting the fairness metrics\n",
- "fairness_results = copy.deepcopy(results[\"fairness\"])\n",
- "fairness_metrics = {}\n",
- "# remove the group size from the fairness results and add it to the slice name\n",
- "for slice_name, slice_results in fairness_results.items():\n",
- " group_size = slice_results.pop(\"Group Size\")\n",
- " fairness_metrics[f\"{slice_name} (Size={group_size})\"] = slice_results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b82589f2-46e2-4936-871b-f26535a3e0c1",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plotting the fairness metrics\n",
- "fairness_plot = plotter.metrics_comparison_scatter(\n",
- " fairness_metrics,\n",
- " title=\"Fairness Metrics\",\n",
- ")\n",
- "report.log_plotly_figure(\n",
- " fig=fairness_plot,\n",
- " caption=\"Fairness Metrics\",\n",
- " section_name=\"fairness analysis\",\n",
- ")\n",
- "fairness_plot.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "be00d959-442d-41f8-92fc-60abedef133a",
- "metadata": {
- "tags": []
- },
- "source": [
- "## Report Generation\n",
- "\n",
- "Before generating the model card, let us document some of the details of the model and some considerations involved in developing and using the model.\n",
- "\n",
- "\n",
- "Let's start with populating the model details section, which includes the following fields by default:\n",
- "- description: A high-level description of the model and its usage for a general audience.\n",
- "- version: The version of the model.\n",
- "- owners: The individuals or organizations that own the model.\n",
- "- license: The license under which the model is made available.\n",
- "- citation: The citation for the model.\n",
- "- references: Links to resources that are relevant to the model.\n",
- "- path: The path to where the model is stored.\n",
- "- regulatory_requirements: The regulatory requirements that are relevant to the model.\n",
- "\n",
- "We can add additional fields to the model details section by passing a dictionary to the `log_from_dict` method and specifying the section name as `model_details`. You can also use the `log_descriptor` method to add a new field object with a `description` attribute to any section of the model card."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d5c68db3-e18b-43c1-b37e-ec6018cc03c0",
- "metadata": {},
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"name\": \"Prolonged Length of Stay Prediction Model\",\n",
- " \"description\": \"The model was trained on the Synthea synthetic dataset \\\n",
- " to predict prolonged stay in the hospital.\",\n",
- " },\n",
- " section_name=\"model_details\",\n",
- ")\n",
- "report.log_version(\n",
- " version_str=\"0.0.1\",\n",
- " date=str(date.today()),\n",
- " description=\"Initial Release\",\n",
- ")\n",
- "report.log_owner(\n",
- " name=\"CyclOps Team\",\n",
- " contact=\"vectorinstitute.github.io/cyclops/\",\n",
- " email=\"cyclops@vectorinstitute.ai\",\n",
- ")\n",
- "report.log_license(identifier=\"Apache-2.0\")\n",
- "report.log_reference(\n",
- " link=\"https://xgboost.readthedocs.io/en/stable/python/python_api.html\", # noqa: E501\n",
- ")"
- ]
- },
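- {
- "cell_type": "markdown",
- "id": "2c3d4e5f-6a7b-8c9d-0e1f-2a3b4c5d6e7f",
- "metadata": {},
- "source": [
- "The `log_descriptor` method mentioned above can add a custom field with a `description` attribute to any section of the model card. A hedged sketch follows; the field name is hypothetical and the signature is assumed from the description above."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3d4e5f6a-7b8c-9d0e-1f2a-3b4c5d6e7f8a",
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hypothetical custom field; adjust the name and section as needed.\n",
- "report.log_descriptor(\n",
- " name=\"validation_scope\",\n",
- " description=\"Validated only on synthetic (Synthea) data.\",\n",
- " section_name=\"model_details\",\n",
- ")"
- ]
- },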
- {
- "cell_type": "markdown",
- "id": "d3cf7f12-db00-47b7-abb0-5259bce56500",
- "metadata": {},
- "source": [
- "Next, let's populate the considerations section, which includes the following fields by default:\n",
- "- users: The intended users of the model.\n",
- "- use_cases: The use cases for the model. These could be primary, downstream or out-of-scope use cases.\n",
- "- fairness_assessment: A description of the benefits and harms of the model for different groups as well as the steps taken to mitigate the harms.\n",
- "- ethical_considerations: The risks associated with using the model and the steps taken to mitigate them. This can be populated using the `log_risk` method.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4825be83-b601-4daa-94a8-c7819f53b22b",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "report.log_from_dict(\n",
- " data={\n",
- " \"users\": [\n",
- " {\"description\": \"Hospitals\"},\n",
- " {\"description\": \"Clinicians\"},\n",
- " ],\n",
- " },\n",
- " section_name=\"considerations\",\n",
- ")\n",
- "report.log_user(description=\"ML Engineers\")\n",
- "report.log_use_case(\n",
- " description=\"Predicting prolonged length of stay\",\n",
- " kind=\"primary\",\n",
- ")\n",
- "report.log_fairness_assessment(\n",
- " affected_group=\"sex, age\",\n",
- " benefit=\"Improved health outcomes for patients.\",\n",
- " harm=\"Biased predictions for patients in certain groups (e.g. older patients) \\\n",
- " may lead to worse health outcomes.\",\n",
- " mitigation_strategy=\"We will monitor the performance of the model on these groups \\\n",
- " and retrain the model if the performance drops below a certain threshold.\",\n",
- ")\n",
- "report.log_risk(\n",
- " risk=\"The model may be used to make decisions that affect the health of patients.\",\n",
- " mitigation_strategy=\"The model should be continuously monitored for performance \\\n",
- " and retrained if the performance drops below a certain threshold.\",\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "fea2ac13-7185-4327-9eef-e4cf08f3b3dd",
- "metadata": {},
- "source": [
- "Once the model card is populated, you can generate the report using the `export` method. The report is generated in the form of an HTML file. A JSON file containing the model card data will also be generated along with the HTML file. By default, the files will be saved in a folder named `cyclops_reports` in the current working directory. You can change the path by passing a `output_dir` argument when instantiating the `ModelCardReport` class."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "257043a9-ae7b-4db6-a2f5-b03ff57320a7",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "synthetic_timestamps = [\n",
- " \"2021-09-01\",\n",
- " \"2021-10-01\",\n",
- " \"2021-11-01\",\n",
- " \"2021-12-01\",\n",
- " \"2022-01-01\",\n",
- "]\n",
- "report._model_card.overview = None\n",
- "report_path = report.export(\n",
- " output_filename=\"length_of_stay_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[0],\n",
- ")\n",
- "shutil.copy(f\"{report_path}\", \".\")\n",
- "for i in range(4):\n",
- " report._model_card.overview = None\n",
- " for metric in report._model_card.quantitative_analysis.performance_metrics:\n",
- " metric.value = np.clip(\n",
- " metric.value + np.random.normal(0, 0.1),\n",
- " 0,\n",
- " 1,\n",
- " )\n",
- " metric.tests[0].passed = bool(metric.value >= 0.7)\n",
- " report_path = report.export(\n",
- " output_filename=\"length_of_stay_report_periodic.html\",\n",
- " synthetic_timestamp=synthetic_timestamps[i + 1],\n",
- " )\n",
- " shutil.copy(f\"{report_path}\", \".\")\n",
- "shutil.rmtree(\"./cyclops_report\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0d953f1b-845d-424e-b8b6-d782973d9e84",
- "metadata": {},
- "source": [
- "You can view the generated HTML [report](./length_of_stay_report_periodic.html)."
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.10"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/api/_static/nbsphinx-broken-thumbnail.svg b/api/_static/nbsphinx-broken-thumbnail.svg
deleted file mode 100644
index 4919ca882..000000000
--- a/api/_static/nbsphinx-broken-thumbnail.svg
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
diff --git a/api/_static/nbsphinx-code-cells.css b/api/_static/nbsphinx-code-cells.css
deleted file mode 100644
index a3fb27c30..000000000
--- a/api/_static/nbsphinx-code-cells.css
+++ /dev/null
@@ -1,259 +0,0 @@
-/* remove conflicting styling from Sphinx themes */
-div.nbinput.container div.prompt *,
-div.nboutput.container div.prompt *,
-div.nbinput.container div.input_area pre,
-div.nboutput.container div.output_area pre,
-div.nbinput.container div.input_area .highlight,
-div.nboutput.container div.output_area .highlight {
- border: none;
- padding: 0;
- margin: 0;
- box-shadow: none;
-}
-
-div.nbinput.container > div[class*=highlight],
-div.nboutput.container > div[class*=highlight] {
- margin: 0;
-}
-
-div.nbinput.container div.prompt *,
-div.nboutput.container div.prompt * {
- background: none;
-}
-
-div.nboutput.container div.output_area .highlight,
-div.nboutput.container div.output_area pre {
- background: unset;
-}
-
-div.nboutput.container div.output_area div.highlight {
- color: unset; /* override Pygments text color */
-}
-
-/* avoid gaps between output lines */
-div.nboutput.container div[class*=highlight] pre {
- line-height: normal;
-}
-
-/* input/output containers */
-div.nbinput.container,
-div.nboutput.container {
- display: -webkit-flex;
- display: flex;
- align-items: flex-start;
- margin: 0;
- width: 100%;
-}
-@media (max-width: 540px) {
- div.nbinput.container,
- div.nboutput.container {
- flex-direction: column;
- }
-}
-
-/* input container */
-div.nbinput.container {
- padding-top: 5px;
-}
-
-/* last container */
-div.nblast.container {
- padding-bottom: 5px;
-}
-
-/* input prompt */
-div.nbinput.container div.prompt pre,
-/* for sphinx_immaterial theme: */
-div.nbinput.container div.prompt pre > code {
- color: #307FC1;
-}
-
-/* output prompt */
-div.nboutput.container div.prompt pre,
-/* for sphinx_immaterial theme: */
-div.nboutput.container div.prompt pre > code {
- color: #BF5B3D;
-}
-
-/* all prompts */
-div.nbinput.container div.prompt,
-div.nboutput.container div.prompt {
- width: 4.5ex;
- padding-top: 5px;
- position: relative;
- user-select: none;
-}
-
-div.nbinput.container div.prompt > div,
-div.nboutput.container div.prompt > div {
- position: absolute;
- right: 0;
- margin-right: 0.3ex;
-}
-
-@media (max-width: 540px) {
- div.nbinput.container div.prompt,
- div.nboutput.container div.prompt {
- width: unset;
- text-align: left;
- padding: 0.4em;
- }
- div.nboutput.container div.prompt.empty {
- padding: 0;
- }
-
- div.nbinput.container div.prompt > div,
- div.nboutput.container div.prompt > div {
- position: unset;
- }
-}
-
-/* disable scrollbars and line breaks on prompts */
-div.nbinput.container div.prompt pre,
-div.nboutput.container div.prompt pre {
- overflow: hidden;
- white-space: pre;
-}
-
-/* input/output area */
-div.nbinput.container div.input_area,
-div.nboutput.container div.output_area {
- -webkit-flex: 1;
- flex: 1;
- overflow: auto;
-}
-@media (max-width: 540px) {
- div.nbinput.container div.input_area,
- div.nboutput.container div.output_area {
- width: 100%;
- }
-}
-
-/* input area */
-div.nbinput.container div.input_area {
- border: 1px solid #e0e0e0;
- border-radius: 2px;
- /*background: #f5f5f5;*/
-}
-
-/* override MathJax center alignment in output cells */
-div.nboutput.container div[class*=MathJax] {
- text-align: left !important;
-}
-
-/* override sphinx.ext.imgmath center alignment in output cells */
-div.nboutput.container div.math p {
- text-align: left;
-}
-
-/* standard error */
-div.nboutput.container div.output_area.stderr {
- background: #fdd;
-}
-
-/* ANSI colors */
-.ansi-black-fg { color: #3E424D; }
-.ansi-black-bg { background-color: #3E424D; }
-.ansi-black-intense-fg { color: #282C36; }
-.ansi-black-intense-bg { background-color: #282C36; }
-.ansi-red-fg { color: #E75C58; }
-.ansi-red-bg { background-color: #E75C58; }
-.ansi-red-intense-fg { color: #B22B31; }
-.ansi-red-intense-bg { background-color: #B22B31; }
-.ansi-green-fg { color: #00A250; }
-.ansi-green-bg { background-color: #00A250; }
-.ansi-green-intense-fg { color: #007427; }
-.ansi-green-intense-bg { background-color: #007427; }
-.ansi-yellow-fg { color: #DDB62B; }
-.ansi-yellow-bg { background-color: #DDB62B; }
-.ansi-yellow-intense-fg { color: #B27D12; }
-.ansi-yellow-intense-bg { background-color: #B27D12; }
-.ansi-blue-fg { color: #208FFB; }
-.ansi-blue-bg { background-color: #208FFB; }
-.ansi-blue-intense-fg { color: #0065CA; }
-.ansi-blue-intense-bg { background-color: #0065CA; }
-.ansi-magenta-fg { color: #D160C4; }
-.ansi-magenta-bg { background-color: #D160C4; }
-.ansi-magenta-intense-fg { color: #A03196; }
-.ansi-magenta-intense-bg { background-color: #A03196; }
-.ansi-cyan-fg { color: #60C6C8; }
-.ansi-cyan-bg { background-color: #60C6C8; }
-.ansi-cyan-intense-fg { color: #258F8F; }
-.ansi-cyan-intense-bg { background-color: #258F8F; }
-.ansi-white-fg { color: #C5C1B4; }
-.ansi-white-bg { background-color: #C5C1B4; }
-.ansi-white-intense-fg { color: #A1A6B2; }
-.ansi-white-intense-bg { background-color: #A1A6B2; }
-
-.ansi-default-inverse-fg { color: #FFFFFF; }
-.ansi-default-inverse-bg { background-color: #000000; }
-
-.ansi-bold { font-weight: bold; }
-.ansi-underline { text-decoration: underline; }
-
-
-div.nbinput.container div.input_area div[class*=highlight] > pre,
-div.nboutput.container div.output_area div[class*=highlight] > pre,
-div.nboutput.container div.output_area div[class*=highlight].math,
-div.nboutput.container div.output_area.rendered_html,
-div.nboutput.container div.output_area > div.output_javascript,
-div.nboutput.container div.output_area:not(.rendered_html) > img{
- padding: 5px;
- margin: 0;
-}
-
-/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */
-div.nbinput.container div.input_area > div[class^='highlight'],
-div.nboutput.container div.output_area > div[class^='highlight']{
- overflow-y: hidden;
-}
-
-/* hide copy button on prompts for 'sphinx_copybutton' extension ... */
-.prompt .copybtn,
-/* ... and 'sphinx_immaterial' theme */
-.prompt .md-clipboard.md-icon {
- display: none;
-}
-
-/* Some additional styling taken form the Jupyter notebook CSS */
-.jp-RenderedHTMLCommon table,
-div.rendered_html table {
- border: none;
- border-collapse: collapse;
- border-spacing: 0;
- color: black;
- font-size: 12px;
- table-layout: fixed;
-}
-.jp-RenderedHTMLCommon thead,
-div.rendered_html thead {
- border-bottom: 1px solid black;
- vertical-align: bottom;
-}
-.jp-RenderedHTMLCommon tr,
-.jp-RenderedHTMLCommon th,
-.jp-RenderedHTMLCommon td,
-div.rendered_html tr,
-div.rendered_html th,
-div.rendered_html td {
- text-align: right;
- vertical-align: middle;
- padding: 0.5em 0.5em;
- line-height: normal;
- white-space: normal;
- max-width: none;
- border: none;
-}
-.jp-RenderedHTMLCommon th,
-div.rendered_html th {
- font-weight: bold;
-}
-.jp-RenderedHTMLCommon tbody tr:nth-child(odd),
-div.rendered_html tbody tr:nth-child(odd) {
- background: #f5f5f5;
-}
-.jp-RenderedHTMLCommon tbody tr:hover,
-div.rendered_html tbody tr:hover {
- background: rgba(66, 165, 245, 0.2);
-}
-
diff --git a/api/_static/nbsphinx-gallery.css b/api/_static/nbsphinx-gallery.css
deleted file mode 100644
index 365c27a96..000000000
--- a/api/_static/nbsphinx-gallery.css
+++ /dev/null
@@ -1,31 +0,0 @@
-.nbsphinx-gallery {
- display: grid;
- grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
- gap: 5px;
- margin-top: 1em;
- margin-bottom: 1em;
-}
-
-.nbsphinx-gallery > a {
- padding: 5px;
- border: 1px dotted currentColor;
- border-radius: 2px;
- text-align: center;
-}
-
-.nbsphinx-gallery > a:hover {
- border-style: solid;
-}
-
-.nbsphinx-gallery img {
- max-width: 100%;
- max-height: 100%;
-}
-
-.nbsphinx-gallery > a > div:first-child {
- display: flex;
- align-items: start;
- justify-content: center;
- height: 120px;
- margin-bottom: 5px;
-}
diff --git a/api/_static/nbsphinx-no-thumbnail.svg b/api/_static/nbsphinx-no-thumbnail.svg
deleted file mode 100644
index 9dca7588f..000000000
--- a/api/_static/nbsphinx-no-thumbnail.svg
+++ /dev/null
@@ -1,9 +0,0 @@
-
-
-
-
diff --git a/api/api.html b/api/api.html
index 49341efca..e0ae4234a 100644
--- a/api/api.html
+++ b/api/api.html
@@ -61,7 +61,6 @@
-
@@ -192,24 +191,17 @@