Last-minute cosmetics to more easily generate great tables

pegasystems · Dec 9, 2024 · 51ac723 · 51ac723
1 parent d8bce91
commit 51ac723
Show file tree

Hide file tree

Showing 4 changed files with 184 additions and 96 deletions.
diff --git a/python/pdstools/adm/CDH_Guidelines.py b/python/pdstools/adm/CDH_Guidelines.py
@@ -7,20 +7,24 @@
 _data = {
     "Issues": [1, 5, 25, None],
     "Groups per Issue": [1, 5, 25, None],
-    "Treatments": [1, 2500, 5000, 5000],
-    "Treatments per Channel": [1, 1000, 2500, 2500],
+    "Treatments": [2, 2500, 5000, 5000],
+    "Treatments per Channel": [2, 1000, 2500, 2500],
     "Treatments per Channel per Action": [1, 1, 5, None],
     "Actions": [10, 1000, 2500, 2500],
     "Actions per Group": [1, 100, 250, None],
     "Channels": [1, 2, None, None],
     "Configurations per Channel": [1, 1, 2, None],
     "Predictors": [50, 200, 700, 2000],
     "Active Predictors per Model": [2, 5, 100, None],
+
+    # The entries below are not part of the standard cloud limits but are used in the reports
+
     "Model Performance": [52, 55, 80, 90],
-    "Engagement Lift": [0.0, 0.2, 2.0, None],
     "Responses": [1.0, 200, None, None],
     "Positive Responses": [1.0, 200, None, None],
     "Engagement Lift": [0.0, 1.0, None, None],
+    "CTR": [0.0, 0.000001, 0.999999, 1.0],
+    "OmniChannel": [0.0, 0.5, 1.0, 1.0],
 }
 
 _pega_cloud_limits = pl.DataFrame(data=_data).transpose(include_header=True)

diff --git a/python/pdstools/reports/HealthCheck.qmd b/python/pdstools/reports/HealthCheck.qmd
@@ -265,24 +265,21 @@ formatted_channel_overview = (
         df_channel_overview,
         cdh_guidelines=cdh_guidelines,
         highlight_limits={
-            "Positives": "Positive Responses",
-            "Performance": "Model Performance",
-            "ResponseCount": "Responses",
-            "Total Number of Actions": "Actions",
-            "Used Actions": "Actions",
-            "Total Number of Treatments": "Treatments",
-            "Used Treatments": "Treatments",
+            "Positive Responses": "Positives",
+            "Model Performance": "Performance",
+            "Responses": "ResponseCount",
+            "Actions": ["Total Number of Actions", "Used Actions"],
+            "Treatments": ["Total Number of Treatments", "Used Treatments"],
             "Issues": "Issues",
+            "OmniChannel": "OmniChannel Actions",
+            "CTR" : "CTR",
         },
-        highlight_lists = {
-            "Channel" : cdh_guidelines.standard_channels,
-            "Direction" : cdh_guidelines.standard_directions,
+        highlight_lists={
+            "Channel": cdh_guidelines.standard_channels,
+            "Direction": cdh_guidelines.standard_directions,
         },
-        highlight_configurations = ["Configuration"],
+        highlight_configurations=["Configuration"],
     )
-    .fmt_percent(decimals=0, columns=["OmniChannel Actions"])
-    .fmt_number(decimals=2, columns=["Performance"])
-    .fmt_percent(decimals=2, columns=["CTR"])
     .cols_label(
         CTR="Base Rate",
         ResponseCount="Total Responses",
@@ -418,21 +415,24 @@ if prediction_file_path:
             highlight_limits={
                 # "Actions": "Actions",
                 # "Unique Treatments": "Treatments",
-                "Positives": "Positive Responses",
-                "Negatives": "Responses",
-                "Positives_Test": "Positive Responses",
-                "Negatives_Test": "Responses",
-                "Positives_Control": "Positive Responses",
-                "Negatives_Control": "Responses",
-                "Positives_NBA": "Positive Responses",
-                "Negatives_NBA": "Responses",
-                "ResponseCount": "Responses",
+                "Positive Responses": [
+                    "Positives",
+                    "Positives_Test",
+                    "Positives_Control",
+                    "Positives_NBA",
+                ],
+                "Responses": [
+                    "ResponseCount",
+                    "Negatives",
+                    "Negatives_Test",
+                    "Negatives_Control",
+                    "Negatives_NBA",
+                ],
+                "Model Performance": "Performance",
+                "CTR": ["CTR", "CTR_Test", "CTR_Control", "CTR_NBA"],
+                "Engagement Lift": "Lift",
             },
         )
-        .fmt_number(decimals=2, columns=["Performance"])
-        .fmt_percent(
-            decimals=2, columns=["CTR", "CTR_Test", "CTR_Control", "CTR_NBA", "Lift"]
-        )
         .fmt_percent(
             decimals=2,
             scale_values=False,
@@ -811,7 +811,9 @@ else:
 model_overview = (
     last_data.group_by(
         ["Configuration"]
-        + report_utils.polars_subset_to_existing_cols(datamart_all_columns, ["Channel", "Direction"])
+        + report_utils.polars_subset_to_existing_cols(
+            datamart_all_columns, ["Channel", "Direction"]
+        )
     )
     .agg(
         [
@@ -835,18 +837,19 @@ model_overview = (
 
 display(
     report_utils.table_standard_formatting(
-        model_overview, title="Model Overview",
+        model_overview,
+        title="Model Overview",
         cdh_guidelines=cdh_guidelines,
         highlight_limits={
             "Actions": "Actions",
-            "Unique Treatments": "Treatments",
-            "Positives": "Positive Responses",
-            "ResponseCount": "Responses",
+            "Treatments": "Unique Treatments",
+            "Positive Responses": "Positives",
+            "Responses": "ResponseCount",
         },
-        highlight_lists = {
-            "Channel" : cdh_guidelines.standard_channels,
-            "Direction" : cdh_guidelines.standard_directions,
-            "Configuration" : cdh_guidelines.standard_configurations,
+        highlight_lists={
+            "Channel": cdh_guidelines.standard_channels,
+            "Direction": cdh_guidelines.standard_directions,
+            "Configuration": cdh_guidelines.standard_configurations,
         },
     )
     .tab_style(
@@ -1301,7 +1304,7 @@ If predictors perform poorly across all models, that may be because of data sour
 
 ```{python}
 # weighted performance
-# TODO apply highlighting in the std way like in the R version
+
 
 if datamart.predictor_data is not None:
     bad_predictors = (
@@ -1329,7 +1332,7 @@ if datamart.predictor_data is not None:
             # .with_columns(MeanPlotData=pl.col("Mean")),
             rowname_col="PredictorName",
             cdh_guidelines=cdh_guidelines,
-            highlight_limits = {"Response Count" : "Responses"}
+            highlight_limits = {"Responses" : "Response Count"}
         )
         .tab_options(container_height="400px", container_overflow_y=True)
         .tab_spanner(

diff --git a/python/pdstools/reports/ModelReport.qmd b/python/pdstools/reports/ModelReport.qmd
@@ -230,28 +230,20 @@ try:
             classifier.collect().select(
                 pl.last("ResponseCount"),
                 pl.last("Positives"),
-                (pl.last("Positives") / pl.last("ResponseCount")).alias("Base Propensity"),
-                pl.last("Performance"),
+                (pl.last("Positives") / pl.last("ResponseCount")).alias(
+                    "Base Propensity"
+                ),
+                pl.last("Performance") * 100,
             ),
-            highlight_limits = {
-                "ResponseCount" : "Responses",
-                "Positives" : "Positive Responses",
-                "Performance" : "Model Performance"
-            }
-        )
-        .cols_label(
+            highlight_limits={
+                "Responses": "ResponseCount",
+                "Positive Responses": "Positives",
+                "Model Performance": "Performance",
+                "CTR": "Base Propensity",
+            },
+        ).cols_label(
             ResponseCount="Responses",
         )
-        .fmt_number(
-            decimals=0,
-            columns=["ResponseCount", "Positives"],
-        )
-        .fmt_percent(decimals=3, columns="Base Propensity")
-        .fmt_number(
-            decimals=2,
-            scale_by=100,
-            columns=["Performance"],
-        )
     )
 
     display(gt)