diff --git a/.github/workflows/Python tests.yml b/.github/workflows/Python tests.yml index b0463ed2..f2a831db 100644 --- a/.github/workflows/Python tests.yml +++ b/.github/workflows/Python tests.yml @@ -81,7 +81,7 @@ jobs: run: uv run pytest --cov=./python/pdstools --cov-report=xml --cov-config=./python/tests/.coveragerc --ignore=python/tests/test_healthcheck.py --ignore=python/tests/test_ADMTrees.py - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@v5.1.1 with: token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: true + fail_ci_if_error: false diff --git a/.github/workflows/mend-scan.yml b/.github/workflows/mend-scan.yml deleted file mode 100644 index d69dd6a0..00000000 --- a/.github/workflows/mend-scan.yml +++ /dev/null @@ -1,54 +0,0 @@ -# Reference: https://github.com/mend-toolkit/mend-examples/blob/main/Unified%20Agent/CI-CD/GitHub.yml - -name: Mend Unified Agent Release Scan -on: - workflow_dispatch: - push: - branches: - - 'master' -jobs: - mendscan: - env: - WS_APIKEY: ${{secrets.MEND_APIKEY}} - WS_USERKEY: ${{secrets.MEND_USERKEY}} - WS_WSS_URL: ${{secrets.MEND_WS_WSS_URL}} - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: Set up JDK - uses: actions/setup-java@v2 - with: - java-version: '11' - distribution: 'adopt' - # - name: Build with Gradle - # run: ./gradlew -Prelease.useLastTag=false clean build - - name: Mend Unified Agent Scan - env: - WS_PRODUCTNAME: ${{vars.MEND_PRODUCT_NAME}} - WS_PROJECTNAME: ${{github.event.repository.name}}_${{github.ref_name}} - WS_GENERATEPROJECTDETAILSJSON: true - WS_GRADLE_ADDITIONALARGUMENTS: -Prelease.useLastTag=false - run: | - echo Downloading Mend Unified Agent - curl -LJO https://unified-agent.s3.amazonaws.com/wss-unified-agent.jar - if [[ "$(curl -sL https://unified-agent.s3.amazonaws.com/wss-unified-agent.jar.sha256)" != "$(sha256sum wss-unified-agent.jar)" ]] ; then - echo "Integrity Check Failed" - else - echo "Integrity Check Passed" - echo Starting Unified Agent Scan - java -jar wss-unified-agent.jar - fi - - name: 'Upload WhiteSource folder' - uses: actions/upload-artifact@v2 - with: - name: Mend - path: whitesource - retention-days: 14 - - name: 'Upload Mend folder if failure' - uses: actions/upload-artifact@v2 - if: failure() - with: - name: Mend - path: whitesource - retention-days: 14 diff --git a/examples/adm/ADMBinningInsights.ipynb b/examples/adm/ADMBinningInsights.ipynb index 23dd290f..75097f79 100644 --- a/examples/adm/ADMBinningInsights.ipynb +++ b/examples/adm/ADMBinningInsights.ipynb @@ -109,9 +109,7 @@ " predictor_name=\"Customer.AnnualIncome\",\n", ")\n", "fig.update_layout(height=400, width=700, xaxis_title=\"\")\n", - "fig.show()\n", - "\n", - "# TODO: y-order is not correct" + "fig.show()" ] }, { @@ -339,7 +337,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.12.3" } }, "nbformat": 4, diff --git a/examples/prediction_studio/Predictions.ipynb b/examples/prediction_studio/Predictions.ipynb index db4b34fb..0d830614 100644 --- a/examples/prediction_studio/Predictions.ipynb +++ b/examples/prediction_studio/Predictions.ipynb @@ -6,6 +6,10 @@ "source": [ "# Predictions Overview\n", "\n", + "__Pega__\n", + "\n", + "__2024-12-04__\n", + "\n", "This is a small notebook to report and analyse Prediction Studio data on Predictions. 
The underlying data is from the Data-DM-Snapshot table that is used to populate the Prediction Studio screen with Prediction Performance, Lift, CTR etc.\n", "\n", "As data this notebook accept data exported from PDC - which has a slightly altered format - as well as data directly exported from the pyGetSnapshot dataset in Pega.\n", @@ -16,12 +20,12 @@ ] }, { - "cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "import polars as pl" + "## Raw data\n", + "\n", + "First, we're going to show the raw data as . The raw data is in a \"long\" format with e.g. test and control groups in separate rows." ] }, { @@ -33,31 +37,37 @@ "from pathlib import Path\n", "import sys\n", "import polars as pl\n", - "import json\n", - "from pdstools import readDSExport, Prediction\n", + "from pdstools import read_ds_export, Prediction\n", "\n", + "# path to dataset export here\n", + "# e.g. PR_DATA_DM_SNAPSHOTS.parquet\n", "data_export = \"\"\n", "\n", "prediction = None\n", + "predictions_raw_data = None\n", "if data_export.endswith(\".parquet\"):\n", - " predictions_raw_data = pl.read_parquet(Path(data_export).expanduser())\n", - " prediction = Prediction(predictions_raw_data.lazy())\n", + " predictions_raw_data = pl.scan_parquet(Path(data_export).expanduser())\n", + " prediction = Prediction(predictions_raw_data)\n", "elif data_export.endswith(\".json\"):\n", " print(\"Import of PDC JSON data not supported\")\n", " sys.exit()\n", "elif data_export.endswith(\".zip\"):\n", - " # Assuming a direct export from the dataset\n", - " predictions_raw_data = readDSExport(data_export).collect()\n", - " prediction = Prediction(predictions_raw_data.lazy())\n", + " predictions_raw_data = read_ds_export(data_export)\n", + " prediction = Prediction(predictions_raw_data)\n", + "else:\n", + " prediction = Prediction.from_mock_data(days=60)\n", "\n", - "predictions_raw_data.head().to_pandas().style" + "if predictions_raw_data is not None:\n", + " predictions_raw_data.head(5).collect().to_pandas().style" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Peek at the internal data" + "## Prediction Data\n", + "\n", + "The actual prediction data is in a \"wide\" format with separate fields for Test and Control groups. Also, it is only the \"daily\" snapshots and the numbers and date are formatted to be normal Polars types." ] }, { @@ -73,7 +83,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Summary by Channel, over all time" + "## Summary by Channel\n", + "\n", + "Standard functionality exists to summarize the predictions per channel. Note that we do not have the prediction to channel mapping in the data (this is an outstanding product issue), so apply the implicit naming conventions of NBAD. For a specific customer, custom mappings can be passed into the summarization function." ] }, { @@ -89,20 +101,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Quick glance at the available data aggregated by day." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "prediction_summary_by_channel = (\n", - " prediction.summary_by_channel(by_period=\"1d\")\n", - " .with_columns(Prediction=pl.format(\"{} ({})\", pl.col.Channel, pl.col.ModelName))\n", - " .collect()\n", - ")" + "# Prediction Trends\n", + "\n", + "Summarization by default is over all time. 
You can pass in an argument to summarize by day, week or any other period as supported by the (Polars time offset string language)[https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.dt.offset_by.html].\n", + "\n", + "This trend data can then easily be visualized." ] }, { @@ -111,17 +114,7 @@ "metadata": {}, "outputs": [], "source": [ - "import plotly.express as px\n", - "\n", - "px.line(\n", - " prediction_summary_by_channel.filter(pl.col(\"isMultiChannelPrediction\").not_())\n", - " .filter(pl.col(\"Channel\") != \"Unknown\")\n", - " .sort([\"Period\"]),\n", - " x=\"Period\",\n", - " y=\"Performance\",\n", - " color=\"Prediction\",\n", - " title=\"Prediction Performance\",\n", - ")" + "prediction.plot.performance_trend(\"1w\")" ] }, { @@ -130,15 +123,7 @@ "metadata": {}, "outputs": [], "source": [ - "px.line(\n", - " prediction_summary_by_channel.filter(pl.col(\"isMultiChannelPrediction\").not_())\n", - " .filter(pl.col(\"Channel\") != \"Unknown\")\n", - " .sort([\"Period\"]),\n", - " x=\"Period\",\n", - " y=\"Lift\",\n", - " color=\"Prediction\",\n", - " title=\"Prediction Lift\",\n", - ").update_yaxes(tickformat=\",.2%\")" + "prediction.plot.lift_trend(\"1w\")#, return_df=True).collect()" ] }, { @@ -147,18 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "px.line(\n", - " prediction_summary_by_channel.filter(pl.col(\"isMultiChannelPrediction\").not_())\n", - " .filter(pl.col(\"Channel\") != \"Unknown\")\n", - " .sort([\"Period\"]),\n", - " x=\"Period\",\n", - " y=\"CTR\",\n", - " facet_row=\"Prediction\",\n", - " color=\"Prediction\",\n", - " title=\"Prediction CTR\",\n", - ").update_yaxes(tickformat=\",.3%\", matches=None).for_each_annotation(\n", - " lambda a: a.update(text=\"\")\n", - ")" + "prediction.plot.ctr_trend(\"1w\", facetting=False)" ] }, { @@ -167,22 +141,13 @@ "metadata": {}, "outputs": [], "source": [ - "px.line(\n", - " prediction_summary_by_channel.filter(pl.col(\"isMultiChannelPrediction\").not_())\n", - " .filter(pl.col(\"Channel\") != \"Unknown\")\n", - " .sort([\"Period\"]),\n", - " x=\"Period\",\n", - " y=\"ResponseCount\",\n", - " facet_row=\"Prediction\",\n", - " color=\"Prediction\",\n", - " title=\"Prediction Responses\",\n", - ").update_yaxes(matches=None).for_each_annotation(lambda a: a.update(text=\"\"))" + "prediction.plot.responsecount_trend(\"1w\", facetting=False)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv", "language": "python", "name": "python3" }, diff --git a/python/pdstools/__init__.py b/python/pdstools/__init__.py index 8e0f707e..e8841240 100644 --- a/python/pdstools/__init__.py +++ b/python/pdstools/__init__.py @@ -1,6 +1,6 @@ """Pega Data Scientist Tools Python library""" -__version__ = "4.0.0a1" +__version__ = "4.0.0b1" from pathlib import Path diff --git a/python/pdstools/adm/ADMDatamart.py b/python/pdstools/adm/ADMDatamart.py index a50458ba..0005ef6e 100644 --- a/python/pdstools/adm/ADMDatamart.py +++ b/python/pdstools/adm/ADMDatamart.py @@ -399,6 +399,7 @@ def apply_predictor_categorization( categorization() if callable(categorization) else categorization ) + if df is not None: return df.with_columns(PredictorCategory=categorization_expr) diff --git a/python/pdstools/adm/Aggregates.py b/python/pdstools/adm/Aggregates.py index 2b773663..c5f0dc15 100644 --- a/python/pdstools/adm/Aggregates.py +++ b/python/pdstools/adm/Aggregates.py @@ -447,6 +447,8 @@ def name_normalizer(x): ChannelDirectionGroup=pl.when( pl.col("ChannelGroup").is_not_null() & 
pl.col("DirectionGroup").is_not_null() + & pl.col("ChannelGroup").is_in(["Other", "Unknown", ""]).not_() + & pl.col("DirectionGroup").is_in(["Other", "Unknown", ""]).not_() ) .then(pl.concat_str(["ChannelGroup", "DirectionGroup"], separator="/")) .otherwise(pl.lit("Other")), diff --git a/python/pdstools/adm/CDH_Guidelines.py b/python/pdstools/adm/CDH_Guidelines.py index fc059d88..a145df0a 100644 --- a/python/pdstools/adm/CDH_Guidelines.py +++ b/python/pdstools/adm/CDH_Guidelines.py @@ -7,19 +7,24 @@ _data = { "Issues": [1, 5, 25, None], "Groups per Issue": [1, 5, 25, None], - "Treatments": [1, 2500, 5000, 5000], - "Treatments per Channel": [1, 1000, 2500, 2500], + "Treatments": [2, 2500, 5000, 5000], + "Treatments per Channel": [2, 1000, 2500, 2500], "Treatments per Channel per Action": [1, 1, 5, None], "Actions": [10, 1000, 2500, 2500], "Actions per Group": [1, 100, 250, None], "Channels": [1, 2, None, None], "Configurations per Channel": [1, 1, 2, None], - "Predictors": [10, 200, 700, 2000], + "Predictors": [50, 200, 700, 2000], "Active Predictors per Model": [2, 5, 100, None], + + # below are not part of the standard cloud limits but used in the reports + "Model Performance": [52, 55, 80, 90], - "Engagement Lift": [0.0, 0.2, 2.0, None], "Responses": [1.0, 200, None, None], "Positive Responses": [1.0, 200, None, None], + "Engagement Lift": [0.0, 1.0, None, None], + "CTR": [0.0, 0.000001, 0.999999, 1.0], + "OmniChannel": [0.0, 0.5, 1.0, 1.0], } _pega_cloud_limits = pl.DataFrame(data=_data).transpose(include_header=True) @@ -159,7 +164,11 @@ def get_predictions_channel_mapping( ) -> pl.DataFrame: if not custom_predictions: custom_predictions = [] - all_predictions = _NBAD_Prediction_data + custom_predictions + all_predictions = _NBAD_Prediction_data + [ + prediction + for prediction in custom_predictions + if prediction[0].upper() not in {x[0].upper() for x in _NBAD_Prediction_data} + ] df = ( pl.DataFrame(data=all_predictions, orient="row") diff --git a/python/pdstools/adm/Plots.py b/python/pdstools/adm/Plots.py index 7cbc439c..aa511c3f 100644 --- a/python/pdstools/adm/Plots.py +++ b/python/pdstools/adm/Plots.py @@ -1194,7 +1194,7 @@ def binning_lift( return plot_df fig = px.bar( - plot_df.collect().to_pandas(use_pyarrow_extension_array=False), + plot_df.collect(), #.to_pandas(use_pyarrow_extension_array=False), x="Lift", y="BinSymbolAbbreviated", color="Direction", @@ -1209,6 +1209,7 @@ def binning_lift( template="pega", custom_data=["PredictorName", "BinSymbol"], facet_col_wrap=3, + category_orders=plot_df.collect().to_dict(), ) fig.update_traces( hovertemplate="
".join( @@ -1226,7 +1227,6 @@ def binning_lift( type="category", categoryorder="array", automargin=True, - autorange="reversed", title="", dtick=1, # show all bins matches=None, # allow independent y-labels if there are row facets @@ -1260,6 +1260,10 @@ def partitioned_plot( fig.show() return figs + + # TODO I took the propensity distrib plot out of the HC as + # it wasn't very clear, also didn't look great visually. + @requires( predictor_columns={ "BinPropensity", diff --git a/python/pdstools/adm/Reports.py b/python/pdstools/adm/Reports.py index 891a5165..4fa25f5c 100644 --- a/python/pdstools/adm/Reports.py +++ b/python/pdstools/adm/Reports.py @@ -1,18 +1,20 @@ __all__ = ["Reports"] import logging import os +import re import shutil import subprocess import sys from os import PathLike from pathlib import Path -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import polars as pl from ..utils import cdh_utils from ..utils.namespaces import LazyNamespace from ..utils.types import QUERY +from ..prediction import Prediction if TYPE_CHECKING: from .ADMDatamart import ADMDatamart @@ -34,7 +36,7 @@ def model_reports( name: Optional[ str ] = None, # TODO when ends with .html assume its the full name but this could be in _get_output_filename - title: str = "ADM Model Overview", + title: str = "ADM Model Report", subtitle: str = "", output_dir: Optional[PathLike] = None, only_active_predictors: bool = False, @@ -122,9 +124,11 @@ def model_reports( output_filename = self._get_output_filename( name, "ModelReport", model_id, output_type ) - self._write_params_file( - temp_dir, - { + self.run_quarto( + qmd_file=qmd_file, + output_filename=output_filename, + output_type=output_type, + params={ "report_type": "ModelReport", "model_file_path": str(model_file_path), "predictor_file_path": str(predictor_file_path), @@ -133,24 +137,22 @@ def model_reports( "title": title, "subtitle": subtitle, }, - ) - self._run_quarto_command( - temp_dir, - qmd_file, - output_type, - output_filename, - verbose, + project={"title": title, "type": "default"}, + analysis={ + "predictions": False, + "predictors": (self.datamart.predictor_data is not None), + "models": (self.datamart.model_data is not None), + }, + temp_dir=temp_dir, + verbose=verbose, ) output_path = temp_dir / output_filename if verbose or not output_path.exists(): # print parameters so they can be copy/pasted into the quarto docs for debugging if model_file_path is not None: - print(f'datafolder = "{model_file_path.parent}"') - print(f'modelfilename = "{model_file_path.name}"') + print(f'model_file_path = "{model_file_path}"') if predictor_file_path is not None: - if model_file_path is None: - print(f'datafolder = "{predictor_file_path.parent}"') - print(f'predictorfilename = "{predictor_file_path.name}"') + print(f'predictor_file_path = "{predictor_file_path}"') print(f'model_id = "{model_id}"') print(f"output_path = {output_path}") if not output_path.exists(): @@ -158,6 +160,8 @@ def model_reports( output_file_paths.append(output_path) if progress_callback: progress_callback(i + 1, len(model_ids)) + # Is this just a difficult way to copy the file? Why not shutil.copy? Or + # even pass in the output-dir property to the quarto project? 
file_data, file_name = cdh_utils.process_files_to_bytes( output_file_paths, base_file_name=output_path ) @@ -192,6 +196,7 @@ def health_check( verbose: bool = False, model_file_path: Optional[PathLike] = None, predictor_file_path: Optional[PathLike] = None, + prediction_file_path: Optional[PathLike] = None, ) -> Path: """ Generates Health Check report based on the provided parameters. @@ -218,6 +223,8 @@ def health_check( Optional name of the actual model data file, so it does not get copied predictor_file_path : Union[str, Path, None], optional Optional name of the actual predictor data file, so it does not get copied + prediction_file_path : Union[str, Path, None], optional + Optional name of the actual predictions data file, so it does not get copied Returns ------- @@ -251,48 +258,53 @@ def health_check( ): model_file_path, predictor_file_path = self.datamart.save_data(temp_dir) - self._write_params_file( - temp_dir, - { + self.run_quarto( + qmd_file=qmd_file, + output_filename=output_filename, + output_type=output_type, + params={ "report_type": "HealthCheck", "model_file_path": str(model_file_path), "predictor_file_path": str(predictor_file_path), + "prediction_file_path": str(prediction_file_path), "query": query, "title": title, "subtitle": subtitle, }, - ) - self._run_quarto_command( - temp_dir, - qmd_file, - output_type, - output_filename, - verbose, + project={"title": title, "type": "default"}, + analysis={ + "predictions": (prediction_file_path is not None), + "predictors": (self.datamart.predictor_data is not None), + "models": (self.datamart.model_data is not None), + }, + temp_dir=temp_dir, + verbose=verbose, ) output_path = temp_dir / output_filename if verbose or not output_path.exists(): if model_file_path is not None: - print(f'datafolder = "{model_file_path.parent}"') - print(f'modelfilename = "{model_file_path.name}"') + print(f'model_file_path = "{model_file_path}"') if predictor_file_path is not None: - if model_file_path is None: - print(f'datafolder = "{predictor_file_path.parent}"') - print(f'predictorfilename = "{predictor_file_path.name}"') + print(f'predictor_file_path = "{predictor_file_path}"') + if prediction_file_path is not None: + print(f'prediction_file_path = "{prediction_file_path}"') print(f"output_path = {output_path}") if not output_path.exists(): raise ValueError(f"Failed to generate report: {output_filename}") + # TODO consider passing in the output-dir property to the quarto project so quarto does the copying final_path = output_dir / output_filename shutil.copy(output_path, final_path) + return final_path finally: if not keep_temp_files and temp_dir.exists() and temp_dir.is_dir(): shutil.rmtree(temp_dir, ignore_errors=True) + @staticmethod def _get_output_filename( - self, name: Optional[str], # going to be the full file name report_type: str, model_id: Optional[str] = None, @@ -312,7 +324,8 @@ def _get_output_filename( else f"{report_type}.{output_type}" ) - def _copy_quarto_file(self, qmd_file: str, temp_dir: Path) -> None: + @staticmethod + def _copy_quarto_file(qmd_file: str, temp_dir: Path) -> None: """Copy the report quarto file to the temporary directory.""" from pdstools import __reports__ @@ -329,47 +342,153 @@ def _copy_quarto_file(self, qmd_file: str, temp_dir: Path) -> None: # if not predictordata_files: # logger.warning("No cached predictor data found.") - def _write_params_file(self, temp_dir: Path, params: Dict) -> None: + @staticmethod + def _write_params_files( + temp_dir: Path, + params: Dict = {}, + project: Dict = {"type": 
"default"}, + analysis: Dict = {}, + ) -> None: """Write parameters to a YAML file.""" import yaml - yaml_params = {"kwargs": {key: value for key, value in params.items()}} + # Parameters to python code - with open(temp_dir / "params.yaml", "w") as f: - yaml.dump(yaml_params, f) + with open(temp_dir / "params.yml", "w") as f: + yaml.dump( + params, + f, + ) + + # Project/rendering options to quarto + + with open(temp_dir / "_quarto.yml", "w") as f: + yaml.dump( + { + "project": project, + "analysis": analysis, + }, + f, + ) + + @staticmethod + def _find_executable(exec_name: str) -> Path: + """Find the executable on the system.""" + + # First find in path + exec_in_path = shutil.which(exec_name) # pragma: no cover + if exec_in_path: # pragma: no cover + return Path(exec_in_path) + + # If not in path try find explicitly. TODO not sure this is wise + # maybe we should not try be smart and assume quarto/pandoc are + # properly installed. + + if sys.platform == "win32": # pragma: no cover + possible_paths = [ + Path( + os.environ.get("USERPROFILE", ""), + "AppData", + "Local", + "Programs", + f"{exec_name}", # assume windows is still case insensitive (NTFS changes this...) + "bin", + f"{exec_name}.cmd", + ), + Path( + os.environ.get("PROGRAMFILES", ""), + f"{exec_name}", + "bin", + f"{exec_name}.cmd", + ), + ] + else: # pragma: no cover + possible_paths = [ + Path(f"/usr/local/bin/{exec_name}"), + Path(f"/opt/{exec_name}/bin/{exec_name}"), + Path(os.environ.get("HOME", ""), ".local", "bin", exec_name), + ] + + for path in possible_paths: + if path.exists(): + return path + + raise FileNotFoundError( + "Quarto executable not found. Please ensure Quarto is installed and in the system PATH." + ) # pragma: no cover + + # TODO not conviced about below. This isn't necessarily the same path resolution + # as the os does. 
What's wrong with just assuming quarto is in the path so we can + # just test for version w code like + # def get_cmd_output(args): + # result = ( + # subprocess.run(args, stdout=subprocess.PIPE).stdout.decode("utf-8").split("\n") + # ) + # return result + # get_version_only(get_cmd_output(["quarto", "--version"])[0]) + + @staticmethod + def _get_executable_with_version( + exec_name: str, verbose: bool = False + ) -> Tuple[Path, str]: + def get_version_only(versionstr): + return re.sub("[^.0-9]", "", versionstr) - def _run_quarto_command( - self, - temp_dir: Path, - qmd_file: str, - output_type: str, - output_filename: str, - verbose: bool = True, - ) -> int: - """Run the Quarto command to generate the report.""" - if verbose: - print("Set verbose=False to hide output.") try: - quarto_exec = self._find_quarto_executable() + executable = Reports._find_executable(exec_name=exec_name) except FileNotFoundError as e: # pragma: no cover logger.error(e) raise - # Check Quarto version + # Check version try: version_result = subprocess.run( - [str(quarto_exec), "--version"], + [str(executable), "--version"], capture_output=True, text=True, check=True, ) - quarto_version = version_result.stdout.strip() - message = f"Quarto version: {quarto_version}" + version_string = get_version_only( + version_result.stdout.split("\n")[0].strip() + ) + message = f"{exec_name} version: {version_string}" logger.info(message) if verbose: print(message) except subprocess.CalledProcessError as e: # pragma: no cover - logger.warning(f"Failed to check Quarto version: {e}") + logger.warning(f"Failed to check {exec_name} version: {e}") + + return (executable, version_string) + + @staticmethod + def get_quarto_with_version(verbose: bool = True) -> Tuple[Path, str]: + return Reports._get_executable_with_version("quarto", verbose=verbose) + + @staticmethod + def get_pandoc_with_version(verbose: bool = True) -> Tuple[Path, str]: + return Reports._get_executable_with_version("pandoc", verbose=verbose) + + @staticmethod + def run_quarto( + qmd_file: str, + output_filename: str, + output_type: str = "html", + params: Dict = {}, + project: Dict = {"type": "default"}, + analysis: Dict = {}, + temp_dir: Path = Path("."), + verbose: bool = False, + ) -> int: + """Run the Quarto command to generate the report.""" + + Reports._write_params_files( + temp_dir, + params=params, + project=project, + analysis=analysis, + ) + + quarto_exec, _ = Reports.get_quarto_with_version(verbose) command = [ str(quarto_exec), @@ -380,9 +499,12 @@ def _run_quarto_command( "--output", output_filename, "--execute-params", - "params.yaml", + "params.yml", ] + if verbose: + print(f"Executing: {' '.join(command)}") + process = subprocess.Popen( command, stdout=subprocess.PIPE, @@ -407,42 +529,6 @@ def _run_quarto_command( return return_code - def _find_quarto_executable(self) -> Path: - """Find the Quarto executable on the system.""" - if sys.platform == "win32": # pragma: no cover - possible_paths = [ - Path(os.environ.get("USERPROFILE", "")) - / "AppData" - / "Local" - / "Programs" - / "Quarto" - / "bin" - / "quarto.cmd", - Path(os.environ.get("PROGRAMFILES", "")) - / "Quarto" - / "bin" - / "quarto.cmd", - ] - else: # pragma: no cover - possible_paths = [ - Path("/usr/local/bin/quarto"), - Path("/opt/quarto/bin/quarto"), - Path(os.environ.get("HOME", "")) / ".local" / "bin" / "quarto", - ] - - for path in possible_paths: - if path.exists(): - return path - - # If not found in common locations, try to find it in PATH - quarto_in_path = shutil.which("quarto") 
# pragma: no cover - if quarto_in_path: # pragma: no cover - return Path(quarto_in_path) - - raise FileNotFoundError( - "Quarto executable not found. Please ensure Quarto is installed and in the system PATH." - ) # pragma: no cover - def excel_report( self, name: Union[Path, str] = Path("Tables.xlsx"), diff --git a/python/pdstools/infinity/__init__.py b/python/pdstools/infinity/__init__.py index 4a4abb7e..01b04b1e 100644 --- a/python/pdstools/infinity/__init__.py +++ b/python/pdstools/infinity/__init__.py @@ -45,4 +45,4 @@ def __getattr__(name: str): raise AttributeError(f"module '{__name__}' has no attribute '{name}'") -__all__ = ["Infinity", "MissingDependenciesException"] +__all__ = ["Infinity"] \ No newline at end of file diff --git a/python/pdstools/infinity/internal/_base_client.py b/python/pdstools/infinity/internal/_base_client.py index 25723c8d..47e3aa25 100644 --- a/python/pdstools/infinity/internal/_base_client.py +++ b/python/pdstools/infinity/internal/_base_client.py @@ -159,8 +159,7 @@ def from_basic_auth( ) ) auth = httpx.BasicAuth(username=user_name, password=password) - if not base_url.endswith(("prweb", "prweb/")): - base_url = base_url.rsplit("/prweb")[0] + base_url = base_url.rsplit("/prweb")[0] return cls( base_url=base_url, auth=auth, diff --git a/python/pdstools/prediction/Prediction.py b/python/pdstools/prediction/Prediction.py index cf81b132..11f202a1 100644 --- a/python/pdstools/prediction/Prediction.py +++ b/python/pdstools/prediction/Prediction.py @@ -1,15 +1,211 @@ -from typing import List, Optional +import datetime +import itertools +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Tuple, + Union, +) import polars as pl +import logging + +from ..utils.types import QUERY +from ..utils.namespaces import LazyNamespace from ..adm.CDH_Guidelines import CDHGuidelines from ..utils import cdh_utils +logger = logging.getLogger(__name__) +try: + import plotly.express as px + import plotly.graph_objects as go + + from ..utils import pega_template as pega_template +except ImportError as e: # pragma: no cover + logger.debug(f"Failed to import optional dependencies: {e}") + +if TYPE_CHECKING: # pragma: no cover + import plotly.graph_objects as go + +COLORSCALE_TYPES = Union[List[Tuple[float, str]], List[str]] + +Figure = Union[Any, "go.Figure"] + +# T = TypeVar("T", bound="Plots") +# P = ParamSpec("P") + + +class PredictionPlots(LazyNamespace): + dependencies = ["plotly"] + + def __init__(self, prediction): + self.prediction = prediction + super().__init__() + + def _prediction_trend( + self, + period: str, + query: Optional[QUERY], + return_df: bool, + metric: str, + title: str, + facet_row: str = None, + facet_col: str = None, + bar_mode: bool = False, + ): + plot_df = self.prediction.summary_by_channel(by_period=period).with_columns( + Prediction=pl.format("{} ({})", pl.col.Channel, pl.col.Prediction), + Performance=pl.col("Performance") * 100, + ) + + plot_df = cdh_utils._apply_query(plot_df, query) + + if return_df: + return plot_df + + date_range = ( + cdh_utils._apply_query(self.prediction.predictions, query) + .select( + pl.format( + "period: {} to {}", + pl.col("SnapshotTime").min().dt.to_string("%v"), + pl.col("SnapshotTime").max().dt.to_string("%v"), + ) + ) + .collect() + .item() + ) + + if bar_mode: + plt = ( + px.bar( + plot_df.filter(pl.col("isMultiChannelPrediction").not_()) + .filter(pl.col("Channel") != "Unknown") + .sort(["Period"]) + .collect(), + x="Period", + y=metric, + barmode="group", + facet_row=facet_row, + facet_col=facet_col, + 
color="Prediction", + title=f"{title}
{date_range}", + template="pega", + ) + ) + else: + plt = px.line( + plot_df.filter(pl.col("isMultiChannelPrediction").not_()) + .filter(pl.col("Channel") != "Unknown") + .sort(["Period"]) + .collect(), + x="Period", + y=metric, + facet_row=facet_row, + facet_col=facet_col, + color="Prediction", + title=f"{title}
{date_range}", + template="pega", + markers=True, + ) + + plt.for_each_annotation(lambda a: a.update(text="")).update_layout( + legend_title_text="Channel" + ) + + if facet_row is not None: + plt.update_yaxes(title="") + if facet_col is not None: + plt.update_xaxes(title="") + + return plt + + def performance_trend( + self, + period: str = "1d", + *, + query: Optional[QUERY] = None, + return_df: bool = False, + ): + result = self._prediction_trend( + query=query, + period=period, + return_df=return_df, + metric="Performance", + title="Prediction Performance", + ) + if not return_df: + result.update_yaxes(range=[50, 100]) + return result + + def lift_trend( + self, + period: str = "1d", + *, + query: Optional[QUERY] = None, + return_df: bool = False, + ): + result = self._prediction_trend( + period=period, + query=query, + return_df=return_df, + metric="Lift", + title="Prediction Lift", + ) + if not return_df: + result.update_yaxes(tickformat=",.2%") + return result + + def ctr_trend( + self, + period: str = "1d", + facetting=False, + *, + query: Optional[QUERY] = None, + return_df: bool = False, + ): + result = self._prediction_trend( + period=period, + query=query, + return_df=return_df, + metric="CTR", + title="Prediction CTR", + facet_row="Prediction" if facetting else None, + ) + if not return_df: + result.update_yaxes(tickformat=",.3%") + result.update_layout(yaxis={"rangemode": "tozero"}) + return result + + def responsecount_trend( + self, + period: str = "1d", + facetting=False, + *, + query: Optional[QUERY] = None, + return_df: bool = False, + ): + result = self._prediction_trend( + period=period, + query=query, + return_df=return_df, + metric="ResponseCount", + title="Prediction Responses", + facet_col="Prediction" if facetting else None, + bar_mode=True, + ) + if not return_df: + result.update_layout(yaxis_title="Responses") + return result class Prediction: """Monitor Pega Prediction Studio Predictions""" predictions: pl.LazyFrame + plot: PredictionPlots # These are pretty strict conditions - many configurations appear not to satisfy these # perhaps the Total = Test + Control is no longer met when Impact Analyzer is around @@ -39,6 +235,7 @@ def __init__(self, df: pl.LazyFrame): The read in data as a Polars LazyFrame """ self.cdh_guidelines = CDHGuidelines() + self.plot = PredictionPlots(prediction=self) predictions_raw_data_prepped = ( df.filter(pl.col.pyModelType == "PREDICTION") @@ -121,6 +318,78 @@ def __init__(self, df: pl.LazyFrame): .sort(["pyModelId", "SnapshotTime"]) ) + @staticmethod + def from_mock_data(days = 70): + n_conditions = 4 # can't change this + n_predictions = 3 # tied to the data below + now = datetime.datetime.now() + def _interpolate(min, max, i, n): + return min + (max-min)*i/(n-1) + + mock_prediction_data = ( + pl.LazyFrame( + { + "pySnapShotTime": sorted( + [ + cdh_utils.to_prpc_date_time(now - datetime.timedelta(days=i))[ + 0:15 + ] # Polars doesn't like time zones like GMT+0200 + for i in range(days) + ] + * n_conditions + * n_predictions + ), + "pyModelId": ( + ["DATA-DECISION-REQUEST-CUSTOMER!PredictOutboundEmailPropensity"] + * n_conditions + + ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTMOBILEPROPENSITY"] + * n_conditions + + ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTWEBPROPENSITY"] * n_conditions + ) + * days, + "pyModelType": "PREDICTION", + "pySnapshotType": ["Daily", "Daily", "Daily", None] + * n_predictions + * days, + "pyDataUsage": ["Control", "Test", "NBA", ""] * n_predictions * days, # Control=Random, Test=Model + # "pyPositives": ( + # 
[100, 160, 120, None] + [200, 420, 250, None] + [350, 700, 380, None] + # ) + # * n_days, + "pyPositives": list( + itertools.chain.from_iterable( + [ + [_interpolate(100, 100, p, days), _interpolate(160, 200, p, days), _interpolate(120, 120, p, days), None] + + [_interpolate(120, 120, p, days), _interpolate(250, 300, p, days), _interpolate(150, 150, p, days), None] + + [_interpolate(1400, 1400, p, days), _interpolate(2800, 4000, p, days), _interpolate(1520, 1520, p, days), None] + for p in range(0, days) + ] + ) + ), + + "pyNegatives": ([10000]* n_conditions + [6000]* n_conditions + [40000]* n_conditions) *days, + "pyValue": list( + itertools.chain.from_iterable( + [ + [_interpolate(0.6, 0.65, p, days)] * n_conditions + + [_interpolate(0.7, 0.73, p, days)] * n_conditions + + [_interpolate(0.66, 0.68, p, days)] * n_conditions + for p in range(0, days) + ] + ) + ), + } + ) + .sort(["pySnapShotTime", "pyModelId", "pySnapshotType"]) + # .with_columns( + # pl.col("pyPositives").cum_sum().over(["pyModelId", "pySnapshotType"]), + # pl.col("pyNegatives").cum_sum().over(["pyModelId", "pySnapshotType"]), + # ) + .with_columns(pyCount=pl.col("pyPositives") + pl.col("pyNegatives")) + ) + + return Prediction(mock_prediction_data) + @property def is_available(self) -> bool: return len(self.predictions.head(1).collect()) > 0 @@ -170,7 +439,8 @@ def summary_by_channel( period_expr = [] return ( - self.predictions.join( + self.predictions.with_columns(pl.col("ModelName").str.to_uppercase()) + .join( self.cdh_guidelines.get_predictions_channel_mapping( custom_predictions ).lazy(), @@ -211,9 +481,6 @@ def summary_by_channel( + (["Period"] if by_period is not None else []) ) .agg( - cdh_utils.weighted_average_polars("CTR_Lift", "ResponseCount").alias( - "Lift" - ), cdh_utils.weighted_performance_polars().alias("Performance"), pl.col("Positives").sum(), pl.col("Negatives").sum(), @@ -248,9 +515,28 @@ def summary_by_channel( + pl.col("Positives_NBA") + pl.col("Negatives_NBA") ), - CTR=(pl.col("Positives")) / (pl.col("ResponseCount")), + CTR=pl.col("Positives") / (pl.col("Positives") + pl.col("Negatives")), + CTR_Test=pl.col("Positives_Test") + / (pl.col("Positives_Test") + pl.col("Negatives_Test")), + CTR_Control=pl.col("Positives_Control") + / (pl.col("Positives_Control") + pl.col("Negatives_Control")), + CTR_NBA=pl.col("Positives_NBA") + / (pl.col("Positives_NBA") + pl.col("Negatives_NBA")), + ChannelDirectionGroup=pl.when( + pl.col("Channel").is_not_null() + & pl.col("Direction").is_not_null() + & pl.col("Channel").is_in(["Other", "Unknown", ""]).not_() + & pl.col("Direction").is_in(["Other", "Unknown", ""]).not_() + & pl.col("isMultiChannelPrediction").not_() + ) + .then(pl.concat_str(["Channel", "Direction"], separator="/")) + .otherwise(pl.lit("Other")), isValid=self.prediction_validity_expr, ) + .with_columns( + Lift=(pl.col("CTR_Test") - pl.col("CTR_Control")) + / pl.col("CTR_Control"), + ) .sort(["Prediction"] + (["Period"] if by_period is not None else [])) ) diff --git a/python/pdstools/reports/HealthCheck.qmd b/python/pdstools/reports/HealthCheck.qmd index 1dd8600e..5c29400b 100644 --- a/python/pdstools/reports/HealthCheck.qmd +++ b/python/pdstools/reports/HealthCheck.qmd @@ -19,6 +19,7 @@ format: fontsize: small jupyter: python3 --- + ```{python} # | code-fold: true # | output: false @@ -32,6 +33,7 @@ from IPython.display import display, Markdown from pdstools import ( datasets, ADMDatamart, + Prediction, read_ds_export, ) from pdstools.adm.CDH_Guidelines import CDHGuidelines @@ -89,26 +91,21 @@ 
def fig_set_xaxis_modelperformance(fig, label="Model Performance"): ``` - - ```{python} # | tags: [parameters] # | echo: false -# The kwargs argument is in support of the streamlit app specifically. -# The folder/filename arguments are in support of calling from a command line, -# using the same argument names as the stand-alone model report. -# When neither is passed in, it falls back to the sample data. +# Parameters will be overriden by quarto when a parameters yaml is provided -name = "ADM Models" # TODO: what's this used for? -kwargs = dict() +title = "ADM Model Overview" +subtitle = "Sample data" -title = "ADM Model Overview" # pass in customer name here -subtitle = "" # pass in a date range or other qualification of the data source +# Insert the paths to your data files here to run the notebook from your IDE -datafolder = "" -modelfilename = "" -predictorfilename = "" +model_file_path = None +prediction_file_path = None +predictor_file_path = None +query = None tables_max_rows = 200 # max number of rows for embedded tables barchart_max_bars = 20 # max number of bars showing in bar charts @@ -118,20 +115,25 @@ responsecount_analysis_threshold = ( predictor_analysis_threshold = 200 # min number of responses for predictor analysis # channel_responses_threshold = 10000 configuration_responses_threshold = 10000 +``` + +```{python} +# Below needed because current Yaml lib reads None back in as the string None +# TODO consider a different lib that supports roundtrip preservation like ruamel.yaml +if model_file_path and model_file_path == "None": + model_file_path = None +if prediction_file_path and prediction_file_path == "None": + prediction_file_path = None +if predictor_file_path and predictor_file_path == "None": + predictor_file_path = None +if query and query == "None": + query = None + responsecount_analysis_query = ( pl.col("ResponseCount") > responsecount_analysis_threshold ) predictor_analysis_query = pl.col("ResponseCountBin") > predictor_analysis_threshold -``` - -```{python} -if len(kwargs) > 0: - # Calling through function (in streamlit or in a notebook) - if "title" in kwargs and len(kwargs["title"]) > 0: - title = kwargs["title"] - if "subtitle" in kwargs and len(kwargs["subtitle"]) > 0: - subtitle = kwargs["subtitle"] # | echo: false report_utils.quarto_print( @@ -145,7 +147,6 @@ report_utils.quarto_print( ``` - ```{python} # | tags: [initialization] # | code-fold: true @@ -177,25 +178,13 @@ def reset_datamart(dm): else: datamart_all_columns = dm.model_data.collect_schema().names() -if len(kwargs) > 0: - # Calling through function (in streamlit or in a notebook) +if model_file_path is not None: reset_datamart( ADMDatamart.from_ds_export( - model_filename= kwargs["model_file_path"], - predictor_filename= kwargs["predictor_file_path"], + model_filename=model_file_path, + predictor_filename=predictor_file_path, base_path=".", - query=kwargs["query"] - ) - ) -elif len(datafolder) > 0 or len(modelfilename) > 0 or len(predictorfilename) > 0: - # Run through this qmd file - - reset_datamart( - ADMDatamart.from_ds_export( - model_filename=modelfilename, - predictor_filename=predictorfilename, - base_path=datafolder, - # query=kwargs["query"] + query=query ) ) else: @@ -204,11 +193,9 @@ else: ``` -This document gives a global overview of the Adaptive models and (if available) the predictors of the models. It is generated from a Python markdown (Quarto) file in the [Pega Data Scientist Tools](https://github.com/pegasystems/pega-datascientist-tools). 
This is open-source software and comes without guarantees. Off-line reports for individual -models can be created as well, see [Wiki](https://github.com/pegasystems/pega-datascientist-tools/wiki). - -We provide guidance and best practices where possible. However these are generic guidelines and may or may not be applicable to the specific use case and situation of the implementation. The recommendations are strongly geared towards the CDH use cases and may not apply to, for example, Process AI. +This document gives a global overview of the Adaptive models and (if available) the predictors of the models. It is generated from a Python markdown (Quarto) file in the [Pega Data Scientist Tools](https://github.com/pegasystems/pega-datascientist-tools). This is open-source software and comes without guarantees. Off-line reports for individual models can be created as well, see [Wiki](https://github.com/pegasystems/pega-datascientist-tools/wiki). +We provide guidance and best practices where possible. However these are generic guidelines and may or may not be applicable to the specific use case and situation of the implementation. The recommendations are strongly geared towards the CDH use cases and may not apply to, for example, Process AI. ```{python} # Start with a global bubble chart. Maybe later replace by @@ -238,12 +225,8 @@ except Exception as e: report_utils.quarto_plot_exception("All ADM Models", e) ``` - ::: {.callout-tip title="Guidance"} -The [Plotly](https://plotly.com/python/) charts have [user controls for panning, -zooming etc](https://plotly.com/chart-studio-help/zoom-pan-hover-controls/) but -note that these interactive plots do not render well in portals like Sharepoint -or Box. It is preferable to view them from a browser. +The [Plotly](https://plotly.com/python/) charts have [user controls for panning, zooming etc](https://plotly.com/chart-studio-help/zoom-pan-hover-controls/) but note that these interactive plots do not render well in portals like Sharepoint or Box. It is preferable to view them from a browser. ::: # Overview of Channels @@ -282,24 +265,21 @@ formatted_channel_overview = ( df_channel_overview, cdh_guidelines=cdh_guidelines, highlight_limits={ - "Positives": "Positive Responses", - "Performance": "Model Performance", - "ResponseCount": "Responses", - "Total Number of Actions": "Actions", - "Used Actions": "Actions", - "Total Number of Treatments": "Treatments", - "Used Treatments": "Treatments", + "Positive Responses": "Positives", + "Model Performance": "Performance", + "Responses": "ResponseCount", + "Actions": ["Total Number of Actions", "Used Actions"], + "Treatments": ["Total Number of Treatments", "Used Treatments"], "Issues": "Issues", + "OmniChannel": "OmniChannel Actions", + "CTR" : "CTR", }, - highlight_lists = { - "Channel" : cdh_guidelines.standard_channels, - "Direction" : cdh_guidelines.standard_directions, + highlight_lists={ + "Channel": cdh_guidelines.standard_channels, + "Direction": cdh_guidelines.standard_directions, }, - highlight_configurations = ["Configuration"], + highlight_configurations=["Configuration"], ) - .fmt_percent(decimals=0, columns=["OmniChannel Actions"]) - .fmt_number(decimals=2, columns=["Performance"]) - .fmt_percent(decimals=2, columns=["CTR"]) .cols_label( CTR="Base Rate", ResponseCount="Total Responses", @@ -391,12 +371,12 @@ configurations it seems that the framework **{framework_usage}**. 
``` ::: {.callout-tip title="Guidance"} -* Look out for channels supported by more than two model configurations, although there may be valid reasons to do so (e.g. different sets of predictors for certain issues) -* Channels with no responses at all -* Channels with no positive feedback +- Look out for channels supported by more than two model configurations, although there may be valid reasons to do so (e.g. different sets of predictors for certain issues) +- Channels with no responses at all +- Channels with no positive feedback ::: -::: {.callout-note} +::: callout-note The total number of responses and positives are summed up over the models for that channel. If the models are configured differently, e.g. in a serial manner (shadow pattern for example) rather than in parallel (independently learning champion challenger setup) the actual counts per channel may be different. Ultimately, the real counts per channel can only be derived from interaction history data. ::: @@ -404,6 +384,112 @@ The total number of responses and positives are summed up over the models for th Channels with very few or no responses will be excluded from subsequent analyses. They have been highlighted in the channel overview with a strike-through. +# Overview of the Predictions + +When data from Prediction Studio is made available to this notebook, we show an overview of the predictions in the system. A Prediction is a construct around models that allows for different modeling patterns and supports an overall monitoring of engagement lift, Performance etc. + +::: {.content-hidden when-meta="analysis.predictions"} +However, Prediction Data is **not available**. This is either because the data is not captured or because Predictions are not used, for example when the NBAD framework is not used. +::: + +::: {.content-hidden unless-meta="analysis.predictions"} +Predictions monitor performance in a different way than ADM models do. The measurement of Lift depends on having a control group for which the arbitration priority is random rather than model driven. + +Lift in Predictions is the lift in *engagement* (clicks, accepts) and is defined as: + +$$Lift = \frac{CTR_{test} - CTR_{control}}{CTR_{control}}$$ + +with "test" the model group and "control" random test group. A lift of 0% means the model group is doing no better than the random group. A lift of 100% means the models are giving two times the number of positive responses. Lift can be negative, up to -100%, which would mean the model group never results in any positive response. By default the size of the random group is 1-2% and is configured in Prediction Studio and Impact Analyzer (when using NBAD). 
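As an illustration of the lift calculation above, here is a minimal Polars sketch that mirrors the expressions used in `Prediction.summary_by_channel` elsewhere in this change; the test/control response counts are made-up sample values.

```python
import polars as pl

# made-up test/control response counts for two channels
df = pl.DataFrame(
    {
        "Positives_Test": [400, 250],
        "Negatives_Test": [20_000, 12_000],
        "Positives_Control": [100, 90],
        "Negatives_Control": [10_000, 6_000],
    }
)

lift = df.with_columns(
    # CTR of the model-driven (test) group and the random (control) group
    CTR_Test=pl.col("Positives_Test")
    / (pl.col("Positives_Test") + pl.col("Negatives_Test")),
    CTR_Control=pl.col("Positives_Control")
    / (pl.col("Positives_Control") + pl.col("Negatives_Control")),
).with_columns(
    # engagement lift: relative CTR improvement of test over control
    Lift=(pl.col("CTR_Test") - pl.col("CTR_Control")) / pl.col("CTR_Control")
)
print(lift)
```

A lift of 0 means the test (model) group converts at the same rate as the random control group; a lift of 1.0 corresponds to the 100% case described above.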
+ +```{python} +if prediction_file_path: + predictions = Prediction(read_ds_export(prediction_file_path)) + + predictions_overview = predictions.summary_by_channel().collect() + gt = ( + report_utils.table_standard_formatting( + predictions_overview, + title="Prediction Overview", + subtitle="by Channel", + cdh_guidelines=cdh_guidelines, + highlight_limits={ + # "Actions": "Actions", + # "Unique Treatments": "Treatments", + "Positive Responses": [ + "Positives", + "Positives_Test", + "Positives_Control", + "Positives_NBA", + ], + "Responses": [ + "ResponseCount", + "Negatives", + "Negatives_Test", + "Negatives_Control", + "Negatives_NBA", + ], + "Model Performance": "Performance", + "CTR": ["CTR", "CTR_Test", "CTR_Control", "CTR_NBA"], + "Engagement Lift": "Lift", + }, + ) + .fmt_percent( + decimals=2, + scale_values=False, + columns=["TestPercentage", "ControlPercentage"], + ) + .cols_label( + ResponseCount="Total Responses", + Positives="Total Positives", + Negatives="Total Negatives", + Positives_Test="Positives Test Group", + Negatives_Test="Negatives Test Group", + Positives_Control="Positives Control Group", + Negatives_Control="Negatives Control Group", + Positives_NBA="Positives NBA Group", + Negatives_NBA="Negatives NBA Group", + Performance="Overall Performance", + ControlPercentage="Control Group Size", + TestPercentage="Test Group Size", + CTR="Overall CTR", + CTR_Test="CTR Test Group", + CTR_Control="CTR Control Group", + CTR_NBA="CTR NBA Group", + ) + .cols_hide( + [ + "ChannelDirectionGroup", + "isStandardNBADPrediction", + "isMultiChannelPrediction", + "usesImpactAnalyzer", + "isValid", + ] + ) + ) + + display(gt) + +else: + report_utils.quarto_callout_no_prediction_data_warning() +``` + +```{python} +try: + plt = predictions.plot.lift_trend("1w") + plt.update_layout(autosize=True, height=400, ) + + plt.show() + + plt = predictions.plot.responsecount_trend("1w") + plt.update_layout(autosize=True, height=400, ) + + plt.show() +except Exception as e: + report_utils.quarto_plot_exception("Prediction Trend Charts", e) + +``` +::: + # Overview of the Actions In a standard setup, the offers/conversations are presented as treatments for actions in a hierarchical structure setup in NBA Designer. Treatments are often channel specific and typically there are more unique treatments than there are actions. @@ -412,7 +498,6 @@ Adaptive Models are created per treatment (at least in the default setup) and th The recommended [Service and data health limits for Pega Customer Decision Hub on Pega Cloud](https://docs.pega.com/bundle/customer-decision-hub-241/page/customer-decision-hub/cdh-portal/cloud-service-health-limits.html) are used here to highlight whether metrics are within limits. These limits may not apply for on-prem, non-CDH installs or for other reasons. - ```{python} df_action_overview = pl.DataFrame( { @@ -568,14 +653,12 @@ Just showing the top {barchart_max_bars} here and limiting to the propositions t ) ``` - ::: {.callout-tip title="Guidance"} -- Look out for actions that stand out, having a far higher success rate than the rest. Check with business if that is expected. +- Look out for actions that stand out, having a far higher success rate than the rest. Check with business if that is expected. -- Variation in the set of offers across customers is also an important metric but not one that can be derived from the Adaptive Model data - this requires analysis of the actual interactions. 
+- Variation in the set of offers across customers is also an important metric but not one that can be derived from the Adaptive Model data - this requires analysis of the actual interactions. ::: - ```{python} facet = "Channel/Direction" hover_columns = report_utils.polars_subset_to_existing_cols( @@ -646,8 +729,7 @@ except Exception as e: ## All Success Rates -Showing the success rates of all actions in an interactive -tree map. Green is higher, red are lower success rates. +Showing the success rates of all actions in an interactive tree map. Green is higher, red are lower success rates. ```{python} try: @@ -669,7 +751,7 @@ except Exception as e: Showing how the overall channel success rates evolved over the time that the data export covers. Split by Channel and model configuration. Usually there are separate model configurations for different channels but sometimes there are also additional model configurations for different outcomes (e.g. conversion) or different customers (e.g. anonymous). ::: {.callout-tip title="Guidance"} -- There shouldn’t be too sudden changes over time +- There shouldn’t be too sudden changes over time ::: ```{python} @@ -729,7 +811,9 @@ else: model_overview = ( last_data.group_by( ["Configuration"] - + report_utils.polars_subset_to_existing_cols(datamart_all_columns, ["Channel", "Direction"]) + + report_utils.polars_subset_to_existing_cols( + datamart_all_columns, ["Channel", "Direction"] + ) ) .agg( [ @@ -753,18 +837,19 @@ model_overview = ( display( report_utils.table_standard_formatting( - model_overview, title="Model Overview", + model_overview, + title="Model Overview", cdh_guidelines=cdh_guidelines, highlight_limits={ "Actions": "Actions", - "Unique Treatments": "Treatments", - "Positives": "Positive Responses", - "ResponseCount": "Responses", + "Treatments": "Unique Treatments", + "Positive Responses": "Positives", + "Responses": "ResponseCount", }, - highlight_lists = { - "Channel" : cdh_guidelines.standard_channels, - "Direction" : cdh_guidelines.standard_directions, - "Configuration" : cdh_guidelines.standard_configurations, + highlight_lists={ + "Channel": cdh_guidelines.standard_channels, + "Direction": cdh_guidelines.standard_directions, + "Configuration": cdh_guidelines.standard_configurations, }, ) .tab_style( @@ -841,32 +926,19 @@ if (unused_configurations.shape[0] > 0) and ( This “Bubble Chart” - similar to the standard ADM models overview in Pega Prediction Studio - shows the relation between model performance and proposition success rates. The size of the bubbles indicates the number of responses. ::: {.callout-tip title="Guidance"} +- Bubbles stacked up against the left-hand vertical axis represent actions/treatments for which the models are not predictive. These models may be still be ramping up, or they may not have enough features to work with: consider if new/better predictors can be added. -- Bubbles stacked up against the left-hand vertical axis represent - actions/treatments for which the models are not predictive. These - models may be still be ramping up, or they may not have - enough features to work with: consider if new/better predictors can - be added. +- Charts should not be empty or contain only a few bubbles. Such charts may represent channels or configurations not (or no longer) used. -- Charts should not be empty or contain only a few bubbles. Such - charts may represent channels or configurations not (or no longer) used. 
+- Bubbles at the bottom of the charts represent propositions with very low success rates - they may not be compelling enough. -- Bubbles at the bottom of the charts represent propositions with very - low success rates - they may not be compelling enough. +- In an ideal scenario you will see the larger bubbles more on the top-right, so more volume for propositions with higher success rates and better models. -- In an ideal scenario you will see the larger bubbles more on the - top-right, so more volume for propositions with higher success rates - and better models. +- There should - very roughly - be a positive correlation between success rate and performance and between response counts and performance. -- There should - very roughly - be a positive correlation between success - rate and performance and between response counts and performance. +- There should be variation in response counts (not all dots of equal size) -- There should be variation in response counts (not all dots of equal - size) - -- For small volumes of good models, see if the engagement rules in the - Decision Strategy are overly restrictive or reconsider the - arbitration of the propositions so they get more (or less) exposure. +- For small volumes of good models, see if the engagement rules in the Decision Strategy are overly restrictive or reconsider the arbitration of the propositions so they get more (or less) exposure. ::: ```{python} @@ -900,7 +972,7 @@ On the x-axis Model Performance measured in AUC-ROC, on the y-axis the Success R The trend chart shows how model performance evolves over time. Note that ADM is by default configured to track performance over *all* time. You can configure a window for monitoring but this is not commonly done. In Pega Prediction Studio you can monitor models per month, year etc. ::: {.callout-tip title="Guidance"} -- No abrupt changes but gradual upward trend is good +- No abrupt changes but gradual upward trend is good ::: ```{python} @@ -954,17 +1026,14 @@ except Exception as e: ### Response counts for all the actions -Using an interactive treemap to visualize the response counts. -Different channels will have very different numbers but within one channel the relative differences in response counts give an indication how skewed the distribution is. +Using an interactive treemap to visualize the response counts. Different channels will have very different numbers but within one channel the relative differences in response counts give an indication how skewed the distribution is. Warning : Currently treemap calculates mean response count moving upwards in the hierarchy. ::: {.callout-tip title="Guidance"} - If there are actions that have a much higher response count than the rest see why that is. Possibly they are levered up for valid business reasons. ::: - ```{python} try: fig = datamart.plot.tree_map( @@ -984,6 +1053,13 @@ except Exception as e: # Analysis of Predictors +This analysis looks at the predictors that are driving the models. + +::: {.content-hidden when-meta="analysis.predictors"} +However, Predictor Data is **not available**. Predictor analysis is not available. +::: + +::::::: {.content-hidden unless-meta="analysis.predictors"} ```{python} # | output: asis # | echo: false @@ -1049,10 +1125,10 @@ Split by category (defaults to the string before the first dot, can be overridde The numbers here can differ from the totals above, these ones are leading. 
::: {.callout-tip title="Guidance"} -- Total number of predictors per model 200 - 700 to stay within service limits -- There should be some “IH” predictors but no more than ca 100 of them -- No more than a few dozen Param predictors -- Consistency in the numbers across configurations +- Total number of predictors per model 200 - 700 to stay within service limits +- There should be some “IH” predictors but no more than ca 100 of them +- No more than a few dozen Param predictors +- Consistency in the numbers across configurations ::: ```{python} @@ -1086,17 +1162,15 @@ else: ``` - ## Predictor Importance across all models per configuration -Box plots of the predictor importance. Predictor importance is using the -univariate predictor performance. +Box plots of the predictor importance. Predictor importance is using the univariate predictor performance. ::: {.callout-tip title="Guidance"} -* You expect most predictors to have a spread in the performance range, doing better for some actions than for others -* Predictors only showing as a single bar (no range) are suspicious -* A variation of predictors from different categories in the top 30 -* A min/max of the univariate AUC performance somewhere between 55 and 75 +- You expect most predictors to have a spread in the performance range, doing better for some actions than for others +- Predictors only showing as a single bar (no range) are suspicious +- A variation of predictors from different categories in the top 30 +- A min/max of the univariate AUC performance somewhere between 55 and 75 ::: ```{python} @@ -1144,6 +1218,7 @@ else: ``` ## Importance by Predictor Category + Aggregating up to the category of the predictors. This gives a view at a glance of how well e.g. interaction history, external model scores or contextual data are doing overall. ### Predictor Category performance per Channel/Direction/Issue @@ -1222,14 +1297,14 @@ else: If predictors perform poorly across all models, that may be because of data sourcing issues or because it just is not related to any of the model outcomes. See also the analysis of missing data. ::: {.callout-tip title="Guidance"} -- Predictors that consistently perform poorly could potentially be removed. -- Be sure to check for data problems -- Note we advise to be careful with predictor removal. Only remove if there is clearly no future value to other propositions as well or if there is always a related predictor that performs better. +- Predictors that consistently perform poorly could potentially be removed. +- Be sure to check for data problems +- Note we advise to be careful with predictor removal. Only remove if there is clearly no future value to other propositions as well or if there is always a related predictor that performs better. ::: ```{python} # weighted performance -# TODO apply highlighting in the std way like in the R version + if datamart.predictor_data is not None: bad_predictors = ( @@ -1257,7 +1332,7 @@ if datamart.predictor_data is not None: # .with_columns(MeanPlotData=pl.col("Mean")), rowname_col="PredictorName", cdh_guidelines=cdh_guidelines, - highlight_limits = {"Response Count" : "Responses"} + highlight_limits = {"Responses" : "Response Count"} ) .tab_options(container_height="400px", container_overflow_y=True) .tab_spanner( @@ -1286,12 +1361,11 @@ else: ``` ## Number of Active and Inactive Predictors + Showing the number of active and inactive predictors per model. 
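To make the count concrete, a small hedged sketch of what such a tally could look like on the predictor snapshot, assuming the usual `EntryType` values of `Active`, `Inactive` and `Classifier`; the data frame here is a toy example, not the full datamart schema.

```python
import polars as pl

# toy predictor snapshot: one row per predictor per model
predictor_data = pl.DataFrame(
    {
        "ModelID": ["m1", "m1", "m1", "m2", "m2"],
        "PredictorName": ["Age", "Income", "Classifier", "Age", "Classifier"],
        "EntryType": ["Active", "Inactive", "Classifier", "Active", "Classifier"],
    }
)

# count active vs inactive predictors per model, ignoring the classifier row
counts = (
    predictor_data.filter(pl.col("EntryType") != "Classifier")
    .group_by("ModelID", "EntryType")
    .agg(pl.len().alias("PredictorCount"))
    .sort("ModelID", "EntryType")
)
print(counts)
```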
::: {.callout-tip title="Guidance"}
-
-- We expect a few dozen active predictors for every model instance
-
+- We expect a few dozen active predictors for every model instance
:::

```{python}
@@ -1345,7 +1419,7 @@ if datamart.predictor_data is not None:
             title=f"Top predictors over {conf}",
         )
         fig.update_yaxes(dtick=1, automargin=True)
-        fig.update_xaxes(dtick=1, tickangle=kwargs.get("tickangle", None))
+        fig.update_xaxes(dtick=1) #, tickangle=params.get("tickangle", None))
         fig.update(layout_coloraxis_showscale=False)
         fig.show()
@@ -1361,9 +1435,7 @@ else:

 ## Missing values

-If a predictor is low performing: are there too many missing values? This could point to a technical problem
-Missing % is number of missing vs all responses, really just a filter on model data
-This TreeMap only shows the fields that have any missing values.
+If a predictor is low performing, check whether there are too many missing values; this could point to a technical problem. Missing % is the number of missing values versus all responses (really just a filter on the model data). This TreeMap only shows the fields that have any missing values.

```{python}
 if datamart.predictor_data is not None:
@@ -1417,23 +1489,16 @@ else:
     report_utils.quarto_callout_no_predictor_data_warning()
```

-
-## Residuals
-
-TODO: similar to analysis of the MISSING bin we can do something for the
-Residual bin - the bin in categorical predictors that acts as an overflow
-bin. Too much in there is not good. Also, the sheer number of distinct
-values could be good to analyse, but we don't have that exactly.
+:::::::

 # Responses

-In the sections below we check how many models have reached certain reliability (or “maturity”) thresholds. This is based on heuristics on both the number of positives (> 200 considered mature) and performance.
+In the sections below we check how many models have reached certain reliability (or “maturity”) thresholds. This is based on heuristics on both the number of positives (\> 200 considered mature) and performance.

 For the full list of models, export the data into an Excel sheet from Pega or from the PDS Tools app.

 All below lists are guidance. With new treatments/actions being introduced regularly, you would expect a small percentage of the models to be at their initial stages, but that percentage should be small.

-
```{python}

 maturity_criteria = [
@@ -1547,16 +1612,14 @@ These models have received over 200 positives and do have a performance above th

 This could be because of lack of good predictors in the model.

-
 ## Number of Empty/Immature Models over time

-In the analysis below we count the number of models in each of the groups analysed before and show how that \ncount changes over time. The expectation is that the number of “non-empty” models increases steadily and the other lines are more or less stable.\n
-Empty is defined as having no responses at all. Immature is defined as having < 200 positives, and no performance means model performance is still the initial 0.5 value while having matured already according to the definition.
+In the analysis below we count the number of models in each of the groups analysed before and show how that count changes over time. The expectation is that the number of “non-empty” models increases steadily and the other lines are more or less stable. Empty is defined as having no responses at all. Immature is defined as having \< 200 positives, and no performance means model performance is still the initial 0.5 value while having matured already according to the definition.
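
Editor's note: the definitions above translate into a simple grouping. The sketch below assumes the model-level snapshot columns ResponseCount, Positives and Performance (AUC on a 0.5-1 scale) and is only meant to illustrate the classification, not the exact report logic; the values are invented.

```python
import polars as pl

# Made-up model-level snapshot
models = pl.DataFrame(
    {
        "ModelID": ["m1", "m2", "m3", "m4"],
        "ResponseCount": [0, 150, 8000, 12000],
        "Positives": [0, 30, 250, 800],
        "Performance": [0.5, 0.5, 0.5, 0.62],
    }
)

models = models.with_columns(
    pl.when(pl.col("ResponseCount") == 0)
    .then(pl.lit("Empty"))                 # no responses at all
    .when(pl.col("Positives") < 200)
    .then(pl.lit("Immature"))              # fewer than 200 positives
    .when(pl.col("Performance") <= 0.5)
    .then(pl.lit("No performance"))        # matured but AUC still at the initial 0.5
    .otherwise(pl.lit("Mature"))
    .alias("MaturityGroup")
)
print(models)
```
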
::: {.callout-tip title="Guidance"} -- Empty models shouldnt be increasing too much -- Good models (AUC 55-80) should increase or at least not decrease -- Good models should be much higher than problem kids +- Empty models shouldnt be increasing too much +- Good models (AUC 55-80) should increase or at least not decrease +- Good models should be much higher than problem kids ::: ```{python} @@ -1606,10 +1669,8 @@ except Exception as e: report_utils.quarto_plot_exception("Number of Empty/Immature Models over time", e) ``` - ## Number of Responses over Time - ```{python} facets = "Configuration" unique_count = len(datamart.unique_configurations) @@ -1640,6 +1701,7 @@ except Exception as e: # Which Models drive most of the Volume ## Analysis of skewness of the Responses + Showing the cumulative response count vs the number of models. Is there a larger percentage of models that take the vast majority of the responses? If this line strongly deviates from the diagonal it means that relatively few models drive the majority of the responses. @@ -1649,11 +1711,10 @@ In the left-hand plot we look at all responses, which really means that we are l Very skewed results may be caused by prioritization elements like levers and weights and can be a reason to check in with business and verify that this is expected. ::: {.callout-tip title="Guidance"} -- Area under this curve should be > 0.5 but not too close to 1, which means that most of the responses are driven by relatively few actions, but not too extreme +- Area under this curve should be \> 0.5 but not too close to 1, which means that most of the responses are driven by relatively few actions, but not too extreme ::: -::: {layout-ncol=2} - +::: {layout-ncol="2"} ```{python} # TODO move gains_table to PDS tools utils # TODO a corresponding GINI calculation would be nice @@ -1776,7 +1837,6 @@ try: except Exception as e: report_utils.quarto_plot_exception("Cumulative Positives", e) ``` - ::: ## Analysis of Performance vs Volume @@ -1852,43 +1912,13 @@ except Exception as e: report_utils.quarto_plot_exception("Analysis of Performance vs Volume", e) ``` - -# Propensity Analysis - -The distribution of propensities returned by the models is yet a different angle. - -Higher propensities clearly indicate the offers are more attractive - people apparenty click/accept/convert more often. - -## Propensity Distribution - -In a more emphathetic setup, you would expect that the distribution of the propensities leans towards the right-hand side: more volume to more attractive offers, although the relation is of course more complex, we are not just blindly pushing the offers with the highest success rates, but take a personalized approach. - -Often however, multiple factors are included in the prioritization, changing this picture. - -Note that the propensity bins are not of equal width. Propensities are typically very low so with an equal width distribution, almost all volume would be in the first bins. The binning here is based on (roughly) equal volume across all data. - -So when one of the graphs shows more volume on the left, that is to be interpreted as relative to the other graphs. 
- -```{python} -# | error: true -try: - if datamart.predictor_data is not None: - fig = datamart.plot.propensity_distribution() - fig.show() - else: - report_utils.quarto_callout_no_predictor_data_warning() -except Exception as e: - report_utils.quarto_plot_exception("Propensity Distribution", e) - -``` - # Credits - ```{python} # | echo: false # unfortunately no way to get the quarto source file name, so that is hardcoded report_utils.show_credits("pega-datascientist-tools/python/pdstools/reports/HealthCheck.qmd") -``` + +``` \ No newline at end of file diff --git a/python/pdstools/reports/ModelReport.qmd b/python/pdstools/reports/ModelReport.qmd index f18cfebf..b4e677ff 100644 --- a/python/pdstools/reports/ModelReport.qmd +++ b/python/pdstools/reports/ModelReport.qmd @@ -49,45 +49,40 @@ from pdstools.utils import report_utils ```{python} # | tags: [parameters] -# These parameters are overwritten when called externally +# Parameters will be overriden by quarto when a parameters yaml is provided -datafolder = "" -modelfilename = "" -predictorfilename = "" -model_id = "" +title = "ADM Model Details" +subtitle = "Sample data" -only_active_predictors = True - -title = "Demo Dataset" # pass in customer name here -subtitle = "" # typically used to pass in a date range or other qualification of the data source - -kwargs = dict() +model_file_path = None +predictor_file_path = None +query = None +model_id = None +only_active_predictors = True ``` ```{python} -# Predictor data for one model ID -if len(kwargs) > 0: - # streamlit call TODO fix this for v4 - model_id = kwargs["model_id"] - only_active_predictors = kwargs["only_active_predictors"] - model_df = read_ds_export(kwargs["model_file_path"]) - predictor_df = read_ds_export(kwargs["predictor_file_path"]) - datamart = ADMDatamart( - model_df=model_df, - predictor_df=predictor_df, - query=pl.col("ModelID") == model_id, - ) # .fillMissing() -elif len(datafolder) > 0 or len(modelfilename) > 0 or len(predictorfilename) > 0: - # command line call +# Below needed because current Yaml lib reads None back in as the string None +# TODO consider a different lib that supports roundtrip preservation like ruamel.yaml +if model_file_path and model_file_path == "None": + model_file_path = None +if predictor_file_path and predictor_file_path == "None": + predictor_file_path = None +if query and query == "None": + query = None +if model_id and model_id == "None": + model_id = None + +if model_id is not None and predictor_file_path is not None: datamart = ADMDatamart.from_ds_export( - base_path=datafolder, - model_filename=modelfilename, - predictor_filename=predictorfilename, - extract_pyname_keys=True, + model_filename=model_file_path, + predictor_filename=predictor_file_path, query=pl.col("ModelID") == model_id, - ) # .fillMissing() + ) + + # .fillMissing() else: # fall back to sample data model_id = "bd70a915-697a-5d43-ab2c-53b0557c85a0" @@ -159,13 +154,6 @@ model_name = ( .item() ) -if len(kwargs) > 0: - # Calling through function (in streamlit or in a notebook) - if "title" in kwargs and len(kwargs["title"]) > 0: - title = kwargs["title"] - if "subtitle" in kwargs and len(kwargs["subtitle"]) > 0: - subtitle = kwargs["subtitle"] - report_utils.quarto_print( f""" # {title} @@ -182,8 +170,6 @@ report_utils.quarto_print( ) ``` - - ```{python} try: fig = datamart.plot.score_distribution(model_id=model_id) @@ -204,33 +190,33 @@ except Exception as e: ``` -::: {.callout-tip} -The charts (built with [Plotly](https://plotly.com/python/)) have [user controls for 
panning, -zooming etc](https://plotly.com/chart-studio-help/zoom-pan-hover-controls/). These interactive plots do not render well in portals like Sharepoint -or Box. It is preferable to view them from a browser. +::: callout-tip +The charts (built with [Plotly](https://plotly.com/python/)) have [user controls for panning, zooming etc](https://plotly.com/chart-studio-help/zoom-pan-hover-controls/). These interactive plots do not render well in portals like Sharepoint or Box. It is preferable to view them from a browser. ::: ## Model Performance - ```{python} # unfortunately and TODO return_df on the previous plot doesnt give all the columns we'd need so we have to fetch the data separately -classifier = ( - datamart.aggregates.last(table="predictor_data") - .filter(pl.col("ModelID") == model_id) - .filter(pl.col("EntryType") == "Classifier") - .sort("BinIndex") -) +try: + classifier = ( + datamart.aggregates.last(table="predictor_data") + .filter(pl.col("ModelID") == model_id) + .filter(pl.col("EntryType") == "Classifier") + .sort("BinIndex") + ) -auc_roc = round(classifier.select(pl.last("Performance")).collect().item(), 4) + auc_roc = round(classifier.select(pl.last("Performance")).collect().item(), 4) -report_utils.quarto_print( - f""" -The model performance is **{auc_roc}** measured as AUC-ROC. This number is calculated from the “active” bins of the Classifier. -""" -) + report_utils.quarto_print( + f""" + The model performance is **{auc_roc}** measured as AUC-ROC. This number is calculated from the “active” bins of the Classifier. + """ + ) +except Exception as e: + report_utils.quarto_plot_exception("Model Performance", e) ``` The classifier maps the model scores (average of the log odds of the active predictors) to a propensity value. The “active” bins are the ones that can be reached from the current binning of the active predictors. @@ -238,98 +224,86 @@ The classifier maps the model scores (average of the log odds of the active pred See the [ADM Explained](https://pegasystems.github.io/pega-datascientist-tools/Python/articles/ADMExplained.html) article for more information on how ADM exactly works. ```{python} - -gt = ( - report_utils.table_standard_formatting( - classifier.collect().select( - pl.last("ResponseCount"), - pl.last("Positives"), - (pl.last("Positives") / pl.last("ResponseCount")).alias("Base Propensity"), - pl.last("Performance"), - ), - highlight_limits = { - "ResponseCount" : "Responses", - "Positives" : "Positive Responses", - "Performance" : "Model Performance" - } - ) - .cols_label( - ResponseCount="Responses", - ) - .fmt_number( - decimals=0, - columns=["ResponseCount", "Positives"], - ) - .fmt_percent(decimals=3, columns="Base Propensity") - .fmt_number( - decimals=2, - scale_by=100, - columns=["Performance"], +try: + gt = ( + report_utils.table_standard_formatting( + classifier.collect().select( + pl.last("ResponseCount"), + pl.last("Positives"), + (pl.last("Positives") / pl.last("ResponseCount")).alias( + "Base Propensity" + ), + pl.last("Performance") * 100, + ), + highlight_limits={ + "Responses": "ResponseCount", + "Positive Responses": "Positives", + "Model Performance": "Performance", + "CTR": "Base Propensity", + }, + ).cols_label( + ResponseCount="Responses", + ) ) -) -display(gt) + display(gt) +except Exception as e: + report_utils.quarto_plot_exception("Model Performance", e) ``` - ## Score Distribution -The Score Distribution shows the volume and average propensity in every bin of -the score ranges of the Classifier. 
+The Score Distribution shows the volume and average propensity in every bin of the score ranges of the Classifier. -Propensity is defined as $\frac{positives}{positives+negatives}$ per bin. -The adjusted propensity that is returned is a small modification (*Laplace -smoothing*) to this and calculated as -$\frac{0.5+positives}{1+positives+negatives}$ so new models initially return a -propensity of 0.5. This helps to address the cold start when introducing new -actions. +Propensity is defined as $\frac{positives}{positives+negatives}$ per bin. The adjusted propensity that is returned is a small modification (*Laplace smoothing*) to this and calculated as $\frac{0.5+positives}{1+positives+negatives}$ so new models initially return a propensity of 0.5. This helps to address the cold start when introducing new actions. -::: {.callout-warning} +::: callout-warning The Python version has no notion of unreachable bins (yet), when it has we should grey out the binning rows that fall outside of the active range ::: ```{python} -human_friendly_scoredistribution = ( - classifier.select( - pl.col("BinIndex").alias("Index"), - pl.col("BinSymbol").alias("Bin"), - pl.col("BinResponseCount").alias("Responses"), - pl.col("BinPositives").alias("Positives"), - (100 * (pl.col("BinPositives").cum_sum(reverse=True)) / pl.sum("BinPositives")) - .round(2) - .alias("Cum. Positives (%)"), - ( - 100 - * (pl.col("BinResponseCount").cum_sum(reverse=True)) - / pl.sum("BinResponseCount") +try: + human_friendly_scoredistribution = ( + classifier.select( + pl.col("BinIndex").alias("Index"), + pl.col("BinSymbol").alias("Bin"), + pl.col("BinResponseCount").alias("Responses"), + pl.col("BinPositives").alias("Positives"), + (100 * (pl.col("BinPositives").cum_sum(reverse=True)) / pl.sum("BinPositives")) + .round(2) + .alias("Cum. Positives (%)"), + ( + 100 + * (pl.col("BinResponseCount").cum_sum(reverse=True)) + / pl.sum("BinResponseCount") + ) + .round(2) + .alias("Cum. Total (%)"), + (100 * pl.col("BinPropensity")).round(3).alias("Propensity (%)"), + z_ratio(pl.col("BinPositives"), pl.col("BinNegatives")) + .round(3) + .alias("Z Ratio"), + (lift(pl.col("BinPositives"), pl.col("BinNegatives")) * 100) + .round(2) + .alias("Lift (%)"), + (pl.col("BinAdjustedPropensity") * 100) + .round(3) + .alias("Adjusted Propensity (%)"), ) - .round(2) - .alias("Cum. Total (%)"), - (100 * pl.col("BinPropensity")).round(3).alias("Propensity (%)"), - z_ratio(pl.col("BinPositives"), pl.col("BinNegatives")) - .round(3) - .alias("Z Ratio"), - (lift(pl.col("BinPositives"), pl.col("BinNegatives")) * 100) - .round(2) - .alias("Lift (%)"), - (pl.col("BinAdjustedPropensity") * 100) - .round(3) - .alias("Adjusted Propensity (%)"), - ) -).collect() - -# TODO some of the formatting could be done in GT instead of in polars + ).collect() -gt = report_utils.table_standard_formatting( - human_friendly_scoredistribution, title="Score Distribution" -) + # TODO some of the formatting could be done in GT instead of in polars -display(gt) + gt = report_utils.table_standard_formatting( + human_friendly_scoredistribution, title="Score Distribution" + ) + display(gt) +except Exception as e: + report_utils.quarto_plot_exception("Score Distribution", e) ``` - ## Cumulative Gains and Lift charts Below are alternative ways to view the Classifier. 
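
Editor's note: before the charts, a small sketch of one common way to compute cumulative gains and lift from the classifier binning, together with the Laplace-smoothed adjusted propensity described above. The bin values are invented and the report's own plots come from the pdstools plotting functions, so this is illustration only.

```python
import polars as pl

# Invented classifier binning; higher BinIndex means higher score
classifier = pl.DataFrame(
    {
        "BinIndex": [1, 2, 3, 4],
        "BinPositives": [10, 40, 120, 300],
        "BinNegatives": [2000, 1500, 1000, 600],
    }
)

gains = (
    classifier.sort("BinIndex", descending=True)  # start from the highest-scoring bins
    .with_columns(
        BinResponseCount=pl.col("BinPositives") + pl.col("BinNegatives"),
        # adjusted propensity per bin: (0.5 + positives) / (1 + positives + negatives)
        AdjustedPropensity=(0.5 + pl.col("BinPositives"))
        / (1 + pl.col("BinPositives") + pl.col("BinNegatives")),
    )
    .with_columns(
        # cumulative share of positives captured vs cumulative share of volume
        CumPositivesPct=pl.col("BinPositives").cum_sum() / pl.col("BinPositives").sum(),
        CumVolumePct=pl.col("BinResponseCount").cum_sum() / pl.col("BinResponseCount").sum(),
    )
    .with_columns(CumLift=pl.col("CumPositivesPct") / pl.col("CumVolumePct"))
)
print(gains)
```
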
@@ -340,8 +314,7 @@ The Lift chart is derived from this and shows the ratio of the cumulative gain a TODO: unfortunately the Python version has no notion of unreachable bins (yet) which can cause really strange high lift values -::: {layout-ncol=2} - +::: {layout-ncol="2"} ```{python} # TODO perhaps this should move into the pdstools plot functions "plotCumulativeGains" # however it is so trivial, not really sure it should be. See also the other gains charts @@ -404,7 +377,6 @@ except Exception as e: report_utils.quarto_plot_exception("Lift", e) ``` - ::: ```{python} @@ -438,8 +410,7 @@ except Exception as e: # Trend charts -::: {layout-ncol=2} - +::: {layout-ncol="2"} ```{python} # TODO see if we can put the various trend charts in tabs - although this seems to require go not express, then gain .data[0] probably gives the go objects @@ -484,14 +455,11 @@ try: except Exception as e: report_utils.quarto_plot_exception("Success Rate Trend", e) ``` - ::: # Performance by Predictor Category -Showing the performance across all predictors. The predictor categories default -to the text before the first dot. This can be customized when reading the data -for a particular customer. +Showing the performance across all predictors. The predictor categories default to the text before the first dot. This can be customized when reading the data for a particular customer. ```{python} # | error: true @@ -515,17 +483,11 @@ except Exception as e: # Predictor Overview -The predictors for this model are sorted by performance and grouped if they are -correlated (shown with an indentation and a lighter color). - -The negatives and positives counts are usually the same across all the -predictors but will be different when predictors have been removed or added. IH -predictors often have slightly lower counts. - -For Adaptive Gradient Boosting models ("AGB") the number of positives and -negatives is not available. +The predictors for this model are sorted by performance and grouped if they are correlated (shown with an indentation and a lighter color). +The negatives and positives counts are usually the same across all the predictors but will be different when predictors have been removed or added. IH predictors often have slightly lower counts. +For Adaptive Gradient Boosting models ("AGB") the number of positives and negatives is not available. ```{python} # TODO add a list of correlated predictors as a list, to show in the @@ -763,4 +725,4 @@ for pred in ( # unfortunately no way to get the quarto source file name, so that is hardcoded report_utils.show_credits("pega-datascientist-tools/python/pdstools/reports/ModelReport.qmd") -``` +``` \ No newline at end of file diff --git a/python/pdstools/reports/_quarto.yml b/python/pdstools/reports/_quarto.yml new file mode 100644 index 00000000..9f23520b --- /dev/null +++ b/python/pdstools/reports/_quarto.yml @@ -0,0 +1,7 @@ +analysis: + models: true + predictions: true + predictors: true +project: + title: Sample Data + type: default diff --git a/python/pdstools/utils/cdh_utils.py b/python/pdstools/utils/cdh_utils.py index 393648a6..a2277026 100644 --- a/python/pdstools/utils/cdh_utils.py +++ b/python/pdstools/utils/cdh_utils.py @@ -227,10 +227,11 @@ def parse_pega_date_time_formats( ): """Parses Pega DateTime formats. - Supports the two most commonly used formats: + Supports commonly used formats: - "%Y-%m-%d %H:%M:%S" - "%Y%m%dT%H%M%S.%f %Z" + - "%d-%b-%y" Removes timezones, and rounds to seconds, with a 'ns' time unit. 
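
Editor's note: to illustrate the fallback parsing, including the newly supported "%d-%b-%y" pattern, here is a standalone Polars sketch along the same lines as parse_pega_date_time_formats. The sample values are made up and this is not the function itself.

```python
import polars as pl

# Made-up snapshot timestamps: one ISO-like value and one in the "%d-%b-%y" format
df = pl.DataFrame({"SnapshotTime": ["2024-12-04 13:45:10", "04-Dec-24"]})

parsed = df.with_columns(
    pl.coalesce(
        # try the most common format first, fall back to the next one
        pl.col("SnapshotTime").str.to_datetime("%Y-%m-%d %H:%M:%S", strict=False),
        pl.col("SnapshotTime").str.to_datetime("%d-%b-%y", strict=False),
    )
    .dt.cast_time_unit("ns")
    .alias("SnapshotTime")
)
print(parsed)
```
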
@@ -254,6 +255,9 @@ def parse_pega_date_time_formats( pl.col(timestamp_col).str.to_datetime( "%Y%m%dT%H%M%S.%3f %Z", strict=False, ambiguous="null" ), + pl.col(timestamp_col).str.to_datetime( + "%d-%b-%y", strict=False, ambiguous="null" + ), pl.col(timestamp_col).str.to_datetime( timestamp_fmt or "%Y", strict=False, ambiguous="null" ), diff --git a/python/pdstools/utils/report_utils.py b/python/pdstools/utils/report_utils.py index 71e3aa77..5a880d4d 100644 --- a/python/pdstools/utils/report_utils.py +++ b/python/pdstools/utils/report_utils.py @@ -1,12 +1,11 @@ import traceback -from typing import Dict, List +from typing import Dict, List, Literal, Optional, Union from IPython.display import display, Markdown from great_tables import GT, style, loc from ..adm.CDH_Guidelines import CDHGuidelines from ..utils.show_versions import show_versions +from ..adm.Reports import Reports import polars as pl -import re -import subprocess import datetime @@ -35,7 +34,8 @@ def quarto_callout_important(info): % info ) -def quarto_plot_exception(plot_name:str, e:Exception): + +def quarto_plot_exception(plot_name: str, e: Exception): quarto_print( """ ::: {.callout-important collapse="true"} @@ -47,9 +47,15 @@ def quarto_plot_exception(plot_name:str, e:Exception): % (plot_name, e, traceback.format_exc()) ) + +def quarto_callout_no_prediction_data_warning(extra=""): + quarto_callout_important(f"Prediction Data is not available. {extra}") + + def quarto_callout_no_predictor_data_warning(extra=""): quarto_callout_important(f"Predictor Data is not available. {extra}") + def polars_col_exists(df, col): return col in df.collect_schema().names() and df.schema[col] != pl.Null @@ -58,6 +64,54 @@ def polars_subset_to_existing_cols(all_columns, cols): return [col for col in cols if col in all_columns] +def rag_background_styler( + rag: Optional[Literal["Red", "Amber", "Yellow", "Green"]] = None +): + match rag[0].upper() if len(rag) > 0 else None: + case "R": + return style.fill(color="orangered") + case "A": + return style.fill(color="orange") + case "Y": + return style.fill(color="yellow") + case "G": + return None # no green background to keep it light + case _: + raise ValueError(f"Not a supported RAG value: {rag}") + + +def rag_background_styler_dense( + rag: Optional[Literal["Red", "Amber", "Yellow", "Green"]] = None +): + match rag[0].upper() if len(rag) > 0 else None: + case "R": + return style.fill(color="orangered") + case "A": + return style.fill(color="orange") + case "Y": + return style.fill(color="yellow") + case "G": + return style.fill(color="green") + case _: + raise ValueError(f"Not a supported RAG value: {rag}") + + +def rag_textcolor_styler( + rag: Optional[Literal["Red", "Amber", "Yellow", "Green"]] = None +): + match rag[0].upper() if len(rag) > 0 else None: + case "R": + return style.text(color="orangered") + case "A": + return style.text(color="orange") + case "Y": + return style.text(color="yellow") + case "G": + return style.text(color="green") + case _: + raise ValueError(f"Not a supported RAG value: {rag}") + + def table_standard_formatting( source_table, title=None, @@ -65,11 +119,12 @@ def table_standard_formatting( rowname_col=None, groupname_col=None, cdh_guidelines=CDHGuidelines(), - highlight_limits: Dict[str, str] = {}, + highlight_limits: Dict[str, Union[str, List[str]]] = {}, highlight_lists: Dict[str, List[str]] = {}, highlight_configurations: List[str] = [], + rag_styler: callable = rag_background_styler, ): - def apply_metric_style(gt, col_name, metric): + def 
apply_rag_styling(gt, col_name, metric): if col_name in source_table.collect_schema().names(): min_val = cdh_guidelines.min(metric) max_val = cdh_guidelines.max(metric) @@ -80,28 +135,52 @@ def apply_metric_style(gt, col_name, metric): bad_rows = [ i for i, v in enumerate(values) - if v < min_val or (max_val is not None and v > max_val) + if v is not None + and ( + (min_val is not None and v < min_val) + or (max_val is not None and v > max_val) + ) ] warning_rows = [ i for i, v in enumerate(values) - if (v >= min_val and v < best_practice_min) - or ( - best_practice_max is not None - and max_val is not None - and v > best_practice_max - and v <= max_val + if v is not None + and ( + ( + min_val is not None + and best_practice_min is not None + and v >= min_val + and v < best_practice_min + ) + or ( + max_val is not None + and best_practice_max is not None + and v > best_practice_max + and v <= max_val + ) ) ] - - gt = gt.tab_style( - style=style.fill(color="orangered"), - locations=loc.body(columns=col_name, rows=bad_rows), - ) - gt = gt.tab_style( - style=style.fill(color="orange"), - locations=loc.body(columns=col_name, rows=warning_rows), - ) + good_rows = [ + i + for i, v in enumerate(values) + if v is not None + and (best_practice_min is None or v >= best_practice_min) + and (best_practice_max is None or v <= best_practice_max) + ] + # TODO consider that bad / warning rows are exclusive + + def apply_style(gt, rag, rows): + style = rag_styler(rag) + if style is not None: + gt = gt.tab_style( + style=style, + locations=loc.body(columns=col_name, rows=rows), + ) + return gt + + gt = apply_style(gt, "green", good_rows) + gt = apply_style(gt, "amber", warning_rows) + gt = apply_style(gt, "red", bad_rows) return gt def apply_standard_name_style(gt, col_name, standard_list): @@ -111,7 +190,7 @@ def apply_standard_name_style(gt, col_name, standard_list): i for i, v in enumerate(values) if v not in standard_list ] gt = gt.tab_style( - style=style.fill(color="yellow"), + style=rag_styler("yellow"), locations=loc.body(columns=col_name, rows=non_standard_rows), ) return gt @@ -121,43 +200,87 @@ def apply_configuration_style(gt, col_name): values = source_table[col_name].to_list() multiple_config_rows = [i for i, v in enumerate(values) if v.count(",") > 1] gt = gt.tab_style( - style=style.fill(color="yellow"), + style=rag_styler("yellow"), locations=loc.body(columns=col_name, rows=multiple_config_rows), ) return gt - gt = GT( - source_table, rowname_col=rowname_col, groupname_col=groupname_col - ).tab_options(table_font_size=8) + gt = ( + GT(source_table, rowname_col=rowname_col, groupname_col=groupname_col) + .tab_options(table_font_size=8) + .sub_missing(missing_text="") + ) if title is not None: gt = gt.tab_header(title=title, subtitle=subtitle) - for c in highlight_limits.keys(): - gt = apply_metric_style(gt, c, highlight_limits[c]) - gt = gt.fmt_number( - columns=c, decimals=0, compact=True - ) # default number formatting + def metric_styling_model_performance(gt, cols): + return gt.fmt_number( + decimals=2, + columns=cols, + ) - for c in highlight_lists.keys(): - gt = apply_standard_name_style(gt, c, highlight_lists[c]) + def metric_styling_percentage(gt, cols): + return gt.fmt_percent( + decimals=0, + columns=cols, + ) + + def metric_styling_ctr(gt, cols): + return gt.fmt_percent( + decimals=3, + columns=cols, + ) + + def metric_styling_default(gt, cols): + return gt.fmt_number( + decimals=0, + compact=True, + columns=cols, + ) - for c in highlight_configurations: - gt = 
apply_configuration_style(gt, c) + for metric in highlight_limits.keys(): + cols = highlight_limits[metric] + if isinstance(cols, str): + cols = [cols] + for col_name in cols: + gt = apply_rag_styling(gt, col_name=col_name, metric=metric) + # gt = gt.fmt_number( + # columns=col_name, decimals=0, compact=True + # ) # default number formatting applied to everything - consider being smarter, in config + match metric: + case "Model Performance": + gt = metric_styling_model_performance(gt, cols) + case "Engagement Lift": + gt = metric_styling_percentage(gt, cols) + case "OmniChannel": + gt = metric_styling_percentage(gt, cols) + case "CTR": + gt = metric_styling_ctr(gt, cols) + case _: + gt = metric_styling_default(gt, cols) + + for metric in highlight_lists.keys(): + gt = apply_standard_name_style(gt, metric, highlight_lists[metric]) + + for metric in highlight_configurations: + gt = apply_configuration_style(gt, metric) return gt -def table_style_predictor_count(gt: GT, flds, cdh_guidelines=CDHGuidelines()): +def table_style_predictor_count( + gt: GT, flds, cdh_guidelines=CDHGuidelines(), rag_styler=rag_textcolor_styler +): for col in flds: gt = gt.tab_style( - style=style.fill(color="orange"), + style=rag_styler("amber"), locations=loc.body( columns=col, rows=(pl.col(col) < 200) | (pl.col(col) > 700) & (pl.col(col) > 0), ), ).tab_style( - style=style.fill(color="orangered"), + style=rag_styler("red"), locations=loc.body( columns=col, rows=(pl.col(col) == 0), @@ -233,31 +356,23 @@ def sample_values(dm, all_dm_cols, fld, n=6): .to_list()[:n] ) -def show_credits(quarto_source: str): - def get_cmd_output(args): - result = ( - subprocess.run(args, stdout=subprocess.PIPE).stdout.decode("utf-8").split("\n") - ) - return result - - - def get_version_only(versionstr): - return re.sub("[^.0-9]", "", versionstr) +def show_credits(quarto_source: str): + _, quarto_version = Reports.get_quarto_with_version(verbose=False) + _, pandoc_version = Reports.get_pandoc_with_version(verbose=False) - quarto_version = get_version_only(get_cmd_output(["quarto", "--version"])[0]) - pandoc_version = get_version_only(get_cmd_output(["pandoc", "--version"])[0]) timestamp_str = datetime.datetime.now().strftime("%d %b %Y %H:%M:%S") quarto_print( f""" + Document created at: {timestamp_str} + This notebook: {quarto_source} + Quarto runtime: {quarto_version} Pandoc: {pandoc_version} - Document created at: {timestamp_str} - Additional details from 'pdstools.show_versions()': """ @@ -265,6 +380,6 @@ def get_version_only(versionstr): show_versions() - quarto_print("For more information please see the [Pega Data Scientist Tools](https://github.com/pegasystems/pega-datascientist-tools).") - - + quarto_print( + "For more information please see the [Pega Data Scientist Tools](https://github.com/pegasystems/pega-datascientist-tools)." 
+ ) diff --git a/python/tests/test_Prediction.py b/python/tests/test_Prediction.py index 14584d9c..1a2e4cc1 100644 --- a/python/tests/test_Prediction.py +++ b/python/tests/test_Prediction.py @@ -14,26 +14,17 @@ "pySnapShotTime": cdh_utils.to_prpc_date_time(datetime.datetime.now())[ 0:15 ], # Polars doesn't like time zones like GMT+0200 - "pyModelId": ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTWEBPROPENSITY"] * 4 - + ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTMOBILEPROPENSITY"] * 4, - # "Channel": ["Web"] * 4 + ["Mobile"] * 4, - # "Direction": ["Inbound"] * 4 + ["Outbound"] * 4, + "pyModelId": ["DATA-DECISION-REQUEST-CUSTOMER!MYCUSTOMPREDICTION"] * 4 + + ["DATA-DECISION-REQUEST-CUSTOMER!PredictActionPropensity"] * 4 + + ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTMOBILEPROPENSITY"] * 4 + + ["DATA-DECISION-REQUEST-CUSTOMER!PREDICTWEBPROPENSITY"] * 4, "pyModelType": "PREDICTION", - "pySnapshotType": (["Daily"] * 3 + [None]) * 2, - "pyDataUsage": [ - "Control", - "Test", - "NBA", - "", - "Control", - "Test", - "NBA", - "", - ], - "pyPositives": [100, 400, 500, 1000, 200, 800, 1000, 2000], - "pyNegatives": [1000, 2000, 3000, 6000, 3000, 6000, 9000, 18000], - "pyCount": [1100, 2400, 3500, 7000, 3200, 6800, 10000, 20000], - "pyValue": [0.65] * 4 + [0.70] * 4, + "pySnapshotType": (["Daily"] * 3 + [None]) * 4, + "pyDataUsage": ["Control", "Test", "NBA", ""] * 4, + "pyPositives": [100, 400, 500, 1000, 200, 800, 1000, 2000] * 2, + "pyNegatives": [1000, 2000, 3000, 6000, 3000, 6000, 9000, 18000] * 2, + "pyCount": [1100, 2400, 3500, 7000, 3200, 6800, 10000, 20000] * 2, + "pyValue": ([0.65] * 4 + [0.70] * 4) * 2, } ).lazy() @@ -79,7 +70,6 @@ def test_summary_by_channel_cols(test): "Direction", "isStandardNBADPrediction", "isMultiChannelPrediction", - "Lift", "Performance", "Positives", "Negatives", @@ -94,24 +84,28 @@ def test_summary_by_channel_cols(test): "ControlPercentage", "TestPercentage", "CTR", + "CTR_Test", + "CTR_Control", + "CTR_NBA", + "ChannelDirectionGroup", "isValid", + "Lift" ] - assert len(summary) == 2 def test_summary_by_channel_channels(test): summary = test.summary_by_channel().collect() - assert summary.select(pl.len()).item() == 2 + assert summary.select(pl.len()).item() == 4 def test_summary_by_channel_validity(test): summary = test.summary_by_channel().collect() - assert summary["isValid"].to_list() == [True, True] + assert summary["isValid"].to_list() == [True, True, True, True] def test_summary_by_channel_ia(test): summary = test.summary_by_channel().collect() - assert summary["usesImpactAnalyzer"].to_list() == [True, True] + assert summary["usesImpactAnalyzer"].to_list() == [True, True, True, True] test = Prediction( mock_prediction_data.filter( @@ -122,7 +116,10 @@ def test_summary_by_channel_ia(test): ) ) ) + # only Web still has the NBA indicator assert test.summary_by_channel().collect()["usesImpactAnalyzer"].to_list() == [ + False, + False, False, True, ] @@ -130,30 +127,37 @@ def test_summary_by_channel_ia(test): def test_summary_by_channel_lift(test): summary = test.summary_by_channel().collect() - assert [round(x, 5) for x in summary["Lift"].to_list()] == [0.88235, 0.83333] + assert [round(x, 5) for x in summary["Lift"].to_list()] == [0.83333, 0.88235] * 2 def test_summary_by_channel_controlpct(test): summary = test.summary_by_channel().collect() assert [round(x, 5) for x in summary["ControlPercentage"].to_list()] == [ - 16.0, 15.71429, - ] + 16.0, + ] * 2 assert [round(x, 5) for x in summary["TestPercentage"].to_list()] == [ - 34.0, 34.28571, - ] + 34.0, + ] * 2 def 
test_summary_by_channel_trend(test): summary = test.summary_by_channel(by_period="1d").collect() - assert summary.select(pl.len()).item() == 2 + assert summary.select(pl.len()).item() == 4 def test_summary_by_channel_trend2(test2): summary = test2.summary_by_channel(by_period="1d").collect() - assert summary.select(pl.len()).item() == 4 + assert summary.select(pl.len()).item() == 8 + + +def test_summary_by_channel_channeldirectiongroup(test): + summary = test.summary_by_channel().collect() + assert summary["isMultiChannelPrediction"].to_list() == [False, True, False, False] + assert summary["isStandardNBADPrediction"].to_list() == [False, True, True, True] + assert summary["ChannelDirectionGroup"].to_list() == ["Other", "Other", "Mobile/Inbound", "Web/Inbound"] def test_overall_summary_cols(test): summary = test.overall_summary().collect() @@ -174,20 +178,21 @@ def test_overall_summary_cols(test): def test_overall_summary_n_valid_channels(test): - print(test.overall_summary().collect()) - assert test.overall_summary().collect()["Number of Valid Channels"].item() == 2 + assert test.overall_summary().collect()["Number of Valid Channels"].item() == 3 def test_overall_summary_overall_lift(test): - assert round(test.overall_summary().collect()["Overall Lift"].item(), 5) == 0.86964 + # print(test.overall_summary().collect()) + # print(test.summary_by_channel().collect()) + assert round(test.overall_summary().collect()["Overall Lift"].item(), 5) == 0.86217 def test_overall_summary_positives(test): - assert test.overall_summary().collect()["Positives"].item() == 3000 + assert test.overall_summary().collect()["Positives"].item() == 4000 def test_overall_summary_responsecount(test): - assert test.overall_summary().collect()["ResponseCount"].item() == 27000 + assert test.overall_summary().collect()["ResponseCount"].item() == 34000 def test_overall_summary_channel_min_lift(test): @@ -202,16 +207,16 @@ def test_overall_summary_min_lift(test): def test_overall_summary_ctr(test): - assert round(test.overall_summary().collect()["CTR"].item(), 5) == 0.11111 + assert round(test.overall_summary().collect()["CTR"].item(), 5) == 0.11765 def test_overall_summary_controlpct(test): assert ( round(test.overall_summary().collect()["ControlPercentage"].item(), 5) - == 15.92593 + == 15.88235 ) assert ( - round(test.overall_summary().collect()["TestPercentage"].item(), 5) == 34.07407 + round(test.overall_summary().collect()["TestPercentage"].item(), 5) == 34.11765 ) @@ -228,3 +233,17 @@ def test_overall_summary_ia(test): ) ) assert test.overall_summary().collect()["usesImpactAnalyzer"].to_list() == [True] + +def test_plots(): + prediction = Prediction.from_mock_data() + + assert prediction.plot.performance_trend() is not None + assert prediction.plot.lift_trend() is not None + assert prediction.plot.responsecount_trend() is not None + assert prediction.plot.ctr_trend() is not None + + assert prediction.plot.performance_trend("1w") is not None + assert isinstance(prediction.plot.lift_trend("2d", return_df=True), pl.LazyFrame) + assert prediction.plot.responsecount_trend("1m") is not None + assert prediction.plot.ctr_trend("5d") is not None + diff --git a/python/tests/test_plots.py b/python/tests/test_plots.py index e55ec6ab..86342c40 100644 --- a/python/tests/test_plots.py +++ b/python/tests/test_plots.py @@ -50,15 +50,10 @@ def test_proposition_success_rates(sample: ADMDatamart): def test_score_distribution(sample: ADMDatamart): - model_id = "bd70a915-697a-5d43-ab2c-53b0557c85a0" + model_id = 
sample.combined_data.select("ModelID").collect().row(0)[0] df = sample.plot.score_distribution(model_id=model_id, return_df=True) - assert df.select(pl.col("BinIndex").top_k(1)).collect().item() == 1 - assert ( - round(df.select(pl.col("BinPropensity").top_k(1)).collect().item(), 2) == 0.01 - ) - - # Test if the plot works + assert df.select(pl.col("PredictorName").top_k(1)).collect().item() == "Classifier" plot = sample.plot.score_distribution(model_id=model_id) assert plot is not None @@ -70,18 +65,25 @@ def test_multiple_score_distributions(sample: ADMDatamart): def test_predictor_binning(sample: ADMDatamart): - model_id = "bd70a915-697a-5d43-ab2c-53b0557c85a0" - predictor_name = "Customer.HealthMatter" + random_row = ( + sample.combined_data.select(["ModelID", "PredictorName"]).collect().sample(1) + ) + model_id = random_row["ModelID"][0] + predictor_name = random_row["PredictorName"][0] df = sample.plot.predictor_binning( model_id=model_id, predictor_name=predictor_name, return_df=True ) - - assert df.select(pl.col("BinIndex").top_k(1)).collect().item() == 1 - assert ( - round(df.select(pl.col("BinPropensity").top_k(1)).collect().item(), 5) - == 0.00206 - ) - + assert not df.collect().is_empty() + required_columns = ["BinIndex", "BinPropensity", "BinSymbol", "BinResponseCount"] + assert all(col in df.collect_schema().names() for col in required_columns) + with pytest.raises(ValueError): + sample.plot.predictor_binning( + model_id="non_existent_id", predictor_name=predictor_name + ) + with pytest.raises(ValueError): + sample.plot.predictor_binning( + model_id=model_id, predictor_name="non_existent_predictor" + ) plot = sample.plot.predictor_binning( model_id=model_id, predictor_name=predictor_name ) @@ -89,10 +91,18 @@ def test_predictor_binning(sample: ADMDatamart): def test_multiple_predictor_binning(sample: ADMDatamart): - model_id = "bd70a915-697a-5d43-ab2c-53b0557c85a0" + model_id = sample.combined_data.select("ModelID").collect().row(0)[0] plots = sample.plot.multiple_predictor_binning(model_id=model_id, show_all=False) assert isinstance(plots, list) - assert len(plots) == 90 + # Same number of plots as number of predictors + assert ( + len(plots) + == sample.combined_data.filter(pl.col("ModelID") == model_id) + .select("PredictorName") + .unique() + .collect() + .shape[0] + ) assert all(isinstance(plot, Figure) for plot in plots) @@ -197,12 +207,29 @@ def test_predictor_count(sample: ADMDatamart): def test_binning_lift(sample: ADMDatamart): - model_id = "bd70a915-697a-5d43-ab2c-53b0557c85a0" - predictor_name = "Customer.HealthMatter" + # Get a random model_id and predictor_name + random_row = ( + sample.combined_data.select(["ModelID", "PredictorName"]).collect().sample(1) + ) + model_id = random_row["ModelID"][0] + predictor_name = random_row["PredictorName"][0] + df = sample.plot.binning_lift(model_id, predictor_name, return_df=True).collect() - assert df.shape == (1, 8) - assert df.filter(pl.col("BinIndex") == 1).select("Lift").item() == 0.0 + assert not df.is_empty() + required_columns = [ + "PredictorName", + "BinIndex", + "BinPositives", + "BinNegatives", + "BinSymbol", + "Lift", + "Direction", + "BinSymbolAbbreviated", + ] + assert all(col in df.columns for col in required_columns) + + assert set(df["Direction"]) <= {"pos", "neg", "pos_shaded", "neg_shaded"} plot = sample.plot.binning_lift(model_id, predictor_name) assert isinstance(plot, Figure)