diff --git a/examples/ih/Conversion_Reporting.ipynb b/examples/ih/Conversion_Reporting.ipynb
index 7cbb0fa2..91c84afd 100644
--- a/examples/ih/Conversion_Reporting.ipynb
+++ b/examples/ih/Conversion_Reporting.ipynb
@@ -56,8 +56,9 @@
"metadata": {},
"outputs": [],
"source": [
- "ih.plots.conversion_overall_gauges(\n",
- " experiment_field=\"ExperimentGroup\",\n",
+ "ih.plots.overall_gauges(\n",
+ " metric=\"Conversion\",\n",
+ " condition=\"ExperimentGroup\",\n",
" by=\"Channel\",\n",
" reference_values={\"Web\": 0.055, \"Email\": 0.09},\n",
")"
@@ -78,7 +79,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ih.plots.conversion_success_rates_tree_map()\n"
+ "ih.plots.success_rates_tree_map(metric=\"Conversion\")\n"
]
},
{
@@ -96,10 +97,20 @@
"metadata": {},
"outputs": [],
"source": [
- "ih.plots.conversion_success_rates_trend_bar(\n",
- " experiment_field=\"ExperimentGroup\",\n",
+ "ih.plots.success_rates_trend_bar(\n",
+ " metric=\"Conversion\",\n",
+ " condition=\"ExperimentGroup\",\n",
" every=\"1w\",\n",
- ")"
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plots.success_rates_trend(metric=\"Conversion\", every=\"1d\")"
]
},
{
@@ -115,8 +126,8 @@
"metadata": {},
"outputs": [],
"source": [
- "ih.plots.egagement_overall_gauges(\n",
- " experiment_field=\"ExperimentGroup\",\n",
+ "ih.plots.overall_gauges(\n",
+ " condition=\"ExperimentGroup\",\n",
" by=\"Channel\",\n",
" reference_values={\"Web\": 0.20, \"Email\": 0.20},\n",
")"
@@ -128,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ih.plots.conversion_success_rates_trend_line(\n",
+ "ih.plots.success_rates_trend(\n",
" by=\"Channel\"\n",
")"
]
diff --git a/examples/ih/Example_IH_Analysis.ipynb b/examples/ih/Example_IH_Analysis.ipynb
index 2430ac13..5fad5087 100644
--- a/examples/ih/Example_IH_Analysis.ipynb
+++ b/examples/ih/Example_IH_Analysis.ipynb
@@ -2,821 +2,155 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'cdhtools'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[1], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msys\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcdhtools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mIHanalysis\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcdhtools\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcdh_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m readDSExport\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'cdhtools'"
- ]
- }
- ],
+ "outputs": [],
"source": [
- "import pandas as pd\n",
- "import sys\n",
+ "from pdstools import IH\n",
+ "from pdstools.utils import cdh_utils\n",
"\n",
- "from cdhtools.IHanalysis import *\n",
- "from cdhtools.cdh_utils import readDSExport\n",
+ "import polars as pl\n",
+ "import plotly.io as pio\n",
+ "import plotly as plotly\n",
"\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
+ "plotly.offline.init_notebook_mode()\n",
+ "pio.renderers.default = \"vscode\""
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Importing: ../../data/Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\n"
- ]
- }
- ],
"source": [
- "df_orig = readDSExport(\"Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\", path=\"../../data\")"
+ "# Example IH Analysis\n",
+ "\n",
+ "Interaction History (IH) is a rich source of data at the level of individual interactions from Pega DSM applications. It contains the time of the interaction, the channel, the actions/treatments and the customer ID, and is used to track different types of outcomes (decisions, sends, opens, clicks, etc.). It does **not** contain customer attributes - only the IDs.\n",
+ "\n",
+ "This notebook gives some examples of data analysis on IH. Like most of PDSTools, it uses [plotly](https://plotly.com/python/) for visualization and [polars](https://docs.pola.rs/) for dataframes. The purpose of this notebook is to provide example analyses rather than re-usable code, although we do try to provide some generic, re-usable functions. All of the analyses should be easy to replicate in other analytical BI environments - except perhaps the analysis of model performance / AUC.\n",
+ "\n",
+ "This notebook uses sample data shipped with PDSTools. Replace it with your own IH data and modify the analyses as appropriate."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "df = initial_prep(df_orig, referenceTime='pxOutcomeTime')"
+ "ih = IH.from_ds_export(\n",
+ " \"../../data/Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\"\n",
+ ")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "At first, take a look into the IH dataframe, explore the columns, outcome types and business structure"
+ "Preview of the raw IH data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " pySubjectType | \n",
- " pxInteractionID | \n",
- " ControlGroupValidityStart | \n",
- " pyStage | \n",
- " pyJourney | \n",
- " CustomerID | \n",
- " ChannelSubGroup | \n",
- " pyChannel | \n",
- " pyCustomerSubSegment | \n",
- " pyStep | \n",
- " ... | \n",
- " pyResponse | \n",
- " pyCategory | \n",
- " ControlGroupValidityEnd | \n",
- " pxDecisionTime | \n",
- " pyLabel | \n",
- " ChannelGroup | \n",
- " pyStrategy | \n",
- " Date | \n",
- " WeekOfYear | \n",
- " Week | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " SMS | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " U+ Personal Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " U+ Personal Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " Visa Gold Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " SMS | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " MasterCard Gold | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " AMEXPersonal | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 52 columns
\n",
- "
"
- ],
- "text/plain": [
- " pySubjectType pxInteractionID ControlGroupValidityStart \\\n",
- "0 CDHSample-Data-Customer -3586780626931683381 \n",
- "1 CDHSample-Data-Customer -3586780626931683381 \n",
- "2 CDHSample-Data-Customer -3586780626931683381 \n",
- "3 CDHSample-Data-Customer -3586780626931683381 \n",
- "4 CDHSample-Data-Customer -3586780626931683381 \n",
- "\n",
- " pyStage pyJourney CustomerID ChannelSubGroup pyChannel \\\n",
- "0 Customer-4118 SMS \n",
- "1 Customer-4118 Web \n",
- "2 Customer-4118 Web \n",
- "3 Customer-4118 SMS \n",
- "4 Customer-4118 Web \n",
- "\n",
- " pyCustomerSubSegment pyStep ... pyResponse pyCategory \\\n",
- "0 ... \n",
- "1 ... \n",
- "2 ... \n",
- "3 ... \n",
- "4 ... \n",
- "\n",
- " ControlGroupValidityEnd pxDecisionTime pyLabel \\\n",
- "0 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n",
- "1 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n",
- "2 2021-01-27 13:22:05.810000+00:00 Visa Gold Card \n",
- "3 2021-01-27 13:22:05.810000+00:00 MasterCard Gold \n",
- "4 2021-01-27 13:22:05.810000+00:00 AMEXPersonal \n",
- "\n",
- " ChannelGroup pyStrategy Date WeekOfYear Week \n",
- "0 InitializeModelsSmall 2021-01-27 4 1 \n",
- "1 InitializeModelsSmall 2021-01-27 4 1 \n",
- "2 InitializeModelsSmall 2021-01-27 4 1 \n",
- "3 InitializeModelsSmall 2021-01-27 4 1 \n",
- "4 InitializeModelsSmall 2021-01-27 4 1 \n",
- "\n",
- "[5 rows x 52 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " Count | \n",
- "
\n",
- " \n",
- " pyIssue | \n",
- " pyGroup | \n",
- " pyDirection | \n",
- " pyChannel | \n",
- " pyName | \n",
- " pyOutcome | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " Churned | \n",
- " 5072 | \n",
- "
\n",
- " \n",
- " Loyal | \n",
- " 4928 | \n",
- "
\n",
- " \n",
- " Sales | \n",
- " CreditCards | \n",
- " Inbound | \n",
- " Web | \n",
- " AMEXPersonal | \n",
- " Clicked | \n",
- " 1487 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6331 | \n",
- "
\n",
- " \n",
- " UPlusFinGold | \n",
- " Accepted | \n",
- " 367 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6468 | \n",
- "
\n",
- " \n",
- " UPlusFinPersonal | \n",
- " Accepted | \n",
- " 367 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6534 | \n",
- "
\n",
- " \n",
- " UPlusGold | \n",
- " Accepted | \n",
- " 1843 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 1204 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 7004 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 5487 | \n",
- "
\n",
- " \n",
- " UPlusPersonal | \n",
- " Accept | \n",
- " 2635 | \n",
- "
\n",
- " \n",
- " Accepted | \n",
- " 970 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 4361 | \n",
- "
\n",
- " \n",
- " VisaGold | \n",
- " Clicked | \n",
- " 1777 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 5538 | \n",
- "
\n",
- " \n",
- " Outbound | \n",
- " SMS | \n",
- " AMEXPersonal | \n",
- " Clicked | \n",
- " 1002 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6775 | \n",
- "
\n",
- " \n",
- " MasterCardGold | \n",
- " Clicked | \n",
- " 296 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6438 | \n",
- "
\n",
- " \n",
- " MasterCardWorld | \n",
- " Clicked | \n",
- " 342 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 5846 | \n",
- "
\n",
- " \n",
- " UPlusFinGold | \n",
- " Accepted | \n",
- " 297 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 265 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 7081 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6645 | \n",
- "
\n",
- " \n",
- " UPlusFinPersonal | \n",
- " Accepted | \n",
- " 311 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6482 | \n",
- "
\n",
- " \n",
- " UPlusGold | \n",
- " Accepted | \n",
- " 1463 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 5474 | \n",
- "
\n",
- " \n",
- " UPlusPersonal | \n",
- " Accept | \n",
- " 5206 | \n",
- "
\n",
- " \n",
- " Accepted | \n",
- " 684 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 581 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 4984 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 4578 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Count\n",
- "pyIssue pyGroup pyDirection pyChannel pyName pyOutcome \n",
- " Churned 5072\n",
- " Loyal 4928\n",
- "Sales CreditCards Inbound Web AMEXPersonal Clicked 1487\n",
- " NoResponse 6331\n",
- " UPlusFinGold Accepted 367\n",
- " Rejected 6468\n",
- " UPlusFinPersonal Accepted 367\n",
- " Rejected 6534\n",
- " UPlusGold Accepted 1843\n",
- " Clicked 1204\n",
- " NoResponse 7004\n",
- " Rejected 5487\n",
- " UPlusPersonal Accept 2635\n",
- " Accepted 970\n",
- " Rejected 4361\n",
- " VisaGold Clicked 1777\n",
- " NoResponse 5538\n",
- " Outbound SMS AMEXPersonal Clicked 1002\n",
- " NoResponse 6775\n",
- " MasterCardGold Clicked 296\n",
- " NoResponse 6438\n",
- " MasterCardWorld Clicked 342\n",
- " NoResponse 5846\n",
- " UPlusFinGold Accepted 297\n",
- " Clicked 265\n",
- " NoResponse 7081\n",
- " Rejected 6645\n",
- " UPlusFinPersonal Accepted 311\n",
- " Rejected 6482\n",
- " UPlusGold Accepted 1463\n",
- " Rejected 5474\n",
- " UPlusPersonal Accept 5206\n",
- " Accepted 684\n",
- " Clicked 581\n",
- " NoResponse 4984\n",
- " Rejected 4578"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.groupby(['pyIssue', 'pyGroup', 'pyDirection', 'pyChannel', 'pyName', 'pyOutcome']).count()[[\n",
- " 'pxInteractionID']].rename(columns={'pxInteractionID':'Count'})"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Use \"plot_daily_accept_rate\" to plot accept rate per day to understand how accept rates changed over time. To define accept rate, enter the positive (here: Accepted) and negative (here: Rejected) behaviour in the function. use kwargs to customize the graph. If the time ticks on the x axis are too many, shrink them using 'shrinkTicks'. If data is missing in certain days, force the graph make gaps for the missing days by setting 'allTime':True. you can also define hue"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "