Patch optional imports, documented predictor categorization, revamped docs #293

Merged: 29 commits (Dec 9, 2024)
Commits (29)
b6116e4
add pre-commit to dev deps, update docs
StijnKas Nov 15, 2024
77e971a
update docs workflows
StijnKas Nov 15, 2024
413eea1
try updated docs workflow
StijnKas Nov 15, 2024
d58cc30
see effects on docs
StijnKas Nov 15, 2024
82fb200
try to ignore loggers
StijnKas Nov 15, 2024
b870799
Merge branch 'pegasystems:master' into master
StijnKas Nov 18, 2024
8478d94
doc revamp
StijnKas Nov 18, 2024
c7e831a
keep using doc push on every push while deving
StijnKas Nov 18, 2024
b13a156
sphinxarg as dependency
StijnKas Nov 18, 2024
35f48b1
more conditions
StijnKas Nov 18, 2024
c145abb
Bring back jupyter tags
StijnKas Nov 18, 2024
7ccf716
bring back vf cell tag too
StijnKas Nov 18, 2024
c4893b7
update predictor_binning
StijnKas Nov 18, 2024
8aeda3b
update agb
StijnKas Nov 18, 2024
74591d9
Update VF article
StijnKas Nov 18, 2024
1b7751c
update admexplained
StijnKas Nov 18, 2024
e86fec5
Add getting started guide
StijnKas Nov 18, 2024
7457727
Further docstring improvements
StijnKas Nov 18, 2024
715527e
Fix tests
StijnKas Nov 18, 2024
f31282a
don't import from python
StijnKas Nov 18, 2024
33ade6e
Add inline dependencies to the cli
StijnKas Nov 18, 2024
e2c4356
Delete uv.lock
StijnKas Nov 19, 2024
713af9e
update release ci to use pypi token
StijnKas Nov 19, 2024
d12999a
try v5 :)
StijnKas Nov 19, 2024
c99ca38
skip one test
StijnKas Nov 19, 2024
80b7689
Release docs on public version release
StijnKas Nov 19, 2024
e2a2bfb
Fixed optional imports, documented predictor categorization
StijnKas Dec 9, 2024
a9bbd31
Merge branch 'master' into patch
StijnKas Dec 9, 2024
76b616c
tie polars to 1.16 - it was failing
StijnKas Dec 9, 2024
2 changes: 1 addition & 1 deletion .github/workflows/Python tests.yml
@@ -81,7 +81,7 @@ jobs:
run: uv run pytest --cov=./python/pdstools --cov-report=xml --cov-config=./python/tests/.coveragerc --ignore=python/tests/test_healthcheck.py --ignore=python/tests/test_ADMTrees.py

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5.0.7
uses: codecov/codecov-action@v5.1.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false
1 change: 1 addition & 0 deletions .gitignore
@@ -34,3 +34,4 @@ python/*.ipynb_checkpoints/*
**/META-INF/*
r/tests/testthat/d/tmp2
**/cache
.venv
3 changes: 1 addition & 2 deletions examples/articles/ADMExplained.ipynb
@@ -944,8 +944,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
6 changes: 2 additions & 4 deletions pyproject.toml
@@ -26,20 +26,18 @@ classifiers = [
keywords = [
"pega",
"pegasystems",
"pds",
"pdstools",
"cdhtools",
"datascientist",
"tools",
]
requires-python = ">=3.9"
dependencies = ['polars>=1.9', 'typing_extensions']
dependencies = ['polars==1.16', 'typing_extensions']

[tool.setuptools.dynamic]
version = {attr="pdstools.__version__"}

[project.optional-dependencies]
adm = ['plotly>=5.5.0']
adm = ['plotly[express]>=6.0.0rc0', 'requests']
pega_io = ['aioboto3', 'polars_hash']
api = ['httpx', 'pydantic', 'anyio']
healthcheck = ['pdstools[adm]', 'great_tables>=0.13', 'quarto', 'papermill', 'xlsxwriter>=3.0', 'pydot']
28 changes: 19 additions & 9 deletions python/pdstools/adm/ADMDatamart.py
@@ -354,8 +354,8 @@ def _validate_predictor_data(
def apply_predictor_categorization(
self,
df: Optional[pl.LazyFrame] = None,
categorization: Optional[
Union[pl.Expr, Callable[..., pl.Expr]]
categorization: Union[
pl.Expr, Callable[..., pl.Expr]
] = cdh_utils.default_predictor_categorization,
):
"""Apply a new predictor categorization to the datamart tables
@@ -381,25 +381,35 @@ def apply_predictor_categorization(

See also
--------
pdstools.utils.cdh_utils.default_predictor_categorization : The default
pdstools.utils.cdh_utils.default_predictor_categorization : The default method

Examples
--------
>>> #TODO
>>> dm = ADMDatamart(my_data) #uses the OOTB predictor categorization

>>> dm.apply_predictor_categorization(categorization=pl.when(
>>> pl.col("PredictorName").cast(pl.Utf8).str.contains("Propensity")
>>> ).then(pl.lit("External Model")
>>> ).otherwise(pl.lit("Adaptive Model")))

>>> # Now, every subsequent plot will use the custom categorization
"""
if callable(categorization):
categorization: pl.Expr = categorization()

categorization_expr: pl.Expr = (
categorization() if callable(categorization) else categorization
)


if df is not None:
return df.with_columns(PredictorCategory=categorization)
return df.with_columns(PredictorCategory=categorization_expr)

if hasattr(self, "predictor_data") and self.predictor_data is not None:
self.predictor_data = self.predictor_data.with_columns(
PredictorCategory=categorization
PredictorCategory=categorization_expr
)
if hasattr(self, "combined_data") and self.combined_data is not None:
self.combined_data = self.combined_data.with_columns(
PredictorCategory=categorization
PredictorCategory=categorization_expr
)

def save_data(
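For context, a minimal sketch of the revamped categorization API documented above, assuming `dm` is an already-initialised ADMDatamart with predictor data loaded (the predictor name pattern is purely illustrative):

import polars as pl

# Custom categorization: predictors containing "Propensity" are tagged as
# external-model predictors, everything else as regular adaptive-model predictors.
custom_categorization = (
    pl.when(pl.col("PredictorName").cast(pl.Utf8).str.contains("Propensity"))
    .then(pl.lit("External Model"))
    .otherwise(pl.lit("Adaptive Model"))
)

# Applied to the datamart itself: predictor_data and combined_data get a
# PredictorCategory column, and every subsequent plot uses the custom categories.
dm.apply_predictor_categorization(categorization=custom_categorization)

# Applied to a standalone LazyFrame instead: returns the frame with the
# PredictorCategory column added, leaving the datamart untouched.
categorized = dm.apply_predictor_categorization(
    df=dm.predictor_data, categorization=custom_categorization
)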
63 changes: 57 additions & 6 deletions python/pdstools/adm/Plots.py
@@ -174,6 +174,7 @@ def distribution_graph(df: pl.LazyFrame, title: str):

class Plots(LazyNamespace):
dependencies = ["plotly"]
dependency_group = "adm"

def __init__(self, datamart: "ADMDatamart"):
self.datamart = datamart
@@ -295,15 +296,15 @@ def over_time(

metric_formatting = {
"SuccessRate_weighted_average": ":.4%",
"Performance_weighted_average": ":.2", # is not a percentage!
"Performance_weighted_average": ":.2", # is not a percentage!
"Positives": ":.d",
"ResponseCount": ":.d",
}

if metric == "Performance":
metric_scaling:pl.Expr = pl.lit(100.0)
metric_scaling: pl.Expr = pl.lit(100.0)
else:
metric_scaling:pl.Expr = pl.lit(1.0)
metric_scaling: pl.Expr = pl.lit(1.0)

if self.datamart.model_data is None:
raise ValueError("Visualisation requires model_data")
@@ -333,9 +334,10 @@ def over_time(
"SnapshotTime", every=every, group_by=grouping_columns
)
.agg(
(metric_scaling*cdh_utils.weighted_average_polars(
metric, "ResponseCount"
)).name.suffix("_weighted_average")
(
metric_scaling
* cdh_utils.weighted_average_polars(metric, "ResponseCount")
).name.suffix("_weighted_average")
)
.sort("SnapshotTime", by_col)
)
@@ -660,6 +662,10 @@ def predictor_performance(
Whether to facet the plot into subplots, by default None
return_df : bool, optional
Whether to return a dataframe instead of a plot, by default False

See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""

metric = "PredictorPerformance" if metric == "Performance" else metric
@@ -762,6 +768,31 @@ def predictor_category_performance(
facet: Optional[Union[pl.Expr, str]] = None,
return_df: bool = False,
):
"""Plot the predictor category performance

Parameters
----------
metric : str, optional
The metric to plot, by default "Performance"
active_only : bool, optional
Whether to only analyze active predictors, by default False
query : Optional[QUERY], optional
An optional query to apply, by default None
facet : Optional[Union[pl.Expr, str]], optional
By which columns to facet the result, by default None
return_df : bool, optional
An optional flag to get the dataframe instead, by default False

Returns
-------
px.Figure
A Plotly figure


See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""
metric = "PredictorPerformance" if metric == "Performance" else metric

# Determine columns to select and grouping
@@ -847,6 +878,26 @@ def predictor_contribution(
query: Optional[QUERY] = None,
return_df: bool = False,
):
"""Plots the predictor contribution for each configuration

Parameters
----------
by : str, optional
By which column to plot the contribution, by default "Configuration"
query : Optional[QUERY], optional
An optional query to apply to the data, by default None
return_df : bool, optional
An optional flag to get a Dataframe instead, by default False

Returns
-------
px.Figure
A plotly figure

See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""
df = (
cdh_utils._apply_query(
self.datamart.aggregates.last(table="combined_data"),
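As a usage sketch of the newly documented plot methods, assuming the Plots namespace is exposed on the datamart as `dm.plot` and that combined data is available:

# Performance per predictor category, faceted by configuration (a Plotly figure).
fig = dm.plot.predictor_category_performance(facet="Configuration")

# Contribution of each predictor category, split by configuration.
fig = dm.plot.predictor_contribution(by="Configuration")

# Pass return_df=True to get the aggregated dataframe instead of a figure.
df = dm.plot.predictor_category_performance(return_df=True)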
49 changes: 44 additions & 5 deletions python/pdstools/infinity/__init__.py
@@ -1,9 +1,48 @@
"""
My module docstring

Does this work?
Infinity API client for Pega Decision Management.
"""

from .client import AsyncInfinity, Infinity
from importlib.util import find_spec
from typing import TYPE_CHECKING, List

from ..utils.namespaces import MissingDependenciesException

if TYPE_CHECKING:
from .client import Infinity


class DependencyNotFound:
def __init__(self, dependencies: List[str]):
self.dependencies = dependencies
self.namespace = "the DX API Client"
self.deps_group = "api"

def __repr__(self):
return f"While importing, one or more dependencies were not found: {self.dependencies}"

def __call__(self):
raise MissingDependenciesException(
self.dependencies, namespace=self.namespace, deps_group=self.deps_group
)


def __getattr__(name: str):
"""Lazy import to avoid loading httpx until needed."""
if name == "Infinity":
missing_dependencies: List[str] = []
if not find_spec("pydantic"):
missing_dependencies.append("pydantic")
if not find_spec("httpx"):
missing_dependencies.append("httpx")

if missing_dependencies:
return DependencyNotFound(missing_dependencies)

from .client import Infinity

return Infinity

raise AttributeError(f"module '{__name__}' has no attribute '{name}'")


__all__ = ["Infinity", "AsyncInfinity"]
__all__ = ["Infinity"]
4 changes: 2 additions & 2 deletions python/pdstools/pega_io/API.py
@@ -1,7 +1,5 @@
from os import PathLike

import requests


def _read_client_credential_file(credential_file: PathLike): # pragma: no cover
outputdict = {}
@@ -37,6 +35,8 @@ def get_token(credential_file: PathLike, verify: bool = True): # pragma: no cov
explicitly set verify to False, otherwise Python will yell at you.

"""
import requests

creds = _read_client_credential_file(credential_file)
response = requests.post(
url=creds["Access token endpoint"],
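A minimal sketch of calling get_token with the now-deferred requests import; the credential file name is hypothetical, and requests only needs to be installed when the function is actually invoked:

from pdstools.pega_io.API import get_token

# Reads the OAuth client-credential file and posts to its "Access token endpoint";
# per the docstring above, explicitly set verify=False if verification must be skipped.
token = get_token("credential_file.txt", verify=True)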
34 changes: 20 additions & 14 deletions python/pdstools/pega_io/File.py
@@ -12,7 +12,6 @@
from typing import Iterable, List, Literal, Optional, Tuple, Union, overload

import polars as pl
import requests

from ..utils.cdh_utils import from_prpc_date_time

@@ -94,6 +93,8 @@
logging.debug("Could not find file in directory, checking if URL")

try:
import requests

response = requests.get(f"{path}/{filename}")
logging.info(f"Response: {response}")
if response.status_code == 200:
@@ -102,6 +103,11 @@
file = BytesIO(urllib.request.urlopen(file).read())
_, extension = os.path.splitext(filename)

except ImportError:
warnings.warn(

"Unable to import `requests`, so not able to check for remote files. If you're trying to read in a file from the internet (or, for instance, using the built-in cdh_sample method), try installing the 'requests' package (`uv pip install requests`)"
)

except Exception as e:
logging.info(e)
if verbose:
@@ -162,19 +168,19 @@

if extension == ".json":
try:
if isinstance(file, BytesIO):
from pyarrow import json

return pl.LazyFrame(
json.read_json(
file,
)
)
else:
return pl.scan_ndjson(
file,
infer_schema_length=reading_opts.pop("infer_schema_length", 10000),
)
# if isinstance(file, BytesIO):
# from pyarrow import json

# return pl.LazyFrame(
# json.read_json(
# file,
# )
# )
# else:
return pl.scan_ndjson(
file,
infer_schema_length=reading_opts.pop("infer_schema_length", 10000),
)
except Exception: # pragma: no cover
try:
return pl.read_json(file).lazy()
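For reference, the JSON branch above now always delegates to Polars' newline-delimited JSON scanner rather than pyarrow; a standalone sketch with an illustrative file name:

import polars as pl

# Lazily scan a newline-delimited JSON export; the schema is inferred from up
# to 10,000 rows, matching the default used in the diff above.
lf = pl.scan_ndjson("datamart_export.json", infer_schema_length=10000)
df = lf.collect()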
1 change: 0 additions & 1 deletion python/pdstools/prediction/Prediction.py
@@ -201,7 +201,6 @@ def responsecount_trend(
result.update_layout(yaxis_title="Responses")
return result


class Prediction:
"""Monitor Pega Prediction Studio Predictions"""

1 change: 1 addition & 0 deletions python/pdstools/reports/HealthCheck.qmd
@@ -1920,4 +1920,5 @@ except Exception as e:
# unfortunately no way to get the quarto source file name, so that is hardcoded
report_utils.show_credits("pega-datascientist-tools/python/pdstools/reports/HealthCheck.qmd")


```