diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..c53011f --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,19 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +build: + os: ubuntu-20.04 + tools: + python: "3.8" + +sphinx: + configuration: docs/conf.py + +python: + install: + - requirements: requirements.txt + - requirements: docs/requirements.txt \ No newline at end of file
diff --git a/connect/__init__.py b/connect/__init__.py index ee94960..e156aa0 100644 --- a/connect/__init__.py +++ b/connect/__init__.py @@ -1,9 +1,11 @@ """ Credo AI Connect package """ +from connect.adapters import Adapter +from connect.governance import Governance from connect.utils.version_check import validate_version -__version__ = "0.0.4" +__version__ = "0.0.5" __all__ = ["governance", "evidence", "utils"]
diff --git a/connect/adapters/adapters.py b/connect/adapters/adapters.py index b4237f9..e610001 100644 --- a/connect/adapters/adapters.py +++ b/connect/adapters/adapters.py @@ -1,10 +1,11 @@ +from functools import partial from typing import Optional import pandas as pd -from connect.evidence import MetricContainer +from connect.evidence import EvidenceContainer, MetricContainer, TableContainer from connect.governance import Governance -from connect.utils import ValidationError +from connect.utils import ValidationError, wrap_list class Adapter: @@ -38,9 +39,10 @@ def __init__( def metrics_to_governance( self, metrics: dict, + source: str, labels: dict = None, metadata: dict = None, - overwrite_governance: bool = False, + overwrite_governance: bool = True, ): """ Packages metrics as evidence and sends them to governance @@ -49,16 +51,93 @@ def metrics_to_governance( --------- metrics : dict or pd.DataFrame Dictionary of metrics. Form: {metric_type: value, ...} + source : str + Label for what generated the metrics labels : dict - Additional labels to pass to underlying evidence + Additional key/value pairs to act as labels for the evidence + metadata : dict + Metadata to pass to underlying evidence + overwrite_governance : bool + When adding evidence to a Governance object, whether to overwrite existing + evidence or not, default True. + """ + self._evidence_to_governance( + self._metrics_to_evidence, + metrics, + source, + labels, + metadata, + overwrite_governance, + ) + + def table_to_governance( + self, + data: pd.DataFrame, + source: str, + labels: dict = None, + metadata: dict = None, + overwrite_governance: bool = True, + ): + """ + Packages a table as evidence and sends it to governance + + Parameters + --------- + data: pd.DataFrame + DataFrame to package as evidence. The DataFrame must have a "name" attribute + source : str + Label for what generated the table + labels : dict + Additional key/value pairs to act as labels for the evidence metadata : dict Metadata to pass to underlying evidence overwrite_governance : bool When adding evidence to a Governance object, whether to overwrite existing - evidence or not, default False. + evidence or not, default True. """ - evidence = self._metrics_to_evidence(metrics, labels, metadata) + self._evidence_to_governance( + TableContainer, data, source, labels, metadata, overwrite_governance + ) + def _evidence_to_governance( + self, + evidence_fun, + data, + source, + labels, + metadata, + overwrite_governance=True, + ): + """ + Packages data as evidence and sends to governance + + Parameters + --------- + evidence_fun : callable or Container + Function to pass data, labels and metadata. The function should return a list of + evidence. If a Container, use self._to_evidence with the specified container + data + Data to pass to evidence_fun + source : str + Label for what generated the data + labels : dict + Additional key/value pairs to act as labels for the evidence + metadata : dict + Metadata to pass to underlying evidence + overwrite_governance : bool + When adding evidence to a Governance object, whether to overwrite existing + evidence or not, default True. + """ + try: + if issubclass(evidence_fun, EvidenceContainer): + evidence_fun = partial(self._to_evidence, container_class=evidence_fun) + except TypeError: + pass + labels = {**(labels or {}), "source": source} + evidence = evidence_fun(data=data, labels=labels, metadata=metadata) if overwrite_governance: self.governance.set_evidence(evidence) else: @@ -72,12 +151,12 @@ def _get_artifact_meta(self): del model["tags"] return model or {} - def _metrics_to_evidence(self, metrics, labels=None, metadata=None): + def _metrics_to_evidence(self, data, labels=None, metadata=None): """Converts a dictionary of metrics to evidence Parameters ---------- - metrics : dict or pd.DataFrame + data : dict or pd.DataFrame Dictionary of metrics. Form: {metric_type: value, ...} labels : dict Additional labels to pass to underlying evidence @@ -89,11 +168,14 @@ def _metrics_to_evidence(self, metrics, labels=None, metadata=None): List list of Evidence """ + if isinstance(data, dict): + data = pd.DataFrame(data.items(), columns=["type", "value"]) + elif not isinstance(data, pd.DataFrame): + raise ValidationError("Metrics must be a dictionary or a dataframe") + return self._to_evidence(MetricContainer, data, labels, metadata) + + def _to_evidence(self, container_class, data, labels, metadata): meta = self._get_artifact_meta() meta.update(metadata or {}) - if isinstance(metrics, dict): - metrics = pd.DataFrame(metrics.items(), columns=["type", "value"]) - elif not isinstance(metrics, pd.DataFrame): - raise ValidationError("Metrics must be a dictionary or a dataframe") - container = MetricContainer(metrics, labels, meta) - return container.to_evidence() + container = container_class(data, labels, meta) + return wrap_list(container.to_evidence())
diff --git a/connect/governance/credo_api_client.py b/connect/governance/credo_api_client.py index 98dfd26..36c8851 100644 --- a/connect/governance/credo_api_client.py +++ b/connect/governance/credo_api_client.py @@ -9,8 +9,7 @@ from dotenv import dotenv_values from json_api_doc import deserialize, serialize -from connect import __version__ -from connect.utils import global_logger, json_dumps +from connect.utils import get_version, global_logger, json_dumps CREDO_URL = "https://api.credo.ai" @@ -133,7 +132,7 @@ def set_access_token(self, access_token): "accept": "application/vnd.api+json", "content-type": "application/vnd.api+json", "X-Client-Name": "Credo AI Connect", - "X-Client-Version": __version__, + "X-Client-Version": get_version(), } self._session.headers.update(headers) diff --git
a/connect/governance/governance.py b/connect/governance/governance.py index ba57c3c..583ecc9 100644 --- a/connect/governance/governance.py +++ b/connect/governance/governance.py @@ -10,9 +10,14 @@ from json_api_doc import deserialize, serialize -from connect import __version__ from connect.evidence import Evidence, EvidenceRequirement -from connect.utils import check_subset, global_logger, json_dumps, wrap_list +from connect.utils import ( + check_subset, + get_version, + global_logger, + json_dumps, + wrap_list, +) from .credo_api import CredoApi from .credo_api_client import CredoApiClient @@ -171,7 +176,7 @@ def get_evidence_requirements(self, tags: dict = None, verbose=False): self._print_evidence(reqs) return reqs - def get_evidence_tags(self): + def get_requirement_tags(self): """Return the unique tags used for all evidence requirements""" return self._unique_tags @@ -318,7 +323,7 @@ def set_evidence(self, evidences: List[Evidence]): def tag_model(self, model): """Interactive utility to tag a model tags from assessment plan""" - tags = self.get_evidence_tags() + tags = self.get_requirement_tags() print(f"Select tag from assessment plan to associated with model:") print("0: No tags") for number, tag in enumerate(tags): @@ -393,7 +398,7 @@ def _file_export(self, filename): f"Saving {len(self._evidences)} evidences to {filename}.. for use_case_id={self._use_case_id} policy_pack_id={self._policy_pack_id} " ) data = self._prepare_export_data() - meta = {"client": "Credo AI Connect", "version": __version__} + meta = {"client": "Credo AI Connect", "version": get_version()} data = json_dumps(serialize(data=data, meta=meta)) with open(filename, "w") as f: f.write(data) diff --git a/connect/utils/__init__.py b/connect/utils/__init__.py index 4310848..fd8bfc5 100644 --- a/connect/utils/__init__.py +++ b/connect/utils/__init__.py @@ -5,3 +5,4 @@ from .common import * from .data_scrubbing import Scrubber from .logging import * +from .version_check import get_version diff --git a/connect/utils/version_check.py b/connect/utils/version_check.py index 05fa1ec..b83b4fa 100644 --- a/connect/utils/version_check.py +++ b/connect/utils/version_check.py @@ -4,8 +4,12 @@ from connect.utils import global_logger +def get_version(): + return connect.__version__ + + def validate_version(): - current_version = connect.__version__ + current_version = get_version() package = "credoai-connect" response = requests.get(f"https://pypi.org/pypi/{package}/json") diff --git a/docs/conf.py b/docs/conf.py index 985b48e..c91bc40 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,14 +63,6 @@ nbsphinx_allow_errors = True # Continue through Jupyter errors nbsphinx_execute = "never" # do not execute jupyter notebooks -autodoc_mock_imports = [ - "dotenv", - "json_api_doc", - "numpy", - "pandas", -] - - # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. 
See the documentation for diff --git a/docs/index.rst b/docs/index.rst index 955c67d..ccca612 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,6 +4,7 @@ Home page Setup + Get Started API reference <_autosummary/connect>
diff --git a/docs/notebooks/quickstart.ipynb b/docs/notebooks/quickstart.ipynb new file mode 100644 index 0000000..0b4e4ad --- /dev/null +++ b/docs/notebooks/quickstart.ipynb @@ -0,0 +1,434 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "858e3872-01c7-4dc5-bc70-eb9a821c3466", + "metadata": { + "tags": [] + }, + "source": [ + "\n", + "\n", + "# Credo AI Connect\n", + "\n", + "**Credo AI Connect** is the library to interact with the Credo AI Governance Platform. It is responsible for receiving \"evidence requirements\" and sending \"evidence\".\n", + "\n", + "In this demo we train a model on some made-up data and send the results to the [Credo AI Governance Platform](https://www.credo.ai/). For a tool to help you assess your AI systems, see [Credo AI Lens](https://credoai-lens.readthedocs.io/en/stable/setup.html).\n", + "\n", + "**Setup**\n", + "\n", + "Connect installation instructions can be found on [readthedocs](https://credoai-connect.readthedocs.io/en/stable/setup.html).\n", + "\n", + "**Find the code**" + ] + }, + { + "cell_type": "raw", + "id": "e5793e90-e1aa-4e5a-a723-7e67b908aa18", + "metadata": {}, + "source": [ + "\n", + "Click here to download this notebook." + ] + }, + { + "cell_type": "markdown", + "id": "2feee674-2dce-4d74-bfbb-d8e58d84415f", + "metadata": {}, + "source": [ + "## Useful keywords\n", + "\n", + "- **Credo AI Platform**: Also referred to as simply \"Platform\". The central AI governance/reporting Platform, found at [https://app.credo.ai/](https://app.credo.ai/)\n", + "\n", + "- **credoconfig**: configuration file to be copied into the user's home folder\n", + "\n", + "- **use_case_name**: The name of your Use Case as it is registered on the Credo AI Platform\n", + "\n", + "- **policy_pack**: A set of governance controls that a Use Case needs to satisfy. A Use Case can have multiple policy packs applied to it.\n", + "\n", + "- **policy_pack_key**: A unique identifier for a policy pack\n", + "\n", + "- **assessment_plan_url**: The link to the assessment plan; this is generated on the Platform and used to download the assessment plan into the Governance object. See the example below.\n", + "\n", + "- **evidence**: Any evaluation of an AI system, formatted specifically to be uploaded to the platform.\n" + ] + }, + { + "cell_type": "markdown", + "id": "ef6d3ba0-6133-474c-97cf-9be99d7bf950", + "metadata": {}, + "source": [ + "## Setup API Connection with the Platform" + ] + }, + { + "cell_type": "markdown", + "id": "4255288f-0ffa-4050-b62c-ee9dcd7a9dd3", + "metadata": {}, + "source": [ + "### Get a config file\n", + "This file contains all the necessary information to connect to the Credo AI Platform. \n", + "\n", + "To generate the config file, once you have logged into the platform, click on your name icon (top left) and follow: \n", + "\n", + "`My Settings -> Tokens -> Plus Sign -> Generate`\n", + "\n", + "Immediately after generating the token, you will be given the option to download the config file.\n", + "\n", + "The default location/file name Connect expects is `~/.credoconfig`, where `~` is your home folder. You can specify any other location when you are initializing the `Governance` object (see below)."
+ ] + }, + { + "cell_type": "markdown", + "id": "b592bab4-843a-461d-af9f-5a38ee895ed3", + "metadata": {}, + "source": [ + "## Do some AI stuff!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd155239-7dc8-4576-91e6-cc09e4463204", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from sklearn.linear_model import LinearRegression\n", + "import sklearn.metrics as sk_metrics\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "id": "33445116-3ad6-48f1-970d-e9e436d9d7ea", + "metadata": {}, + "source": [ + "### Get data and train model\n", + "\n", + "We'll make up some data and fit a simple model to get us started." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62531aa3-11bd-489d-a3cf-a805b327fe46", + "metadata": {}, + "outputs": [], + "source": [ + "model = LinearRegression()\n", + "r = np.random.RandomState(42)\n", + "\n", + "# hallucinate data\n", + "N = 10000\n", + "data = r.randn(N, 2)\n", + "y = (data[:,0] + 3*data[:,1] + r.randn(N)*.2)\n", + "# split\n", + "X_train, X_test, y_train, y_test = train_test_split(data, y)\n", + "\n", + "# train model\n", + "model.fit(X_train, y_train)\n", + "pred_test = model.predict(X_test)" + ] + }, + { + "cell_type": "markdown", + "id": "69153277-5e82-4c76-9b92-9dba5f254323", + "metadata": {}, + "source": [ + "### Assess the AI System\n", + "\n", + "The most likely thing you'll want to send to the Credo AI Platform are _assessments_ of the AI system. These are often _metrics_ that are calculated to summarize aspects of the system's behavior (e.g., performance, fairness) but can be other things too, like descriptive statistics of the dataset.\n", + "\n", + "We'll assume we have a suite of assessments we plan to use for this use-case. The below is just an example - any assessments can be done and sent to the Platform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a255a36d-c26b-463e-80e5-699961c789ef", + "metadata": {}, + "outputs": [], + "source": [ + "required_assessments = [sk_metrics.r2_score, sk_metrics.mean_squared_error]\n", + "\n", + "assessments = {m.__name__: m(y_test, pred_test) for m in required_assessments}" + ] + }, + { + "cell_type": "markdown", + "id": "4fb1c630-4f10-460c-97fc-85330360f070", + "metadata": { + "tags": [] + }, + "source": [ + "## Platform integration in 5 Minutes\n", + "\n", + "To send evidence to the platform takes three steps:\n", + "\n", + "1. Connect to the platform via the Governance class\n", + "2. Use the Adapter class to send the assessments to the Governance class\n", + "3. Export the evidence to the platform.\n", + "\n", + "First we will see all the code together, and then break it down." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47e05093-f918-4e8a-8636-47eac6f0897c", + "metadata": {}, + "outputs": [], + "source": [ + "import connect as ct" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec32fc49-cbf1-4674-836d-91dfc190a416", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# connect to platform via governance\n", + "gov = ct.Governance() # Specify config_path if your config file is not the default one: ~/.credoconfig\n", + "url = 'your assessment url'\n", + "gov.register(assessment_plan_url=url)\n", + "\n", + "# set up adapter and send metrics to governance class\n", + "adapter = ct.Adapter(governance = gov, model_name='My Model')\n", + "source = f\"Quickstart_Connect-{ct.__version__}\"\n", + "adapter.metrics_to_governance(metrics=assessments, source = source)\n", + "\n", + "# export\n", + "gov.export()" + ] + }, + { + "cell_type": "markdown", + "id": "6c69d81a-83d8-4e76-afd9-c16b51f6ff68", + "metadata": {}, + "source": [ + "### 1. Connect to the platform via the Governance Class\n", + "\n", + "The Governance class handles the connection with the Credo AI Platform. On the Platform, you can govern an AI system by specifying \"policy packs\" that define the technical requirements the AI system must meet. This class can retrieve them, which is most useful if you are using [Credo AI Lens](https://credoai-lens.readthedocs.io/en/stable/setup.html), our assessment framework. If you aren't using Lens, the requirements are still important in directing your assessment, but they aren't _programmatically_ connected to your assessments.\n", + "\n", + "Since we are only using `Connect` in this demo, we will ignore that functionality.\n", + "\n", + "The functionality relevant here is that the Governance object handles the API calls allowing you to _send_ evidence to the platform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6df1b60f-48d7-4f6c-9c7d-adb3f166f734", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Retrieve Policy Pack Assessment Plan\n", + "gov = ct.Governance() # Specify config_path if your config file is not the default one: ~/.credoconfig\n", + "url = 'your assessment url'\n", + "gov.register(assessment_plan_url=url)" + ] + }, + { + "cell_type": "markdown", + "id": "12a8ff57-099a-4e63-b2bc-6a6da6779ce2", + "metadata": {}, + "source": [ + "### 2. Use the Adapter class to send the assessments to the Governance class\n", + "\n", + "The `Adapter` class handles structuring your assessments so that the Credo AI Platform can understand them. Connect uses `EvidenceContainers` to convert Python objects like dictionaries into `Evidence`, the structured output that the Platform can understand.
The Adapter then passes this `Evidence` to the `Governance` object for export." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b0fa6b7-2daf-4061-aa37-7e036750c124", + "metadata": {}, + "outputs": [], + "source": [ + "adapter = ct.Adapter(governance = gov, model_name='My Model')\n" + ] + }, + { + "cell_type": "markdown", + "id": "1dfb4be0-eee6-4a9b-8ffa-412b375d5636", + "metadata": {}, + "source": [ + "Once we initialize an adapter, we can use its functionality to send different kinds of evidence.\n", + "\n", + "For instance, metrics (which must be organized as key:value pairs of metric_name:value) are sent to governance using `metrics_to_governance`.\n", + "This converts the dictionary of assessments into `Evidence` (in this case a `MetricEvidence`).\n", + "\n", + "Every time we send evidence we specify a source. This is for you to define however you would like. We suggest you make the source useful for establishing the provenance of your assessments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deb31c08-40a2-4341-b795-5e37333edfc4", + "metadata": {}, + "outputs": [], + "source": [ + "source = f\"Quickstart_Connect-{ct.__version__}\"\n", + "adapter.metrics_to_governance(metrics=assessments, source = source)" + ] + }, + { + "cell_type": "markdown", + "id": "b90b05c4-c541-442d-aa41-79ba567167f3", + "metadata": {}, + "source": [ + "You can see the evidence in the `Governance` class we instantiated before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd32329f-44d0-400d-ba61-0e090e52152f", + "metadata": {}, + "outputs": [], + "source": [ + "gov.get_evidence()" + ] + }, + { + "cell_type": "markdown", + "id": "8dfab5cf-311b-43d6-a846-b6f4486f51c4", + "metadata": {}, + "source": [ + "If you need to send more evidence, you'll just call a function like `metrics_to_governance` again. By default, these functions overwrite the `Evidence` already stored in `Governance`, so you'll have to change an argument if you want to add to the existing evidence instead.\n", + "\n", + "We will send a different type of evidence: a table, which must be a pandas DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6861fda-a0b4-49b4-8f53-c8f187da0c26", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "table = pd.DataFrame({'arbitrary_data': [3,4,5]})\n", + "table.name = 'my_table'\n", + "adapter.table_to_governance(table, source=source, overwrite_governance=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9da37aa7-5305-4ec9-8c49-a11d32116f16", + "metadata": {}, + "outputs": [], + "source": [ + "gov.get_evidence()" + ] + }, + { + "cell_type": "markdown", + "id": "6e4364e8-d5da-436e-a0b3-e38a1b283f11", + "metadata": {}, + "source": [ + "### 3. Export the evidence to the platform.\n", + "\n", + "Exporting is straightforward! You can either export directly to the Platform or to a file. At the time of export, the uploaded evidence is checked against the governance requirements specified on the Platform, and you'll be told what's missing."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bddb93de-6139-4f22-9df0-b97019157541", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# export to API\n", + "gov.export()\n", + "\n", + "# export to file\n", + "gov.export(\"assessment_export.json\")" + ] + }, + { + "cell_type": "markdown", + "id": "d6c71569-6152-42d6-aadc-da60a25eca52", + "metadata": { + "tags": [] + }, + "source": [ + "## Other ways to label your evidence" + ] + }, + { + "cell_type": "markdown", + "id": "21ea5e8a-2cb1-41ec-aafb-a10299774725", + "metadata": {}, + "source": [ + "On the Credo AI Platform, certain governance requirements may only apply to models that are tagged in a certain way. You can get the requirements tags from governance and apply it by passing a dictionary to the `model_tags` of the `Adapter`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd269b76-3fb7-4288-8b0a-bf16772b5b6a", + "metadata": {}, + "outputs": [], + "source": [ + "gov.get_requirement_tags()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "861cdace-f849-4248-bdd7-cd5a86888902", + "metadata": {}, + "outputs": [], + "source": [ + "adapter = ct.Adapter(governance = gov, model_name='My Model', model_tags={'model_type': 'regression'})" + ] + }, + { + "cell_type": "markdown", + "id": "ee605487-1de4-4fe7-8df2-cb8947a260cd", + "metadata": {}, + "source": [ + "If you need to label your evidence with more information than just `source` you can do that too." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "476253e59e65943f63347f07b9cde9e6998ced5a0f745ea85ba02156ab78e294" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/setup.rst b/docs/setup.rst index 59473fe..b2f0496 100644 --- a/docs/setup.rst +++ b/docs/setup.rst @@ -27,4 +27,6 @@ would like to install requirements for testing and formatting, you'll have to install the dev requirements. :: + pip install credoai-connect[dev] +
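Usage sketch (illustrative, not part of the patch above): the end-to-end Adapter flow from the quickstart notebook, run as a plain script. It assumes a valid `~/.credoconfig`; the assessment plan URL, model name, and metric value are placeholders.

```python
# Illustrative sketch of the updated Adapter API; the URL, model name, and metric
# values below are placeholders, and a valid ~/.credoconfig is assumed.
import pandas as pd

import connect as ct

# Connect to the platform; config_path defaults to ~/.credoconfig
gov = ct.Governance()
gov.register(assessment_plan_url="your assessment url")

adapter = ct.Adapter(governance=gov, model_name="My Model")
source = f"Quickstart_Connect-{ct.__version__}"

# Metrics are a dict of {metric_name: value}. `source` is now required and is
# attached to the evidence as a label; overwrite_governance defaults to True.
adapter.metrics_to_governance(metrics={"r2_score": 0.9}, source=source)

# Tables are DataFrames with a `name` attribute. Pass overwrite_governance=False
# to append to the evidence already held by the Governance object.
table = pd.DataFrame({"arbitrary_data": [3, 4, 5]})
table.name = "my_table"
adapter.table_to_governance(table, source=source, overwrite_governance=False)

# Export to the platform, or to a file for manual upload
gov.export()
gov.export("assessment_export.json")
```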
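The new `Adapter._evidence_to_governance` helper accepts either an `EvidenceContainer` subclass or a callable as `evidence_fun`; the `issubclass` check is wrapped in `try/except TypeError` because `issubclass` raises when handed a non-class such as a bound method. Below is a self-contained sketch of that dispatch pattern, using stand-in names rather than the library's own classes.

```python
# Stand-alone illustration of the container-or-callable dispatch; the class and
# function names here are simplified stand-ins, not the library's API.
from functools import partial


class BaseContainer:
    """Stand-in for connect.evidence.EvidenceContainer."""


class TableLikeContainer(BaseContainer):
    def __init__(self, data, labels, meta):
        self.data, self.labels, self.meta = data, labels, meta


def to_evidence(data, labels, metadata, container_class):
    # Stand-in for Adapter._to_evidence: wrap the data in the requested container
    return [container_class(data, labels, metadata)]


def dispatch(evidence_fun, data, labels=None, metadata=None):
    try:
        # A container class gets bound to the generic converter...
        if issubclass(evidence_fun, BaseContainer):
            evidence_fun = partial(to_evidence, container_class=evidence_fun)
    except TypeError:
        # ...while issubclass raises TypeError for non-classes (e.g. a bound
        # method), in which case the callable is used as-is.
        pass
    return evidence_fun(data=data, labels=labels, metadata=metadata)


evidence = dispatch(TableLikeContainer, data={"x": [1]}, labels={"source": "demo"})
```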