From 5c928ccd8fcbdafa82573549c147b804d5670d3d Mon Sep 17 00:00:00 2001 From: Jack Gerrits Date: Mon, 10 Jul 2023 23:02:42 -0400 Subject: [PATCH] docs: add getting started for python (#51) --- reductionml-python/docs/environment.yml | 3 + reductionml-python/docs/source/conf.py | 1 + .../docs/source/getting_started.ipynb | 154 ++++++++++++++++++ reductionml-python/docs/source/index.rst | 13 +- reductionml-python/docs/source/reference.rst | 6 + 5 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 reductionml-python/docs/source/getting_started.ipynb create mode 100644 reductionml-python/docs/source/reference.rst diff --git a/reductionml-python/docs/environment.yml b/reductionml-python/docs/environment.yml index 1ce363a..4198050 100644 --- a/reductionml-python/docs/environment.yml +++ b/reductionml-python/docs/environment.yml @@ -5,3 +5,6 @@ dependencies: - python=3.10 - pip: - furo + - myst-nb + - scikit-learn + - matplotlib diff --git a/reductionml-python/docs/source/conf.py b/reductionml-python/docs/source/conf.py index be06922..faa2a99 100644 --- a/reductionml-python/docs/source/conf.py +++ b/reductionml-python/docs/source/conf.py @@ -34,6 +34,7 @@ "sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx.ext.napoleon", + "myst_nb" ] # Add any paths that contain templates here, relative to this directory. 
diff --git a/reductionml-python/docs/source/getting_started.ipynb b/reductionml-python/docs/source/getting_started.ipynb new file mode 100644 index 0000000..022242b --- /dev/null +++ b/reductionml-python/docs/source/getting_started.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started\n", + "\n", + "Before we get started, ensure you have the required packages installed:\n", + "\n", + "```sh\n", + "pip install reductionml scikit-learn matplotlib\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "from reductionml import Workspace, FormatType" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we need to define the configuration we are going to use. You can think of this as the definition of the structure of the reduction stack, including everything that affects learning and prediction, such as hyperparameters.\n", + "\n", + "We will create an instance of [`Coin`](https://jackgerrits.com/reductionml/book/coin.html) as the only reduction in our stack, which solves linear regression problems." + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "config = {\n", + " \"entryReduction\": {\n", + " \"config\": {},\n", + " \"typename\": \"Coin\"\n", + " }, \n", + " \"globalConfig\": {\n", + " \"interactions\": [\n", + " [\"Default\", \"Default\"]\n", + " ]\n", + " }\n", + "}\n", + "\n", + "workspace = Workspace.create_from_config(config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're going to use the diabetes dataset from sklearn and plot the mean squared error after each example.
We're using the [JSON](https://jackgerrits.github.io/reductionml/book/input_formats.html#json-format) format to provide the features to ReductionML.\n", + "\n", + "We use `predict_then_learn` to predict the output of the model before learning and keep track of the progressive loss." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import sklearn.datasets\n", + "import sklearn.model_selection\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "def format_input(features, label):\n", + " return {\n", + " \"label\": {\n", + " \"value\": label,\n", + " \"weight\": 1.0\n", + " },\n", + " \"features\": {\n", + " \":default\": features.tolist()\n", + " }\n", + " }\n", + "\n", + "dataset = sklearn.datasets.load_diabetes()\n", + "\n", + "parser = workspace.create_parser(FormatType.Json)\n", + "\n", + "mse_error = 0\n", + "count = 0\n", + "mse_errors = []\n", + "for (input_features, input_label) in zip(dataset.data, dataset.target):\n", + " features, label = parser.parse(format_input(input_features, input_label))\n", + " prediction = workspace.predict_then_learn(features, label)\n", + " mse_error += (label.value - prediction.prediction) ** 2\n", + " count += 1\n", + " mse_errors.append(mse_error / count)\n", + "\n", + "plt.plot(mse_errors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/reductionml-python/docs/source/index.rst b/reductionml-python/docs/source/index.rst index 9ce6fe1..fd10c3b 100644 --- a/reductionml-python/docs/source/index.rst +++ b/reductionml-python/docs/source/index.rst @@ -1,7 +1,14 @@ reductionml =========== -.. automodule:: reductionml - :members: - :undoc-members: +Install from PyPi: +.. code-block:: bash + + pip install reductionml + +.. 
toctree:: + :hidden: + + getting_started + reference diff --git a/reductionml-python/docs/source/reference.rst b/reductionml-python/docs/source/reference.rst new file mode 100644 index 0000000..383325f --- /dev/null +++ b/reductionml-python/docs/source/reference.rst @@ -0,0 +1,6 @@ +Reference +========= + +.. automodule:: reductionml + :members: + :undoc-members: