From 51a2831cf4941894f800de472dda34fafe3a77c5 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 30 Oct 2023 22:59:48 +0100 Subject: [PATCH 01/22] Remove scrapbook and papermill deps Signed-off-by: miguelgfierro --- setup.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.py b/setup.py index 5aacc47a3..3ce8b5d4b 100644 --- a/setup.py +++ b/setup.py @@ -48,12 +48,10 @@ "retrying>=1.3.3", "pandera[strategies]>=0.6.5", # For generating fake datasets "scikit-surprise>=1.0.6", - "scrapbook>=0.5.0,<1.0.0", "hyperopt>=0.1.2,<1", "ipykernel>=4.6.1,<7", "jupyter>=1,<2", "locust>=1,<2", - "papermill>=2.1.2,<3", ] # shared dependencies From 964bca0582b491fa4fe73f385cfee9c4a610d1f6 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 30 Oct 2023 23:01:57 +0100 Subject: [PATCH 02/22] notebook utils programmatic execution Signed-off-by: miguelgfierro --- recommenders/utils/notebook_utils.py | 114 +++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/recommenders/utils/notebook_utils.py b/recommenders/utils/notebook_utils.py index c47154d60..723a38231 100644 --- a/recommenders/utils/notebook_utils.py +++ b/recommenders/utils/notebook_utils.py @@ -2,6 +2,10 @@ # Licensed under the MIT License. import os +import re +import nbformat +from nbconvert.preprocessors import ExecutePreprocessor +from IPython.display import display def is_jupyter(): @@ -35,3 +39,113 @@ def is_databricks(): return False except NameError: return False + + +def execute_notebook( + input_notebook, output_notebook, parameters, kernel_name="python3", timeout=600 +): + """Execute a notebook while passing parameters to it. + + .. note:: + + Ensure your Jupyter Notebook is set up with parameters that can be + modified and read. Use Markdown cells to specify parameters that need + modification and code cells to set parameters that need to be read. + + Args: + input_notebook (str): Path to the input notebook. 
+ output_notebook (str): Path to the output notebook + parameters (dict): Dictionary of parameters to pass to the notebook. + kernel_name (str): Kernel name. + timeout (int): Timeout (in seconds) for each cell to execute. + """ + + # Load the Jupyter Notebook + with open(input_notebook, "r") as notebook_file: + notebook_content = nbformat.read(notebook_file, as_version=4) + + # Search for and replace parameter values in code cells + for cell in notebook_content.cells: + if ( + "tags" in cell.metadata + and "parameters" in cell.metadata["tags"] + and cell.cell_type == "code" + ): + cell_source = cell.source + modified_cell_source = ( + cell_source # Initialize a variable to hold the modified source + ) + for param, new_value in parameters.items(): + # Check if the new value is a string and surround it with quotes if necessary + if isinstance(new_value, str): + new_value = f'"{new_value}"' + # Define a regular expression pattern to match parameter assignments and ignore comments + pattern = re.compile( + rf"\b{param}\s*=\s*([^#\n]+)(?:#.*$)?", re.MULTILINE + ) + matches = re.findall(pattern, cell_source) + for match in matches: + old_assignment = match.strip() + modified_cell_source = modified_cell_source.replace( + old_assignment, f"{new_value}" + ) + # Update the cell's source within notebook_content + cell.source = modified_cell_source + + # Create an execution preprocessor + execute_preprocessor = ExecutePreprocessor(timeout=timeout, kernel_name=kernel_name) + + # Execute the notebook + executed_notebook, _ = execute_preprocessor.preprocess( + notebook_content, {"metadata": {"path": "./"}} + ) + + # Save the executed notebook + with open(output_notebook, "w", encoding="utf-8") as executed_notebook_file: + nbformat.write(executed_notebook, executed_notebook_file) + + +def store_metadata(name, value): + """Store data in the notebook's output source code. + This function is similar to scrapbook.glue(). + + Args: + name (str): Name of the data. 
+ value (int,float,str): Value of the data. + """ + + metadata = {"notebook_utils": {"name": name, "data": True, "display": False}} + data_json = { + "application/notebook_utils.json+json": { + "name": name, + "data": value, + "encoder": "json", + } + } + display(data_json, metadata=metadata, raw=True) + + +def read_notebook(path): + """Read the metadata stored in the notebook's output source code. + This function is similar to scrapbook.read_notebook(). + + Args: + path (str): Path to the notebook. + + Returns: + dict: Dictionary of data stored in the notebook. + """ + # Load the Jupyter Notebook + with open(path, "r") as notebook_file: + notebook_content = nbformat.read(notebook_file, as_version=4) + + # Collect the name/data pairs tagged with "notebook_utils" from the outputs of code cells + results = {} + for cell in notebook_content.cells: + if cell.cell_type == "code" and "outputs" in cell: + for outputs in cell.outputs: + if "metadata" in outputs and "notebook_utils" in outputs.metadata: + name = outputs.data["application/notebook_utils.json+json"]["name"] + data = outputs.data["application/notebook_utils.json+json"]["data"] + results[name] = data + return results From ab63e1c8160cb06a373c36fe498021339d333e63 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 06:58:20 +0100 Subject: [PATCH 03/22] Test notebook programmatic Signed-off-by: miguelgfierro --- .../recommenders/utils/test_notebook_utils.py | 104 +++++++++++++++++- 1 file changed, 99 insertions(+), 5 deletions(-) diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py index 24223b703..755e09267 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.py +++ b/tests/unit/recommenders/utils/test_notebook_utils.py @@ -1,25 +1,43 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
- +import nbclient import pytest import papermill as pm import scrapbook as sb from pathlib import Path -from recommenders.utils.notebook_utils import is_jupyter, is_databricks +from recommenders.utils.notebook_utils import ( + is_jupyter, + is_databricks, + execute_notebook, + read_notebook, +) + + +@pytest.fixture(scope="function") +def notebook_types(): + return Path(__file__).absolute().parent.joinpath("test_notebook_utils.ipynb") + + +@pytest.fixture(scope="function") +def notebook_programmatic(): + return ( + Path(__file__) + .absolute() + .parent.joinpath("programmatic_notebook_execution.ipynb") + ) @pytest.mark.notebooks -def test_is_jupyter(output_notebook, kernel_name): +def test_is_jupyter(notebook_types, output_notebook, kernel_name): # Test on the terminal assert is_jupyter() is False assert is_databricks() is False # Test on Jupyter notebook - path = Path(__file__).absolute().parent.joinpath("test_notebook_utils.ipynb") pm.execute_notebook( - path, + notebook_types, output_notebook, kernel_name=kernel_name, ) @@ -36,3 +54,79 @@ def test_is_jupyter(output_notebook, kernel_name): @pytest.mark.skip(reason="TODO: Implement this") def test_is_databricks(): pass + + +def test_notebook_execution_int(notebook_programmatic, output_notebook, kernel_name): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(a=6), + ) + + results = read_notebook(output_notebook) + assert results["response1"] == 8 + + +def test_notebook_execution_float(notebook_programmatic, output_notebook, kernel_name): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(a=1.5), + ) + + results = read_notebook(output_notebook) + assert results["response1"] == 3.5 + + +def test_notebook_execution_letter(notebook_programmatic, output_notebook, kernel_name): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(b="M"), + ) + + 
results = read_notebook(output_notebook) + assert results["response2"] is True + + +def test_notebook_execution_other_letter( + notebook_programmatic, output_notebook, kernel_name +): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(b="A"), + ) + + results = read_notebook(output_notebook) + assert results["response2"] == "A" + + +def test_notebook_execution_value_error_fails( + notebook_programmatic, output_notebook, kernel_name +): + with pytest.raises(nbclient.exceptions.CellExecutionError): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(b=1), + ) + + +def test_notebook_execution_int_with_comment( + notebook_programmatic, output_notebook, kernel_name +): + execute_notebook( + notebook_programmatic, + output_notebook, + kernel_name=kernel_name, + parameters=dict(c=10), + ) + + results = read_notebook(output_notebook) + assert results["response3"] == 12 From 1ac30230b72841d45e197a00c71874ec7f4d18e1 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 07:02:39 +0100 Subject: [PATCH 04/22] Added test notebook for utils Signed-off-by: miguelgfierro --- .../programmatic_notebook_execution.ipynb | 173 ++++++++++++++++++ .../utils/test_notebook_utils.ipynb | 40 ++-- 2 files changed, 196 insertions(+), 17 deletions(-) create mode 100644 tests/unit/recommenders/utils/programmatic_notebook_execution.ipynb diff --git a/tests/unit/recommenders/utils/programmatic_notebook_execution.ipynb b/tests/unit/recommenders/utils/programmatic_notebook_execution.ipynb new file mode 100644 index 000000000..739709f9d --- /dev/null +++ b/tests/unit/recommenders/utils/programmatic_notebook_execution.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "7123b694", + "metadata": {}, + "source": [ + "Copyright (c) Recommenders contributors.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "id": "17b046f1", + "metadata": {}, + "source": [ + "# Programmatic execution of Jupyter notebooks\n", + "\n", + "This is a Jupyter notebook that can be parametrized so that it can be executed externally. Also, we provide utilities to extract the outputs computed by the notebook.\n", + "\n", + "The main use case for this is to test the notebooks.\n", + "\n", + "**NOTE:**\n", + "Make sure you parametrize the cell where you want to inject parameters. To do so, go to View, Cell toolbar, Tags. In the text box, add `parameters`" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "494d7493", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.8.13 (default, Mar 28 2022, 11:38:47) \n", + "[GCC 7.5.0]\n" + ] + } + ], + "source": [ + "import os\n", + "import sys\n", + "\n", + "from recommenders.utils.notebook_utils import store_metadata\n", + "\n", + "print(sys.version)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "298e0205", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "a = 1\n", + "b = \"M\"\n", + "c = 5 # This is a comment" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5c802f7c", + "metadata": {}, + "outputs": [], + "source": [ + "def plus2(num):\n", + " return num + 2\n", + "\n", + "def is_letter(letter):\n", + " if letter == \"M\":\n", + " return True\n", + " elif letter != \"M\" and isinstance(letter, str):\n", + " return letter\n", + " else:\n", + " raise ValueError()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7f4af7c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + } + ], + "source": [ + "response1 = plus2(a)\n", + "print(response1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "004ba65c", + "metadata": {}, + "outputs": [ + { 
"name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "response2 = is_letter(b)\n", + "print(response2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ed0e55a", + "metadata": {}, + "outputs": [], + "source": [ + "response3 = plus2(c)\n", + "print(response3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0de1951d", + "metadata": {}, + "outputs": [], + "source": [ + "store_metadata(\"response1\", response1)\n", + "store_metadata(\"response2\", response2)\n", + "store_metadata(\"response3\", response3)" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/recommenders/utils/test_notebook_utils.ipynb b/tests/unit/recommenders/utils/test_notebook_utils.ipynb index 0d692267e..e9d0d72e4 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.ipynb +++ b/tests/unit/recommenders/utils/test_notebook_utils.ipynb @@ -2,9 +2,16 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "collapsed": true - }, + "metadata": {}, + "source": [ + "Copyright (c) Recommenders contributors.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "# This is a test notebook for recommenders.utils.notebook_utils module" ] @@ -17,7 +24,6 @@ "source": [ "# set the environment path to find Recommenders\n", "import sys\n", - "\n", "import scrapbook as sb\n", "from recommenders.utils.notebook_utils import is_jupyter, is_databricks" @@ -29,40 +35,40 @@ "metadata": {}, "outputs": [ { - "output_type": "display_data", "data": { "application/scrapbook.scrap.json+json": { - "name": "is_jupyter", "data": true, "encoder": "json", + "name": "is_jupyter", "version": 1 } }, "metadata": { "scrapbook": { - "name": "is_jupyter", "data": true, - "display": false + "display": false, + "name": "is_jupyter" } - } + }, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { "application/scrapbook.scrap.json+json": { - "name": "is_databricks", "data": false, "encoder": "json", + "name": "is_databricks", "version": 1 } }, "metadata": { "scrapbook": { - "name": "is_databricks", "data": true, - "display": false + "display": false, + "name": "is_databricks" } - } + }, + "output_type": "display_data" } ], "source": [ @@ -81,13 +87,13 @@ "metadata": { "celltoolbar": "Tags", "kernelspec": { - "name": "python3", "display_name": "Python 3.6.12 64-bit ('sb_full': conda)", "metadata": { "interpreter": { "hash": "f28711ae1fad89778b64817fc2d746effb845deda73edae96b2473c20b2d4f70" } - } + }, + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -104,4 +110,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} From 6a03c423b8d5cfc7b35c735f6375f31d23430b88 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 08:00:05 +0100 Subject: [PATCH 05/22] data notebooks Signed-off-by: miguelgfierro --- examples/01_prepare_data/mind_utils.ipynb | 53 +++++++++++++------ .../wikidata_knowledge_graph.ipynb | 9 ++-- tests/data_validation/examples/test_mind.py | 28 +++++----- .../data_validation/examples/test_wikidata.py | 
16 +++--- 4 files changed, 61 insertions(+), 45 deletions(-) diff --git a/examples/01_prepare_data/mind_utils.ipynb b/examples/01_prepare_data/mind_utils.ipynb index 77fc03759..e03a3683d 100644 --- a/examples/01_prepare_data/mind_utils.ipynb +++ b/examples/01_prepare_data/mind_utils.ipynb @@ -13,19 +13,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Mind Utils Generation\n", + "# MIND Utils Generation\n", "\n", - "Many news recommendation methods ultilize word embeddings, news vertical embeddings, news subvertical embeddings and user id embedding. Therefore, it is necessary to generate a word dictionary, a vertical dictionary, a subvertical dictionary and a userid dictionary to convert words, news verticals, subvericals and user ids from strings to indexes. To use the pretrain word embedding, a embedding matrix is generated as the intial weight of the word embedding layer.\n", + "MIND dataset\\[1\\] is a large-scale English news dataset. It was collected from anonymized behavior logs of Microsoft News website. MIND contains 1,000,000 users, 161,013 news articles and 15,777,377 impression logs. Every news article contains rich textual content including title, abstract, body, category and entities. Each impression log contains the click events, non-clicked events and historical news click behaviors of this user before this impression.\n", "\n", + "Many news recommendation methods use word embeddings, news vertical embeddings, news subvertical embeddings and user id embedding. Therefore, it is necessary to generate a word dictionary, a vertical dictionary, a subvertical dictionary and a `userid` dictionary to convert words, news verticals, subverticals and user ids from strings to indexes. 
To use the pretrain word embedding, an embedding matrix is generated as the initial weight of the word embedding layer.\n", "\n", - "This notebook gives examples about how to generate\n", - "* word_dict.pkl: convert the words in news titles into indexes.\n", - "* word_dict_all.pkl: convert the words in news titles and abstracts into indexes.\n", - "* embedding.npy: pretrained word embedding matrix of words in word_dict.pkl\n", - "* embedding_all.npy: pretrained embedding matrix of words in word_dict_all.pkl\n", - "* vert_dict.pkl: convert news verticals into indexes.\n", - "* subvert_dict.pkl: convert news subverticals into indexes.\n", - "* uid2index.pkl: convert user ids into indexes." + "This notebook gives examples about how to generate:\n", + "* `word_dict.pkl`: convert the words in news titles into indexes.\n", + "* `word_dict_all.pkl`: convert the words in news titles and abstracts into indexes.\n", + "* `embedding.npy`: pretrained word embedding matrix of words in word_dict.pkl\n", + "* `embedding_all.npy`: pretrained embedding matrix of words in word_dict_all.pkl\n", + "* `vert_dict.pkl`: convert news verticals into indexes.\n", + "* `subvert_dict.pkl`: convert news subverticals into indexes.\n", + "* `uid2index.pkl`: convert user ids into indexes." 
] }, { @@ -49,9 +50,9 @@ "import pandas as pd\n", "from tqdm import tqdm\n", "import pickle\n", - "import scrapbook as sb\n", "from collections import Counter\n", "from tempfile import TemporaryDirectory\n", + "\n", "from recommenders.datasets.mind import (download_mind,\n", " extract_mind,\n", " download_and_extract_glove,\n", @@ -59,6 +60,7 @@ " word_tokenize\n", " )\n", "from recommenders.datasets.download_utils import unzip_file\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n" ] @@ -66,10 +68,15 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "mind_type=\"demo\"\n", + "# MIND sizes: \"demo\", \"small\" or \"large\"\n", + "mind_type=\"demo\" \n", "# word_embedding_dim should be in [50, 100, 200, 300]\n", "word_embedding_dim = 300" ] @@ -465,7 +472,14 @@ } ], "source": [ - "sb.glue(\"utils_state\", utils_state)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"vert_num\", len(vert_dict))\n", + "store_metadata(\"subvert_num\", len(subvert_dict))\n", + "store_metadata(\"word_num\", len(word_dict))\n", + "store_metadata(\"word_num_all\", len(word_dict_all))\n", + "store_metadata(\"embedding_exist_num\", len(exist_word))\n", + "store_metadata(\"embedding_exist_num_all\", len(exist_all_word))\n", + "store_metadata(\"uid2index\", len(uid2index))" ] }, { @@ -476,6 +490,15 @@ "source": [ "tmpdir.cleanup()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "\\[1\\] Wu, Fangzhao, et al. \"MIND: A Large-scale Dataset for News Recommendation\" Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. https://msnews.github.io/competition.html
" + ] } ], "metadata": { @@ -500,4 +523,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/01_prepare_data/wikidata_knowledge_graph.ipynb b/examples/01_prepare_data/wikidata_knowledge_graph.ipynb index 067f917c4..7d8c36490 100644 --- a/examples/01_prepare_data/wikidata_knowledge_graph.ipynb +++ b/examples/01_prepare_data/wikidata_knowledge_graph.ipynb @@ -32,14 +32,12 @@ "source": [ "# Set logging error\n", "import logging\n", - "level = logging.ERROR\n", "logger = logging.getLogger()\n", - "logger.setLevel(level)\n", + "logger.setLevel(logging.ERROR)\n", "for handler in logger.handlers:\n", " handler.setLevel(level)\n", "\n", "import sys\n", - "import scrapbook as sb\n", "import pandas as pd\n", "import networkx as nx\n", "import matplotlib.pyplot as plt\n", @@ -50,6 +48,7 @@ " query_entity_links, \n", " read_linked_entities,\n", " query_entity_description)\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")" ] @@ -582,8 +581,8 @@ } ], "source": [ - "# Record results with papermill for unit-tests\n", - "sb.glue(\"length_result\", number_movies)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"length_result\", number_movies)" ] } ], diff --git a/tests/data_validation/examples/test_mind.py b/tests/data_validation/examples/test_mind.py index e03162bf9..632c01c82 100644 --- a/tests/data_validation/examples/test_mind.py +++ b/tests/data_validation/examples/test_mind.py @@ -1,14 +1,12 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
-import pytest -import papermill as pm -import scrapbook as sb +from recommenders.utils.notebook_utils import execute_notebook, read_notebook def test_mind_utils_runs(notebooks, output_notebook, kernel_name, tmp): notebook_path = notebooks["mind_utils"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -18,20 +16,18 @@ def test_mind_utils_runs(notebooks, output_notebook, kernel_name, tmp): def test_mind_utils_values(notebooks, output_notebook, kernel_name, tmp): notebook_path = notebooks["mind_utils"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, - parameters=dict(mind_type="small", word_embedding_dim=300), + parameters=dict(mind_type="demo", word_embedding_dim=300), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) - assert results["utils_state"]["vert_num"] == 17 - assert results["utils_state"]["subvert_num"] == 17 - assert results["utils_state"]["word_num"] == 23404 - assert results["utils_state"]["word_num_all"] == 41074 - assert results["utils_state"]["embedding_exist_num"] == 22408 - assert results["utils_state"]["embedding_exist_num_all"] == 37634 - assert results["utils_state"]["uid2index"] == 5000 + assert results["vert_num"] == 17 + assert results["subvert_num"] == 17 + assert results["word_num"] == 23404 + assert results["word_num_all"] == 41074 + assert results["embedding_exist_num"] == 22408 + assert results["embedding_exist_num_all"] == 37634 + assert results["uid2index"] == 5000 diff --git a/tests/data_validation/examples/test_wikidata.py b/tests/data_validation/examples/test_wikidata.py index cdee1699b..65c676f67 100644 --- a/tests/data_validation/examples/test_wikidata.py +++ b/tests/data_validation/examples/test_wikidata.py @@ -3,16 +3,16 @@ import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import 
execute_notebook, read_notebook @pytest.mark.notebooks -@pytest.mark.skip(reason="Wikidata API is unstable") +# @pytest.mark.skip(reason="Wikidata API is unstable") def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): notebook_path = notebooks["wikidata_knowledge_graph"] MOVIELENS_SAMPLE_SIZE = 5 - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -25,10 +25,10 @@ def test_wikidata_runs(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.notebooks -@pytest.mark.skip(reason="Wikidata API is unstable") +# @pytest.mark.skip(reason="Wikidata API is unstable") def test_wikidata_values(notebooks, output_notebook, kernel_name): notebook_path = notebooks["wikidata_knowledge_graph"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -36,9 +36,7 @@ def test_wikidata_values(notebooks, output_notebook, kernel_name): MOVIELENS_DATA_SIZE="100k", MOVIELENS_SAMPLE=True, MOVIELENS_SAMPLE_SIZE=5 ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) # NOTE: The return number should be always 5, but sometimes we get less because wikidata is unstable assert results["length_result"] >= 1 From e1c2b63727898176e79014765a19e818f1a10176 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 10:57:04 +0100 Subject: [PATCH 06/22] Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro --- examples/00_quick_start/als_movielens.ipynb | 1632 ++++++++--------- examples/00_quick_start/dkn_MIND.ipynb | 2 +- .../00_quick_start/fastai_movielens.ipynb | 22 +- .../00_quick_start/geoimc_movielens.ipynb | 654 +++---- examples/00_quick_start/ncf_movielens.ipynb | 20 +- examples/00_quick_start/rbm_movielens.ipynb | 10 +- examples/00_quick_start/sar_movielens.ipynb | 12 +- .../00_quick_start/wide_deep_movielens.ipynb | 12 +- 
examples/00_quick_start/xdeepfm_criteo.ipynb | 4 +- .../baseline_deep_dive.ipynb | 16 +- .../cornac_bivae_deep_dive.ipynb | 8 +- .../cornac_bpr_deep_dive.ipynb | 8 +- .../lightgcn_deep_dive.ipynb | 8 +- .../ncf_deep_dive.ipynb | 16 +- .../sar_deep_dive.ipynb | 8 +- .../surprise_svd_deep_dive.ipynb | 20 +- .../mmlspark_lightgbm_criteo.ipynb | 4 +- .../vowpal_wabbit_deep_dive.ipynb | 20 +- examples/02_model_hybrid/fm_deep_dive.ipynb | 2 +- .../02_model_hybrid/lightfm_deep_dive.ipynb | 8 +- examples/README.md | 2 +- examples/template.ipynb | 2 +- tests/README.md | 8 +- .../functional/examples/test_notebooks_gpu.py | 93 +- .../examples/test_notebooks_pyspark.py | 23 +- .../examples/test_notebooks_python.py | 56 +- tests/smoke/examples/test_notebooks_gpu.py | 63 +- .../smoke/examples/test_notebooks_pyspark.py | 16 +- tests/smoke/examples/test_notebooks_python.py | 40 +- tests/unit/examples/test_notebooks_gpu.py | 16 +- tests/unit/examples/test_notebooks_pyspark.py | 16 +- tests/unit/examples/test_notebooks_python.py | 22 +- .../recommenders/utils/test_notebook_utils.py | 2 +- 33 files changed, 1376 insertions(+), 1469 deletions(-) diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index 4e1c21a09..dc784f996 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -1,818 +1,818 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Recommenders contributors.\n", - "\n", - "Licensed under the MIT License." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Running ALS on MovieLens (PySpark)\n", - "\n", - "Matrix factorization by [ALS](https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/recommendation.html#ALS) (Alternating Least Squares) is a well known collaborative filtering algorithm.\n", - "\n", - "This notebook provides an example of how to utilize and evaluate ALS PySpark ML (DataFrame-based API) implementation, meant for large-scale distributed datasets. We use a smaller dataset in this example to run ALS efficiently on multiple cores of a [Data Science Virtual Machine](https://azure.microsoft.com/en-gb/services/virtual-machines/data-science-virtual-machines/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](../../SETUP.md) to install the PySpark environment." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "System version: 3.8.0 (default, Nov 6 2019, 21:49:08) \n", - "[GCC 7.3.0]\n", - "Spark version: 3.2.0\n" - ] - } - ], - "source": [ - "# set the environment path to find Recommenders\n", - "import sys\n", - "import pyspark\n", - "from pyspark.ml.recommendation import ALS\n", - "import pyspark.sql.functions as F\n", - "from pyspark.sql import SparkSession\n", - "from pyspark.sql.types import StructType, StructField\n", - "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", - "\n", - "from recommenders.utils.timer import Timer\n", - "from recommenders.datasets import movielens\n", - "from recommenders.utils.notebook_utils import is_jupyter\n", - "from recommenders.datasets.spark_splitters import spark_random_split\n", - "from 
recommenders.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", - "from recommenders.utils.spark_utils import start_or_get_spark\n", - "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Spark version: {}\".format(pyspark.__version__))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set the default parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# top k items to recommend\n", - "TOP_K = 10\n", - "\n", - "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = '100k'\n", - "\n", - "# Column names for the dataset\n", - "COL_USER = \"UserId\"\n", - "COL_ITEM = \"MovieId\"\n", - "COL_RATING = \"Rating\"\n", - "COL_TIMESTAMP = \"Timestamp\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 0. Set up Spark context\n", - "\n", - "The following settings work well for debugging locally on VM - change when running on a cluster. We set up a giant single executor with many threads and specify memory cap. " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# the following settings work well for debugging locally on VM - change when running on a cluster\n", - "# set up a giant single executor with many threads and specify memory cap\n", - "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\")\n", - "spark.conf.set(\"spark.sql.analyzer.failAmbiguousSelfJoin\", \"false\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 1. 
Download the MovieLens dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:05<00:00, 882KB/s]\n", - " \r" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Recommenders contributors.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Running ALS on MovieLens (PySpark)\n", + "\n", + "Matrix factorization by [ALS](https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/recommendation.html#ALS) (Alternating Least Squares) is a well known collaborative filtering algorithm.\n", + "\n", + "This notebook provides an example of how to utilize and evaluate ALS PySpark ML (DataFrame-based API) implementation, meant for large-scale distributed datasets. We use a smaller dataset in this example to run ALS efficiently on multiple cores of a [Data Science Virtual Machine](https://azure.microsoft.com/en-gb/services/virtual-machines/data-science-virtual-machines/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note**: This notebook requires a PySpark environment to run properly. Please follow the steps in [SETUP.md](../../SETUP.md) to install the PySpark environment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System version: 3.8.0 (default, Nov 6 2019, 21:49:08) \n", + "[GCC 7.3.0]\n", + "Spark version: 3.2.0\n" + ] + } + ], + "source": [ + "# set the environment path to find Recommenders\n", + "import sys\n", + "import pyspark\n", + "from pyspark.ml.recommendation import ALS\n", + "import pyspark.sql.functions as F\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.types import StructType, StructField\n", + "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "\n", + "from recommenders.utils.timer import Timer\n", + "from recommenders.datasets import movielens\n", + "from recommenders.utils.notebook_utils import is_jupyter\n", + "from recommenders.datasets.spark_splitters import spark_random_split\n", + "from recommenders.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", + "from recommenders.utils.spark_utils import start_or_get_spark\n", + "\n", + "print(\"System version: {}\".format(sys.version))\n", + "print(\"Spark version: {}\".format(pyspark.__version__))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the default parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "# top k items to recommend\n", + "TOP_K = 10\n", + "\n", + "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", + "MOVIELENS_DATA_SIZE = '100k'\n", + "\n", + "# Column names for the dataset\n", + "COL_USER = \"UserId\"\n", + "COL_ITEM = \"MovieId\"\n", + "COL_RATING = \"Rating\"\n", + "COL_TIMESTAMP = \"Timestamp\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 0. 
Set up Spark context\n", + "\n", + "The following settings work well for debugging locally on VM - change when running on a cluster. We set up a giant single executor with many threads and specify memory cap. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# the following settings work well for debugging locally on VM - change when running on a cluster\n", + "# set up a giant single executor with many threads and specify memory cap\n", + "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\")\n", + "spark.conf.set(\"spark.sql.analyzer.failAmbiguousSelfJoin\", \"false\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Download the MovieLens dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:05<00:00, 882KB/s]\n", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------+-------+------+---------+\n", + "|UserId|MovieId|Rating|Timestamp|\n", + "+------+-------+------+---------+\n", + "| 196| 242| 3.0|881250949|\n", + "| 186| 302| 3.0|891717742|\n", + "| 22| 377| 1.0|878887116|\n", + "| 244| 51| 2.0|880606923|\n", + "| 166| 346| 1.0|886397596|\n", + "| 298| 474| 4.0|884182806|\n", + "| 115| 265| 2.0|881171488|\n", + "| 253| 465| 5.0|891628467|\n", + "| 305| 451| 3.0|886324817|\n", + "| 6| 86| 3.0|883603013|\n", + "| 62| 257| 2.0|879372434|\n", + "| 286| 1014| 5.0|879781125|\n", + "| 200| 222| 5.0|876042340|\n", + "| 210| 40| 3.0|891035994|\n", + "| 224| 29| 3.0|888104457|\n", + "| 303| 785| 3.0|879485318|\n", + "| 122| 387| 5.0|879270459|\n", + "| 194| 274| 2.0|879539794|\n", + "| 291| 1042| 4.0|874834944|\n", + "| 234| 1184| 2.0|892079237|\n", + 
"+------+-------+------+---------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", + "schema = StructType(\n", + " (\n", + " StructField(COL_USER, IntegerType()),\n", + " StructField(COL_ITEM, IntegerType()),\n", + " StructField(COL_RATING, FloatType()),\n", + " StructField(COL_TIMESTAMP, LongType()),\n", + " )\n", + ")\n", + "\n", + "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)\n", + "data.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Split the data using the Spark random splitter provided in utilities" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "N train 75018\n", + "N test 24982\n" + ] + } + ], + "source": [ + "train, test = spark_random_split(data, ratio=0.75, seed=123)\n", + "print (\"N train\", train.cache().count())\n", + "print (\"N test\", test.cache().count())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Train the ALS model on the training data, and get the top-k recommendations for our testing data\n", + "\n", + "To predict movie ratings, we use the rating data in the training set as users' explicit feedback. The hyperparameters used in building the model are referenced from [here](http://mymedialite.net/examples/datasets.html). We do not constrain the latent factors (`nonnegative = False`) in order to allow for both positive and negative preferences towards movies.\n", + "Timing will vary depending on the machine being used to train." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "header = {\n", + " \"userCol\": COL_USER,\n", + " \"itemCol\": COL_ITEM,\n", + " \"ratingCol\": COL_RATING,\n", + "}\n", + "\n", + "\n", + "als = ALS(\n", + " rank=10,\n", + " maxIter=15,\n", + " implicitPrefs=False,\n", + " regParam=0.05,\n", + " coldStartStrategy='drop',\n", + " nonnegative=False,\n", + " seed=42,\n", + " **header\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Took 7.5410127229988575 seconds for training.\n" + ] + } + ], + "source": [ + "with Timer() as train_time:\n", + " model = als.fit(train)\n", + "\n", + "print(\"Took {} seconds for training.\".format(train_time.interval))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the movie recommendation use case, recommending movies that have been rated by the users does not make sense. Therefore, the rated movies are removed from the recommended items.\n", + "\n", + "In order to achieve this, we recommend all movies to all users, and then remove the user-movie pairs that exist in the training dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 126:====================================================>(198 + 2) / 200]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Took 25.246142672998758 seconds for prediction.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r\n", + " \r" + ] + } + ], + "source": [ + "with Timer() as test_time:\n", + "\n", + " # Get the cross join of all user-item pairs and score them.\n", + " users = train.select(COL_USER).distinct()\n", + " items = train.select(COL_ITEM).distinct()\n", + " user_item = users.crossJoin(items)\n", + " dfs_pred = model.transform(user_item)\n", + "\n", + " # Remove seen items.\n", + " dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", + " train.alias(\"train\"),\n", + " (dfs_pred[COL_USER] == train[COL_USER]) & (dfs_pred[COL_ITEM] == train[COL_ITEM]),\n", + " how='outer'\n", + " )\n", + "\n", + " top_all = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[f\"train.{COL_RATING}\"].isNull()) \\\n", + " .select('pred.' + COL_USER, 'pred.' + COL_ITEM, 'pred.' 
+ \"prediction\")\n", + "\n", + " # In Spark, transformations are lazy evaluation\n", + " # Use an action to force execute and measure the test time \n", + " top_all.cache().count()\n", + "\n", + "print(\"Took {} seconds for prediction.\".format(test_time.interval))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------+-------+----------+\n", + "|UserId|MovieId|prediction|\n", + "+------+-------+----------+\n", + "| 1| 587| 4.1602826|\n", + "| 1| 869| 2.7732863|\n", + "| 1| 1208| 2.033383|\n", + "| 1| 1348| 1.0019257|\n", + "| 1| 1357| 0.9430026|\n", + "| 1| 1677| 2.8777318|\n", + "| 2| 80| 2.351385|\n", + "| 2| 472| 2.5865319|\n", + "| 2| 582| 3.9548612|\n", + "| 2| 838| 0.9482963|\n", + "| 2| 975| 3.1133535|\n", + "| 2| 1260| 1.9871743|\n", + "| 2| 1325| 1.2368056|\n", + "| 2| 1381| 3.5477588|\n", + "| 2| 1530| 2.08829|\n", + "| 3| 22| 3.1524537|\n", + "| 3| 57| 3.6980162|\n", + "| 3| 89| 3.9733813|\n", + "| 3| 367| 3.6629045|\n", + "| 3| 1091| 0.9144474|\n", + "+------+-------+----------+\n", + "only showing top 20 rows\n", + "\n" + ] + } + ], + "source": [ + "top_all.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. 
Evaluate how well ALS performs" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "rank_eval = SparkRankingEvaluation(test, top_all, k = TOP_K, col_user=COL_USER, col_item=COL_ITEM, \n", + " col_rating=COL_RATING, col_prediction=\"prediction\", \n", + " relevancy_method=\"top_k\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 463:> (0 + 2) / 2]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model:\tALS\n", + "Top K:\t10\n", + "MAP:\t0.006527\n", + "NDCG:\t0.051718\n", + "Precision@K:\t0.051274\n", + "Recall@K:\t0.018840\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r\n", + " \r" + ] + } + ], + "source": [ + "print(\"Model:\\tALS\",\n", + " \"Top K:\\t%d\" % rank_eval.k,\n", + " \"MAP:\\t%f\" % rank_eval.map_at_k(),\n", + " \"NDCG:\\t%f\" % rank_eval.ndcg_at_k(),\n", + " \"Precision@K:\\t%f\" % rank_eval.precision_at_k(),\n", + " \"Recall@K:\\t%f\" % rank_eval.recall_at_k(), sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. 
Evaluate rating prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 500:=============================================> (171 + 3) / 200]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------+-------+------+---------+----------+\n", + "|UserId|MovieId|Rating|Timestamp|prediction|\n", + "+------+-------+------+---------+----------+\n", + "| 580| 148| 4.0|884125773| 3.4059548|\n", + "| 406| 148| 3.0|879540276| 2.7134619|\n", + "| 916| 148| 2.0|880843892| 2.2241986|\n", + "| 663| 148| 4.0|889492989| 2.714362|\n", + "| 330| 148| 4.0|876544781| 4.52321|\n", + "| 935| 148| 4.0|884472892| 4.3838587|\n", + "| 308| 148| 3.0|887740788| 2.6169493|\n", + "| 20| 148| 5.0|879668713| 4.3721194|\n", + "| 923| 148| 4.0|880387474| 3.9818575|\n", + "| 455| 148| 3.0|879110346| 3.0764186|\n", + "| 15| 148| 3.0|879456049| 2.9913845|\n", + "| 374| 148| 4.0|880392992| 3.2223384|\n", + "| 880| 148| 2.0|880167030| 2.8111982|\n", + "| 677| 148| 4.0|889399265| 3.8451843|\n", + "| 49| 148| 1.0|888068195| 1.3751594|\n", + "| 244| 148| 2.0|880605071| 2.6781514|\n", + "| 84| 148| 4.0|883452274| 3.6721768|\n", + "| 627| 148| 3.0|879530463| 2.6362069|\n", + "| 434| 148| 3.0|886724797| 3.0973828|\n", + "| 793| 148| 4.0|875104498| 2.2886577|\n", + "+------+-------+------+---------+----------+\n", + "only showing top 20 rows\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r\n", + "[Stage 500:=================================================> (186 + 3) / 200]\r\n", + "\r\n", + " \r" + ] + } + ], + "source": [ + "# Generate predicted ratings.\n", + "prediction = model.transform(test)\n", + "prediction.cache().show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 
775:==============================================> (174 + 2) / 200]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model:\tALS rating prediction\n", + "RMSE:\t0.967434\n", + "MAE:\t0.753340\n", + "Explained variance:\t0.265916\n", + "R squared:\t0.259532\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r\n", + " \r" + ] + } + ], + "source": [ + "rating_eval = SparkRatingEvaluation(test, prediction, col_user=COL_USER, col_item=COL_ITEM, \n", + " col_rating=COL_RATING, col_prediction=\"prediction\")\n", + "\n", + "print(\"Model:\\tALS rating prediction\",\n", + " \"RMSE:\\t%f\" % rating_eval.rmse(),\n", + " \"MAE:\\t%f\" % rating_eval.mae(),\n", + " \"Explained variance:\\t%f\" % rating_eval.exp_var(),\n", + " \"R squared:\\t%f\" % rating_eval.rsquared(), sep='\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.006527288768086336, + "encoder": "json", + "name": "map", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "map" + } + }, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.051717802220247217, + "encoder": "json", + "name": "ndcg", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "ndcg" + } + }, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.05127388535031851, + "encoder": "json", + "name": "precision", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "precision" + } + }, + "output_type": "display_data" + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "\r\n", + "[Stage 904:> (0 + 2) / 2]\r\n", + "\r\n", + " \r" + ] + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.018840283525491316, + "encoder": "json", + "name": "recall", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "recall" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.9674342234414528, + "encoder": "json", + "name": "rmse", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "rmse" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.7533395161385739, + "encoder": "json", + "name": "mae", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "mae" + } + }, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.2659161968930053, + "encoder": "json", + "name": "exp_var", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "exp_var" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 0.2595322728476255, + "encoder": "json", + "name": "rsquared", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "rsquared" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + "data": 7.5410127229988575, + "encoder": "json", + "name": "train_time", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "train_time" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/scrapbook.scrap.json+json": { + 
"data": 25.246142672998758, + "encoder": "json", + "name": "test_time", + "version": 1 + } + }, + "metadata": { + "scrapbook": { + "data": true, + "display": false, + "name": "test_time" + } + }, + "output_type": "display_data" + } + ], + "source": [ + "if is_jupyter():\n", + " # Record results for tests\n", + " from recommenders.utils.notebook_utils import store_metadata\n", + " store_metadata(\"map\", rank_eval.map_at_k())\n", + " store_metadata(\"ndcg\", rank_eval.ndcg_at_k())\n", + " store_metadata(\"precision\", rank_eval.precision_at_k())\n", + " store_metadata(\"recall\", rank_eval.recall_at_k())\n", + " store_metadata(\"rmse\", rating_eval.rmse())\n", + " store_metadata(\"mae\", rating_eval.mae())\n", + " store_metadata(\"exp_var\", rating_eval.exp_var())\n", + " store_metadata(\"rsquared\", rating_eval.rsquared())\n", + " store_metadata(\"train_time\", train_time.interval)\n", + " store_metadata(\"test_time\", test_time.interval)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# cleanup spark instance\n", + "spark.stop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (reco)", + "language": "python", + "name": "reco" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------+-------+------+---------+\n", - "|UserId|MovieId|Rating|Timestamp|\n", - "+------+-------+------+---------+\n", - "| 196| 242| 3.0|881250949|\n", - "| 186| 302| 3.0|891717742|\n", - "| 22| 377| 1.0|878887116|\n", - "| 244| 51| 2.0|880606923|\n", - "| 166| 346| 1.0|886397596|\n", - "| 298| 474| 4.0|884182806|\n", - "| 115| 265| 2.0|881171488|\n", - "| 253| 465| 5.0|891628467|\n", - "| 305| 451| 3.0|886324817|\n", - "| 6| 86| 3.0|883603013|\n", - 
"| 62| 257| 2.0|879372434|\n", - "| 286| 1014| 5.0|879781125|\n", - "| 200| 222| 5.0|876042340|\n", - "| 210| 40| 3.0|891035994|\n", - "| 224| 29| 3.0|888104457|\n", - "| 303| 785| 3.0|879485318|\n", - "| 122| 387| 5.0|879270459|\n", - "| 194| 274| 2.0|879539794|\n", - "| 291| 1042| 4.0|874834944|\n", - "| 234| 1184| 2.0|892079237|\n", - "+------+-------+------+---------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "source": [ - "# Note: The DataFrame-based API for ALS currently only supports integers for user and item ids.\n", - "schema = StructType(\n", - " (\n", - " StructField(COL_USER, IntegerType()),\n", - " StructField(COL_ITEM, IntegerType()),\n", - " StructField(COL_RATING, FloatType()),\n", - " StructField(COL_TIMESTAMP, LongType()),\n", - " )\n", - ")\n", - "\n", - "data = movielens.load_spark_df(spark, size=MOVIELENS_DATA_SIZE, schema=schema)\n", - "data.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 2. Split the data using the Spark random splitter provided in utilities" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "N train 75018\n", - "N test 24982\n" - ] - } - ], - "source": [ - "train, test = spark_random_split(data, ratio=0.75, seed=123)\n", - "print (\"N train\", train.cache().count())\n", - "print (\"N test\", test.cache().count())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3. Train the ALS model on the training data, and get the top-k recommendations for our testing data\n", - "\n", - "To predict movie ratings, we use the rating data in the training set as users' explicit feedback. The hyperparameters used in building the model are referenced from [here](http://mymedialite.net/examples/datasets.html). 
We do not constrain the latent factors (`nonnegative = False`) in order to allow for both positive and negative preferences towards movies.\n", - "Timing will vary depending on the machine being used to train." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "header = {\n", - " \"userCol\": COL_USER,\n", - " \"itemCol\": COL_ITEM,\n", - " \"ratingCol\": COL_RATING,\n", - "}\n", - "\n", - "\n", - "als = ALS(\n", - " rank=10,\n", - " maxIter=15,\n", - " implicitPrefs=False,\n", - " regParam=0.05,\n", - " coldStartStrategy='drop',\n", - " nonnegative=False,\n", - " seed=42,\n", - " **header\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Took 7.5410127229988575 seconds for training.\n" - ] - } - ], - "source": [ - "with Timer() as train_time:\n", - " model = als.fit(train)\n", - "\n", - "print(\"Took {} seconds for training.\".format(train_time.interval))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the movie recommendation use case, recommending movies that have been rated by the users do not make sense. Therefore, the rated movies are removed from the recommended items.\n", - "\n", - "In order to achieve this, we recommend all movies to all users, and then remove the user-movie pairs that exist in the training dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 126:====================================================>(198 + 2) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Took 25.246142672998758 seconds for prediction.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r\n", - " \r" - ] - } - ], - "source": [ - "with Timer() as test_time:\n", - "\n", - " # Get the cross join of all user-item pairs and score them.\n", - " users = train.select(COL_USER).distinct()\n", - " items = train.select(COL_ITEM).distinct()\n", - " user_item = users.crossJoin(items)\n", - " dfs_pred = model.transform(user_item)\n", - "\n", - " # Remove seen items.\n", - " dfs_pred_exclude_train = dfs_pred.alias(\"pred\").join(\n", - " train.alias(\"train\"),\n", - " (dfs_pred[COL_USER] == train[COL_USER]) & (dfs_pred[COL_ITEM] == train[COL_ITEM]),\n", - " how='outer'\n", - " )\n", - "\n", - " top_all = dfs_pred_exclude_train.filter(dfs_pred_exclude_train[f\"train.{COL_RATING}\"].isNull()) \\\n", - " .select('pred.' + COL_USER, 'pred.' + COL_ITEM, 'pred.' 
+ \"prediction\")\n", - "\n", - " # In Spark, transformations are lazy evaluation\n", - " # Use an action to force execute and measure the test time \n", - " top_all.cache().count()\n", - "\n", - "print(\"Took {} seconds for prediction.\".format(test_time.interval))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------+-------+----------+\n", - "|UserId|MovieId|prediction|\n", - "+------+-------+----------+\n", - "| 1| 587| 4.1602826|\n", - "| 1| 869| 2.7732863|\n", - "| 1| 1208| 2.033383|\n", - "| 1| 1348| 1.0019257|\n", - "| 1| 1357| 0.9430026|\n", - "| 1| 1677| 2.8777318|\n", - "| 2| 80| 2.351385|\n", - "| 2| 472| 2.5865319|\n", - "| 2| 582| 3.9548612|\n", - "| 2| 838| 0.9482963|\n", - "| 2| 975| 3.1133535|\n", - "| 2| 1260| 1.9871743|\n", - "| 2| 1325| 1.2368056|\n", - "| 2| 1381| 3.5477588|\n", - "| 2| 1530| 2.08829|\n", - "| 3| 22| 3.1524537|\n", - "| 3| 57| 3.6980162|\n", - "| 3| 89| 3.9733813|\n", - "| 3| 367| 3.6629045|\n", - "| 3| 1091| 0.9144474|\n", - "+------+-------+----------+\n", - "only showing top 20 rows\n", - "\n" - ] - } - ], - "source": [ - "top_all.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 4. 
Evaluate how well ALS performs" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "rank_eval = SparkRankingEvaluation(test, top_all, k = TOP_K, col_user=COL_USER, col_item=COL_ITEM, \n", - " col_rating=COL_RATING, col_prediction=\"prediction\", \n", - " relevancy_method=\"top_k\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 463:> (0 + 2) / 2]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model:\tALS\n", - "Top K:\t10\n", - "MAP:\t0.006527\n", - "NDCG:\t0.051718\n", - "Precision@K:\t0.051274\n", - "Recall@K:\t0.018840\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r\n", - " \r" - ] - } - ], - "source": [ - "print(\"Model:\\tALS\",\n", - " \"Top K:\\t%d\" % rank_eval.k,\n", - " \"MAP:\\t%f\" % rank_eval.map_at_k(),\n", - " \"NDCG:\\t%f\" % rank_eval.ndcg_at_k(),\n", - " \"Precision@K:\\t%f\" % rank_eval.precision_at_k(),\n", - " \"Recall@K:\\t%f\" % rank_eval.recall_at_k(), sep='\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 5. 
Evaluate rating prediction" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 500:=============================================> (171 + 3) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------+-------+------+---------+----------+\n", - "|UserId|MovieId|Rating|Timestamp|prediction|\n", - "+------+-------+------+---------+----------+\n", - "| 580| 148| 4.0|884125773| 3.4059548|\n", - "| 406| 148| 3.0|879540276| 2.7134619|\n", - "| 916| 148| 2.0|880843892| 2.2241986|\n", - "| 663| 148| 4.0|889492989| 2.714362|\n", - "| 330| 148| 4.0|876544781| 4.52321|\n", - "| 935| 148| 4.0|884472892| 4.3838587|\n", - "| 308| 148| 3.0|887740788| 2.6169493|\n", - "| 20| 148| 5.0|879668713| 4.3721194|\n", - "| 923| 148| 4.0|880387474| 3.9818575|\n", - "| 455| 148| 3.0|879110346| 3.0764186|\n", - "| 15| 148| 3.0|879456049| 2.9913845|\n", - "| 374| 148| 4.0|880392992| 3.2223384|\n", - "| 880| 148| 2.0|880167030| 2.8111982|\n", - "| 677| 148| 4.0|889399265| 3.8451843|\n", - "| 49| 148| 1.0|888068195| 1.3751594|\n", - "| 244| 148| 2.0|880605071| 2.6781514|\n", - "| 84| 148| 4.0|883452274| 3.6721768|\n", - "| 627| 148| 3.0|879530463| 2.6362069|\n", - "| 434| 148| 3.0|886724797| 3.0973828|\n", - "| 793| 148| 4.0|875104498| 2.2886577|\n", - "+------+-------+------+---------+----------+\n", - "only showing top 20 rows\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r\n", - "[Stage 500:=================================================> (186 + 3) / 200]\r\n", - "\r\n", - " \r" - ] - } - ], - "source": [ - "# Generate predicted ratings.\n", - "prediction = model.transform(test)\n", - "prediction.cache().show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 
775:==============================================> (174 + 2) / 200]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model:\tALS rating prediction\n", - "RMSE:\t0.967434\n", - "MAE:\t0.753340\n", - "Explained variance:\t0.265916\n", - "R squared:\t0.259532\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\r\n", - " \r" - ] - } - ], - "source": [ - "rating_eval = SparkRatingEvaluation(test, prediction, col_user=COL_USER, col_item=COL_ITEM, \n", - " col_rating=COL_RATING, col_prediction=\"prediction\")\n", - "\n", - "print(\"Model:\\tALS rating prediction\",\n", - " \"RMSE:\\t%f\" % rating_eval.rmse(),\n", - " \"MAE:\\t%f\" % rating_eval.mae(),\n", - " \"Explained variance:\\t%f\" % rating_eval.exp_var(),\n", - " \"R squared:\\t%f\" % rating_eval.rsquared(), sep='\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.006527288768086336, - "encoder": "json", - "name": "map", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "map" - } - }, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.051717802220247217, - "encoder": "json", - "name": "ndcg", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "ndcg" - } - }, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.05127388535031851, - "encoder": "json", - "name": "precision", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "precision" - } - }, - "output_type": "display_data" - }, - { - "name": "stderr", - 
"output_type": "stream", - "text": [ - "\r\n", - "[Stage 904:> (0 + 2) / 2]\r\n", - "\r\n", - " \r" - ] - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.018840283525491316, - "encoder": "json", - "name": "recall", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "recall" - } - }, - "output_type": "display_data" - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.9674342234414528, - "encoder": "json", - "name": "rmse", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "rmse" - } - }, - "output_type": "display_data" - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.7533395161385739, - "encoder": "json", - "name": "mae", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "mae" - } - }, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.2659161968930053, - "encoder": "json", - "name": "exp_var", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "exp_var" - } - }, - "output_type": "display_data" - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 0.2595322728476255, - "encoder": "json", - "name": "rsquared", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "rsquared" - } - }, - "output_type": "display_data" - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - "data": 7.5410127229988575, - "encoder": "json", - "name": "train_time", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "train_time" - } - }, - "output_type": "display_data" - }, - { - "data": { - "application/scrapbook.scrap.json+json": { - 
"data": 25.246142672998758, - "encoder": "json", - "name": "test_time", - "version": 1 - } - }, - "metadata": { - "scrapbook": { - "data": true, - "display": false, - "name": "test_time" - } - }, - "output_type": "display_data" - } - ], - "source": [ - "if is_jupyter():\n", - " # Record results with papermill for tests\n", - " import scrapbook as sb\n", - " sb.glue(\"map\", rank_eval.map_at_k())\n", - " sb.glue(\"ndcg\", rank_eval.ndcg_at_k())\n", - " sb.glue(\"precision\", rank_eval.precision_at_k())\n", - " sb.glue(\"recall\", rank_eval.recall_at_k())\n", - " sb.glue(\"rmse\", rating_eval.rmse())\n", - " sb.glue(\"mae\", rating_eval.mae())\n", - " sb.glue(\"exp_var\", rating_eval.exp_var())\n", - " sb.glue(\"rsquared\", rating_eval.rsquared())\n", - " sb.glue(\"train_time\", train_time.interval)\n", - " sb.glue(\"test_time\", test_time.interval)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup spark instance\n", - "spark.stop()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (reco)", - "language": "python", - "name": "reco" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/examples/00_quick_start/dkn_MIND.ipynb b/examples/00_quick_start/dkn_MIND.ipynb index d37bb2cd3..6ef941e3a 100644 --- a/examples/00_quick_start/dkn_MIND.ipynb +++ b/examples/00_quick_start/dkn_MIND.ipynb @@ -345,7 +345,7 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res\", res)" + "store_metadata(\"res\", res)" ] }, { diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index 
973906d51..fd64b1f03 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -915,16 +915,16 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)\n", - "sb.glue(\"rmse\", eval_rmse)\n", - "sb.glue(\"mae\", eval_mae)\n", - "sb.glue(\"exp_var\", eval_exp_var)\n", - "sb.glue(\"rsquared\", eval_r2)\n", - "sb.glue(\"train_time\", train_time.interval)\n", - "sb.glue(\"test_time\", test_time.interval)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"rmse\", eval_rmse)\n", + "store_metadata(\"mae\", eval_mae)\n", + "store_metadata(\"exp_var\", eval_exp_var)\n", + "store_metadata(\"rsquared\", eval_r2)\n", + "store_metadata(\"train_time\", train_time.interval)\n", + "store_metadata(\"test_time\", test_time.interval)" ] }, { @@ -964,4 +964,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/geoimc_movielens.ipynb b/examples/00_quick_start/geoimc_movielens.ipynb index 1420df43b..f8518db12 100644 --- a/examples/00_quick_start/geoimc_movielens.ipynb +++ b/examples/00_quick_start/geoimc_movielens.ipynb @@ -1,329 +1,329 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Geometry Aware Inductive Matrix Completion (GeoIMC)\n", - "\n", - "GeoIMC is an inductive matrix completion algorithm based on the works by Jawanpuria et al. (2019)\n", - "\n", - "Consider the case of MovieLens-100K (ML100K), Let $X \\in R^{m \\times d_1}, Z \\in R^{n \\times d_2} $ be the features of users and movies respectively. Let $M \\in R^{m \\times n}$, be the partially observed ratings matrix. 
GeoIMC models this matrix as $M = XUBV^TZ^T$, where $U \\in R^{d_1 \\times k}, V \\in R^{d_2 \\times k}, B \\in R^{k \\times k}$ are Orthogonal, Orthogonal, Symmetric Positive-Definite matrices respectively. This Optimization problem is solved by using Pymanopt.\n", - "\n", - "\n", - "This notebook provides an example of how to utilize and evaluate GeoIMC implementation in **recommenders**\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import tempfile\n", - "import zipfile\n", - "import pandas as pd\n", - "import numpy as np\n", - "import papermill as pm\n", - "import scrapbook as sb\n", - "\n", - "from recommenders.datasets import movielens\n", - "from recommenders.models.geoimc.geoimc_data import ML_100K\n", - "from recommenders.models.geoimc.geoimc_algorithm import IMCProblem\n", - "from recommenders.models.geoimc.geoimc_predict import Inferer\n", - "from recommenders.evaluation.python_evaluation import (\n", - " rmse, mae\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Choose the MovieLens dataset\n", - "MOVIELENS_DATA_SIZE = '100k'\n", - "# Normalize user, item features\n", - "normalize = True\n", - "# Rank (k) of the model\n", - "rank = 300\n", - "# Regularization parameter\n", - "regularizer = 1e-3\n", - "\n", - "# Parameters for algorithm convergence\n", - "max_iters = 150000\n", - "max_time = 1000\n", - "verbosity = 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 
Download ML100K dataset and features" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 4.81k/4.81k [00:09<00:00, 519KB/s]\n" - ] - } - ], - "source": [ - "# Create a directory to download ML100K\n", - "dp = tempfile.mkdtemp(suffix='-geoimc')\n", - "movielens.download_movielens(MOVIELENS_DATA_SIZE, f\"{dp}/ml-100k.zip\")\n", - "with zipfile.ZipFile(f\"{dp}/ml-100k.zip\", 'r') as z:\n", - " z.extractall(dp)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Load the dataset using the example features provided in helpers\n", - "\n", - "The features were generated using the same method as the work by Xin Dong et al. (2017)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "dataset = ML_100K(\n", - " normalize=normalize,\n", - " target_transform='binarize'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "dataset.load_data(f\"{dp}/ml-100k/\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Characteristics:\n", - "\n", - " target: (943, 1682)\n", - " entities: (943, 1822), (1682, 1925)\n", - "\n", - " training: (80000,)\n", - " training_entities: (943, 1822), (1682, 1925)\n", - "\n", - " testing: (20000,)\n", - " test_entities: (943, 1822), (1682, 1925)\n", - "\n" - ] - } - ], - "source": [ - "print(f\"\"\"Characteristics:\n", - "\n", - " target: {dataset.training_data.data.shape}\n", - " entities: {dataset.entities[0].shape}, {dataset.entities[1].shape}\n", - "\n", - " training: {dataset.training_data.get_data().data.shape}\n", - " training_entities: {dataset.training_data.get_entity(\"row\").shape}, {dataset.training_data.get_entity(\"col\").shape}\n", - "\n", - 
" testing: {dataset.test_data.get_data().data.shape}\n", - " test_entities: {dataset.test_data.get_entity(\"row\").shape}, {dataset.test_data.get_entity(\"col\").shape}\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Initialize the IMC problem" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "np.random.seed(10)\n", - "prblm = IMCProblem(\n", - " dataset.training_data,\n", - " lambda1=regularizer,\n", - " rank=rank\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Optimizing...\n", - "Terminated - max time reached after 1753 iterations.\n", - "\n" - ] - } - ], - "source": [ - "# Solve the Optimization problem\n", - "prblm.solve(\n", - " max_time,\n", - " max_iters,\n", - " verbosity\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize an inferer\n", - "inferer = Inferer(\n", - " method='dot'\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# Predict using the parametrized matrices\n", - "predictions = inferer.infer(\n", - " dataset.test_data,\n", - " prblm.W\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# Prepare the test, predicted dataframes\n", - "user_ids = dataset.test_data.get_data().tocoo().row\n", - "item_ids = dataset.test_data.get_data().tocoo().col\n", - "test_df = pd.DataFrame(\n", - " data={\n", - " \"userID\": user_ids,\n", - " \"itemID\": item_ids,\n", - " \"rating\": dataset.test_data.get_data().data\n", - " }\n", - ")\n", - "predictions_df = pd.DataFrame(\n", - " data={\n", - " \"userID\": user_ids,\n", - " \"itemID\": item_ids,\n", - " \"prediction\": [predictions[uid, iid] for uid, iid in 
list(zip(user_ids, item_ids))]\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RMSE: 0.496351244012414\n", - "MAE: 0.47524594431584\n", - "\n" - ] - } - ], - "source": [ - "# Calculate RMSE\n", - "RMSE = rmse(\n", - " test_df,\n", - " predictions_df\n", - ")\n", - "# Calculate MAE\n", - "MAE = mae(\n", - " test_df,\n", - " predictions_df\n", - ")\n", - "print(f\"\"\"\n", - "RMSE: {RMSE}\n", - "MAE: {MAE}\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sb.glue(\"rmse\", RMSE)\n", - "sb.glue(\"mae\", MAE)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## References\n", - "\n", - "[1] Pratik Jawanpuria, Arjun Balgovind, Anoop Kunchukuttan, Bamdev Mishra. _[Learning Multilingual Word Embeddings in Latent Metric Space: A Geometric Approach](https://www.mitpressjournals.org/doi/full/10.1162/tacl_a_00257)_. Transaction of the Association for Computational Linguistics (TACL), Volume 7, p.107-120, 2019.\n", - "\n", - "[2] Xin Dong, Lei Yu, Zhonghuo Wu, Yuxia Sun, Lingfeng Yuan, Fangxi Zhang. [A Hybrid Collaborative Filtering Model withDeep Structure for Recommender Systems](https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14676/13916).\n", - "Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17), p.1309-1315, 2017." 
- ] - } - ], - "metadata": { - "celltoolbar": "Tags", - "kernelspec": { - "display_name": "Python (reco)", - "language": "python", - "name": "reco_base" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.10" - } - }, - "nbformat": 4, - "nbformat_minor": 4 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Geometry Aware Inductive Matrix Completion (GeoIMC)\n", + "\n", + "GeoIMC is an inductive matrix completion algorithm based on the works by Jawanpuria et al. (2019)\n", + "\n", + "Consider the case of MovieLens-100K (ML100K), Let $X \\in R^{m \\times d_1}, Z \\in R^{n \\times d_2} $ be the features of users and movies respectively. Let $M \\in R^{m \\times n}$, be the partially observed ratings matrix. GeoIMC models this matrix as $M = XUBV^TZ^T$, where $U \\in R^{d_1 \\times k}, V \\in R^{d_2 \\times k}, B \\in R^{k \\times k}$ are Orthogonal, Orthogonal, Symmetric Positive-Definite matrices respectively. 
This Optimization problem is solved by using Pymanopt.\n", + "\n", + "\n", + "This notebook provides an example of how to utilize and evaluate GeoIMC implementation in **recommenders**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "import zipfile\n", + "import pandas as pd\n", + "import numpy as np\n", + "import papermill as pm\n", + "import scrapbook as sb\n", + "\n", + "from recommenders.datasets import movielens\n", + "from recommenders.models.geoimc.geoimc_data import ML_100K\n", + "from recommenders.models.geoimc.geoimc_algorithm import IMCProblem\n", + "from recommenders.models.geoimc.geoimc_predict import Inferer\n", + "from recommenders.evaluation.python_evaluation import (\n", + " rmse, mae\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose the MovieLens dataset\n", + "MOVIELENS_DATA_SIZE = '100k'\n", + "# Normalize user, item features\n", + "normalize = True\n", + "# Rank (k) of the model\n", + "rank = 300\n", + "# Regularization parameter\n", + "regularizer = 1e-3\n", + "\n", + "# Parameters for algorithm convergence\n", + "max_iters = 150000\n", + "max_time = 1000\n", + "verbosity = 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Download ML100K dataset and features" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 4.81k/4.81k [00:09<00:00, 519KB/s]\n" + ] + } + ], + "source": [ + "# Create a directory to download ML100K\n", + "dp = tempfile.mkdtemp(suffix='-geoimc')\n", + "movielens.download_movielens(MOVIELENS_DATA_SIZE, f\"{dp}/ml-100k.zip\")\n", + "with zipfile.ZipFile(f\"{dp}/ml-100k.zip\", 'r') as z:\n", + " z.extractall(dp)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Load the dataset using the example features provided in helpers\n", + "\n", + "The features were generated using the same method as the work by Xin Dong et al. (2017)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = ML_100K(\n", + " normalize=normalize,\n", + " target_transform='binarize'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.load_data(f\"{dp}/ml-100k/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Characteristics:\n", + "\n", + " target: (943, 1682)\n", + " entities: (943, 1822), (1682, 1925)\n", + "\n", + " training: (80000,)\n", + " training_entities: (943, 1822), (1682, 1925)\n", + "\n", + " testing: (20000,)\n", + " test_entities: (943, 1822), (1682, 1925)\n", + "\n" + ] + } + ], + "source": [ + "print(f\"\"\"Characteristics:\n", + "\n", + " target: {dataset.training_data.data.shape}\n", + " entities: {dataset.entities[0].shape}, {dataset.entities[1].shape}\n", + "\n", + " training: {dataset.training_data.get_data().data.shape}\n", + " training_entities: {dataset.training_data.get_entity(\"row\").shape}, {dataset.training_data.get_entity(\"col\").shape}\n", + "\n", + " testing: {dataset.test_data.get_data().data.shape}\n", + " test_entities: {dataset.test_data.get_entity(\"row\").shape}, {dataset.test_data.get_entity(\"col\").shape}\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. 
Initialize the IMC problem" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(10)\n", + "prblm = IMCProblem(\n", + " dataset.training_data,\n", + " lambda1=regularizer,\n", + " rank=rank\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Optimizing...\n", + "Terminated - max time reached after 1753 iterations.\n", + "\n" + ] + } + ], + "source": [ + "# Solve the Optimization problem\n", + "prblm.solve(\n", + " max_time,\n", + " max_iters,\n", + " verbosity\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize an inferer\n", + "inferer = Inferer(\n", + " method='dot'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "# Predict using the parametrized matrices\n", + "predictions = inferer.infer(\n", + " dataset.test_data,\n", + " prblm.W\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Prepare the test, predicted dataframes\n", + "user_ids = dataset.test_data.get_data().tocoo().row\n", + "item_ids = dataset.test_data.get_data().tocoo().col\n", + "test_df = pd.DataFrame(\n", + " data={\n", + " \"userID\": user_ids,\n", + " \"itemID\": item_ids,\n", + " \"rating\": dataset.test_data.get_data().data\n", + " }\n", + ")\n", + "predictions_df = pd.DataFrame(\n", + " data={\n", + " \"userID\": user_ids,\n", + " \"itemID\": item_ids,\n", + " \"prediction\": [predictions[uid, iid] for uid, iid in list(zip(user_ids, item_ids))]\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RMSE: 0.496351244012414\n", + "MAE: 
0.47524594431584\n", + "\n" + ] + } + ], + "source": [ + "# Calculate RMSE\n", + "RMSE = rmse(\n", + " test_df,\n", + " predictions_df\n", + ")\n", + "# Calculate MAE\n", + "MAE = mae(\n", + " test_df,\n", + " predictions_df\n", + ")\n", + "print(f\"\"\"\n", + "RMSE: {RMSE}\n", + "MAE: {MAE}\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "store_metadata(\"rmse\", RMSE)\n", + "store_metadata(\"mae\", MAE)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "[1] Pratik Jawanpuria, Arjun Balgovind, Anoop Kunchukuttan, Bamdev Mishra. _[Learning Multilingual Word Embeddings in Latent Metric Space: A Geometric Approach](https://www.mitpressjournals.org/doi/full/10.1162/tacl_a_00257)_. Transaction of the Association for Computational Linguistics (TACL), Volume 7, p.107-120, 2019.\n", + "\n", + "[2] Xin Dong, Lei Yu, Zhonghuo Wu, Yuxia Sun, Lingfeng Yuan, Fangxi Zhang. [A Hybrid Collaborative Filtering Model withDeep Structure for Recommender Systems](https://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14676/13916).\n", + "Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence (AAAI-17), p.1309-1315, 2017." 
+ ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "kernelspec": { + "display_name": "Python (reco)", + "language": "python", + "name": "reco_base" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } \ No newline at end of file diff --git a/examples/00_quick_start/ncf_movielens.ipynb b/examples/00_quick_start/ncf_movielens.ipynb index 4d669e92b..23142e458 100644 --- a/examples/00_quick_start/ncf_movielens.ipynb +++ b/examples/00_quick_start/ncf_movielens.ipynb @@ -60,6 +60,7 @@ "from recommenders.datasets.python_splitters import python_chrono_split\n", "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", " recall_at_k, get_top_k_items)\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", @@ -350,16 +351,13 @@ "metadata": {}, "outputs": [], "source": [ - "if is_jupyter():\n", - " # Record results with papermill for tests\n", - " import papermill as pm\n", - " import scrapbook as sb\n", - " sb.glue(\"map\", eval_map)\n", - " sb.glue(\"ndcg\", eval_ndcg)\n", - " sb.glue(\"precision\", eval_precision)\n", - " sb.glue(\"recall\", eval_recall)\n", - " sb.glue(\"train_time\", train_time.interval)\n", - " sb.glue(\"test_time\", test_time.interval)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"train_time\", train_time.interval)\n", + "store_metadata(\"test_time\", test_time.interval)" ] }, { @@ -392,4 
+390,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/rbm_movielens.ipynb b/examples/00_quick_start/rbm_movielens.ipynb index d76fa764c..70df21ac3 100644 --- a/examples/00_quick_start/rbm_movielens.ipynb +++ b/examples/00_quick_start/rbm_movielens.ipynb @@ -779,10 +779,10 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_100k['MAP'][0])\n", - "sb.glue(\"ndcg\", eval_100k['nDCG@k'][0])\n", - "sb.glue(\"precision\", eval_100k['Precision@k'][0])\n", - "sb.glue(\"recall\", eval_100k['Recall@k'][0])" + "store_metadata(\"map\", eval_100k['MAP'][0])\n", + "store_metadata(\"ndcg\", eval_100k['nDCG@k'][0])\n", + "store_metadata(\"precision\", eval_100k['Precision@k'][0])\n", + "store_metadata(\"recall\", eval_100k['Recall@k'][0])" ] }, { @@ -869,4 +869,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/sar_movielens.ipynb b/examples/00_quick_start/sar_movielens.ipynb index f5922a861..9fe09c6ad 100644 --- a/examples/00_quick_start/sar_movielens.ipynb +++ b/examples/00_quick_start/sar_movielens.ipynb @@ -773,12 +773,12 @@ "outputs": [], "source": [ "# Record results with papermill for tests - ignore this cell\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)\n", - "sb.glue(\"train_time\", train_time.interval)\n", - "sb.glue(\"test_time\", test_time.interval)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"train_time\", train_time.interval)\n", + "store_metadata(\"test_time\", test_time.interval)" ] } ], diff --git a/examples/00_quick_start/wide_deep_movielens.ipynb b/examples/00_quick_start/wide_deep_movielens.ipynb index 201a0bc36..bacac0122 100644 --- 
a/examples/00_quick_start/wide_deep_movielens.ipynb +++ b/examples/00_quick_start/wide_deep_movielens.ipynb @@ -951,7 +951,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -964,7 +964,7 @@ "if EVALUATE_WHILE_TRAINING:\n", " logs = evaluation_logger.get_log()\n", " for i, (m, v) in enumerate(logs.items(), 1):\n", - " sb.glue(\"eval_{}\".format(m), v)\n", + " store_metadata(\"eval_{}\".format(m), v)\n", " x = [save_checkpoints_steps*i for i in range(1, len(v)+1)]\n", " plot.line_graph(\n", " values=list(zip(v, x)),\n", @@ -1077,7 +1077,7 @@ " rating_results = {}\n", " for m in RATING_METRICS:\n", " result = evaluator.metrics[m](test, prediction_df, **cols)\n", - " sb.glue(m, result)\n", + " store_metadata(m, result)\n", " rating_results[m] = result\n", " print(rating_results)" ] @@ -1167,7 +1167,7 @@ " ranking_results = {}\n", " for m in RANKING_METRICS:\n", " result = evaluator.metrics[m](test, prediction_df, **{**cols, 'k': TOP_K})\n", - " sb.glue(m, result)\n", + " store_metadata(m, result)\n", " ranking_results[m] = result\n", " print(ranking_results)" ] @@ -1242,7 +1242,7 @@ " tf_feat_cols=wide_columns+deep_columns,\n", " base_dir=EXPORT_DIR_BASE\n", ")\n", - "sb.glue('saved_model_dir', str(exported_path))\n", + "store_metadata('saved_model_dir', str(exported_path))\n", "print(\"Model exported to\", str(exported_path))" ] }, @@ -1286,4 +1286,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/xdeepfm_criteo.ipynb b/examples/00_quick_start/xdeepfm_criteo.ipynb index 35420ecc8..9479ab99d 100644 --- a/examples/00_quick_start/xdeepfm_criteo.ipynb +++ b/examples/00_quick_start/xdeepfm_criteo.ipynb @@ -331,8 +331,8 @@ } ], "source": [ - "sb.glue(\"auc\", result[\"auc\"])\n", - "sb.glue(\"logloss\", result[\"logloss\"])" + "store_metadata(\"auc\", result[\"auc\"])\n", + "store_metadata(\"logloss\", result[\"logloss\"])" ] }, { diff --git a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb index bb6ab4e55..4df1b961c 100644 --- 
a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb @@ -863,14 +863,14 @@ "source": [ "if is_jupyter():\n", " # Record results with papermill and scrapbook for tests\n", - " sb.glue(\"map\", eval_map)\n", - " sb.glue(\"ndcg\", eval_ndcg)\n", - " sb.glue(\"precision\", eval_precision)\n", - " sb.glue(\"recall\", eval_recall)\n", - " sb.glue(\"rmse\", eval_rmse)\n", - " sb.glue(\"mae\", eval_mae)\n", - " sb.glue(\"exp_var\", eval_exp_var)\n", - " sb.glue(\"rsquared\", eval_rsquared)" + " store_metadata(\"map\", eval_map)\n", + " store_metadata(\"ndcg\", eval_ndcg)\n", + " store_metadata(\"precision\", eval_precision)\n", + " store_metadata(\"recall\", eval_recall)\n", + " store_metadata(\"rmse\", eval_rmse)\n", + " store_metadata(\"mae\", eval_mae)\n", + " store_metadata(\"exp_var\", eval_exp_var)\n", + " store_metadata(\"rsquared\", eval_rsquared)" ] }, { diff --git a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb index 4c1b36583..8d177b07a 100644 --- a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb @@ -594,10 +594,10 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)" ] }, { diff --git a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb index 9d0bcc9fa..50666e82d 100644 --- a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb +++ 
b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb @@ -575,10 +575,10 @@ "outputs": [], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)" ] }, { diff --git a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb index 45a465262..bcc9992e4 100644 --- a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb @@ -733,10 +733,10 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)" ] }, { diff --git a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb index 77aec62cf..93ee20717 100644 --- a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb @@ -1070,14 +1070,14 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)\n", - "sb.glue(\"map2\", eval_map2)\n", - "sb.glue(\"ndcg2\", eval_ndcg2)\n", - "sb.glue(\"precision2\", eval_precision2)\n", - "sb.glue(\"recall2\", eval_recall2)" + 
"store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"map2\", eval_map2)\n", + "store_metadata(\"ndcg2\", eval_ndcg2)\n", + "store_metadata(\"precision2\", eval_precision2)\n", + "store_metadata(\"recall2\", eval_recall2)" ] }, { diff --git a/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb b/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb index 4061e1b7b..3d89f31d0 100644 --- a/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb @@ -512,10 +512,10 @@ "outputs": [], "source": [ "# Record results for tests - ignore this cell\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)" + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)" ] }, { diff --git a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb index 160d491bf..7c35b3d00 100644 --- a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb @@ -810,16 +810,16 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"rmse\", eval_rmse)\n", - "sb.glue(\"mae\", eval_mae)\n", - "sb.glue(\"rsquared\", eval_rsquared)\n", - "sb.glue(\"exp_var\", eval_exp_var)\n", - "sb.glue(\"map\", eval_map)\n", - "sb.glue(\"ndcg\", eval_ndcg)\n", - "sb.glue(\"precision\", eval_precision)\n", - "sb.glue(\"recall\", eval_recall)\n", - "sb.glue(\"train_time\", train_time.interval)\n", - "sb.glue(\"test_time\", test_time.interval)" + "store_metadata(\"rmse\", 
eval_rmse)\n", + "store_metadata(\"mae\", eval_mae)\n", + "store_metadata(\"rsquared\", eval_rsquared)\n", + "store_metadata(\"exp_var\", eval_exp_var)\n", + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"train_time\", train_time.interval)\n", + "store_metadata(\"test_time\", test_time.interval)" ] }, { diff --git a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb index 3289c6b71..92ae786ee 100644 --- a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb +++ b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb @@ -441,7 +441,7 @@ ], "source": [ "# Record results with papermill for tests\n", - "sb.glue(\"auc\", auc)" + "store_metadata(\"auc\", auc)" ] }, { @@ -506,4 +506,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb index 024429dc3..441c33876 100644 --- a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb +++ b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb @@ -1318,16 +1318,16 @@ "source": [ "# record results for testing\n", "if is_jupyter():\n", - " sb.glue('rmse', saved_result['RMSE'])\n", - " sb.glue('mae', saved_result['MAE'])\n", - " sb.glue('rsquared', saved_result['R2'])\n", - " sb.glue('exp_var', saved_result['Explained Variance'])\n", - " sb.glue(\"train_time\", saved_result['Train Time (ms)'])\n", - " sb.glue(\"test_time\", test_time)\n", - " sb.glue('map', rank_metrics['MAP'])\n", - " sb.glue('ndcg', rank_metrics['NDCG'])\n", - " sb.glue('precision', rank_metrics['Precision'])\n", - " sb.glue('recall', rank_metrics['Recall'])" + " 
store_metadata('rmse', saved_result['RMSE'])\n", + " store_metadata('mae', saved_result['MAE'])\n", + " store_metadata('rsquared', saved_result['R2'])\n", + " store_metadata('exp_var', saved_result['Explained Variance'])\n", + " store_metadata(\"train_time\", saved_result['Train Time (ms)'])\n", + " store_metadata(\"test_time\", test_time)\n", + " store_metadata('map', rank_metrics['MAP'])\n", + " store_metadata('ndcg', rank_metrics['NDCG'])\n", + " store_metadata('precision', rank_metrics['Precision'])\n", + " store_metadata('recall', rank_metrics['Recall'])" ] }, { diff --git a/examples/02_model_hybrid/fm_deep_dive.ipynb b/examples/02_model_hybrid/fm_deep_dive.ipynb index 25fdbf9d7..5920b021f 100644 --- a/examples/02_model_hybrid/fm_deep_dive.ipynb +++ b/examples/02_model_hybrid/fm_deep_dive.ipynb @@ -606,7 +606,7 @@ } ], "source": [ - "sb.glue('auc_score', auc_score)" + "store_metadata('auc_score', auc_score)" ] }, { diff --git a/examples/02_model_hybrid/lightfm_deep_dive.ipynb b/examples/02_model_hybrid/lightfm_deep_dive.ipynb index ea139260f..5555ac19e 100755 --- a/examples/02_model_hybrid/lightfm_deep_dive.ipynb +++ b/examples/02_model_hybrid/lightfm_deep_dive.ipynb @@ -1890,10 +1890,10 @@ "outputs": [], "source": [ "# Record results for tests\n", - "sb.glue('eval_precision', eval_precision)\n", - "sb.glue('eval_recall', eval_recall)\n", - "sb.glue('eval_precision2', eval_precision2)\n", - "sb.glue('eval_recall2', eval_recall2)" + "store_metadata('eval_precision', eval_precision)\n", + "store_metadata('eval_recall', eval_recall)\n", + "store_metadata('eval_precision2', eval_precision2)\n", + "store_metadata('eval_recall2', eval_recall2)" ] }, { diff --git a/examples/README.md b/examples/README.md index 0b1f0ace8..10967bdca 100644 --- a/examples/README.md +++ b/examples/README.md @@ -69,4 +69,4 @@ cfg = NotebookRunConfig(source_directory='../', run_config=run_config) ``` -All metrics and parameters logged with `sb.glue` will be stored on the run as tracked 
metrics. The initial notebook that was submitted, will be stored as an output notebook ```out.ipynb``` in the outputs tab of the Azure Portal. +All metrics and parameters logged with `store_metadata` will be stored on the run as tracked metrics. The initial notebook that was submitted, will be stored as an output notebook ```out.ipynb``` in the outputs tab of the Azure Portal. diff --git a/examples/template.ipynb b/examples/template.ipynb index 06c66c0fb..944917db3 100644 --- a/examples/template.ipynb +++ b/examples/template.ipynb @@ -250,7 +250,7 @@ } ], "source": [ - "sb.glue(\"checked_version\", checked_version)" + "store_metadata(\"checked_version\", checked_version)" ] }, { diff --git a/tests/README.md b/tests/README.md index a3eb149e5..523194560 100644 --- a/tests/README.md +++ b/tests/README.md @@ -108,7 +108,7 @@ import papermill as pm @pytest.mark.notebooks def test_sar_single_node_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_single_node"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) ``` Notice that the input of the function is a fixture defined in [conftest.py](conftest.py). For more information, please see the [definition of fixtures in PyTest](https://docs.pytest.org/en/latest/fixture.html). 
@@ -143,15 +143,13 @@ ABS_TOL = 0.05 def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_single_node"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["precision"] == pytest.approx(0.330753, rel=TOL, abs=ABS_TOL) assert results["recall"] == pytest.approx(0.176385, rel=TOL, abs=ABS_TOL) ``` diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py index 2d8c6b0a0..fbfde453a 100644 --- a/tests/functional/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -3,10 +3,9 @@ import os import pytest -import papermill as pm -import scrapbook as sb from recommenders.utils.gpu_utils import get_number_gpus +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.1 @@ -41,7 +40,7 @@ def test_ncf_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values, seed ): notebook_path = notebooks["ncf"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -49,9 +48,7 @@ def test_ncf_functional( TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs, BATCH_SIZE=512, SEED=seed ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -91,7 +88,7 @@ def test_ncf_deep_dive_functional( seed, ): notebook_path = notebooks["ncf_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -103,9 +100,7 @@ def test_ncf_deep_dive_functional( SEED=seed, ), ) - 
results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -137,15 +132,13 @@ def test_fastai_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values ): notebook_path = notebooks["fastai"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size, EPOCHS=epochs), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -172,7 +165,7 @@ def test_xdeepfm_functional( seed, ): notebook_path = notebooks["xdeepfm_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -182,9 +175,7 @@ def test_xdeepfm_functional( RANDOM_SEED=seed, ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -237,12 +228,10 @@ def test_wide_deep_functional( "RANKING_METRICS": ["ndcg_at_k", "map_at_k", "precision_at_k", "recall_at_k"], "RANDOM_SEED": seed, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -283,12 +272,10 @@ def test_slirec_quickstart_functional( "BATCH_SIZE": batch_size, "RANDOM_SEED": seed, } - 
pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key]["auc"] == pytest.approx(value["auc"], rel=TOL, abs=ABS_TOL) @@ -338,12 +325,10 @@ def test_nrms_quickstart_functional( "seed": seed, "MIND_type": MIND_type, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key]["group_auc"] == pytest.approx( @@ -399,12 +384,10 @@ def test_naml_quickstart_functional( "seed": seed, "MIND_type": MIND_type, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key]["group_auc"] == pytest.approx( @@ -460,12 +443,10 @@ def test_lstur_quickstart_functional( "seed": seed, "MIND_type": MIND_type, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key]["group_auc"] == pytest.approx( @@ -521,12 +502,10 @@ def test_npa_quickstart_functional( "seed": seed, "MIND_type": MIND_type, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = 
sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key]["group_auc"] == pytest.approx( @@ -577,7 +556,7 @@ def test_lightgcn_deep_dive_functional( seed, ): notebook_path = notebooks["lightgcn_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -592,9 +571,7 @@ def test_lightgcn_deep_dive_functional( item_file=os.path.join(data_path, r"item_embeddings"), ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -604,15 +581,13 @@ def test_lightgcn_deep_dive_functional( @pytest.mark.notebooks def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["dkn_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(EPOCHS=5, BATCH_SIZE=500), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res"]["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL) assert results["res"]["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL) @@ -633,15 +608,13 @@ def test_cornac_bivae_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["cornac_bivae_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE=size), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, 
rel=TOL, abs=ABS_TOL) @@ -689,15 +662,13 @@ def test_sasrec_quickstart_functional( "model_name": model_name, "seed": seed, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params, ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -719,15 +690,13 @@ def test_benchmark_movielens_gpu( notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg ): notebook_path = notebooks["benchmark_movielens"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(data_sizes=size, algorithms=algos), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert len(results["results"]) == 4 for i, value in enumerate(results["results"]): assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL) diff --git a/tests/functional/examples/test_notebooks_pyspark.py b/tests/functional/examples/test_notebooks_pyspark.py index 57bd87928..3a88f0d44 100644 --- a/tests/functional/examples/test_notebooks_pyspark.py +++ b/tests/functional/examples/test_notebooks_pyspark.py @@ -4,8 +4,9 @@ import os import sys import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import execute_notebook, read_notebook + TOL = 0.05 ABS_TOL = 0.05 @@ -17,15 +18,13 @@ @pytest.mark.notebooks def test_als_pyspark_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_pyspark"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="1m"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - 
"data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.00201, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.02516, rel=TOL, abs=ABS_TOL) @@ -45,15 +44,13 @@ def test_als_pyspark_functional(notebooks, output_notebook, kernel_name): @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows") def test_mmlspark_lightgbm_criteo_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["mmlspark_lightgbm_criteo"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(DATA_SIZE="full", NUM_ITERATIONS=50), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL) @@ -75,15 +72,13 @@ def test_benchmark_movielens_pyspark( os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable os.environ.pop("SPARK_HOME", None) - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(data_sizes=size, algorithms=algos), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert len(results["results"]) == 1 for i, value in enumerate(results["results"]): assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL) diff --git a/tests/functional/examples/test_notebooks_python.py b/tests/functional/examples/test_notebooks_python.py index 5bb6da42b..0f3d81a45 100644 --- a/tests/functional/examples/test_notebooks_python.py +++ b/tests/functional/examples/test_notebooks_python.py @@ -2,8 +2,8 @@ # Licensed under the MIT License. 
import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.05 @@ -38,15 +38,13 @@ def test_sar_single_node_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["sar_single_node"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -72,15 +70,13 @@ def test_baseline_deep_dive_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["baseline_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE=size), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -110,15 +106,13 @@ def test_surprise_svd_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["surprise_svd_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE=size), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -148,15 +142,13 @@ def test_vw_deep_dive_functional( notebooks, output_notebook, kernel_name, size, expected_values ): 
notebook_path = notebooks["vowpal_wabbit_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE=size, TOP_K=10), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -166,7 +158,7 @@ def test_vw_deep_dive_functional( @pytest.mark.skip(reason="NNI pip package has installation incompatibilities") def test_nni_tuning_svd(notebooks, output_notebook, kernel_name, tmp): notebook_path = notebooks["nni_tuning_svd"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -194,15 +186,13 @@ def test_cornac_bpr_functional( notebooks, output_notebook, kernel_name, size, expected_values ): notebook_path = notebooks["cornac_bpr_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE=size), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -229,15 +219,13 @@ def test_lightfm_functional( notebooks, output_notebook, kernel_name, size, epochs, expected_values ): notebook_path = notebooks["lightfm_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE=size, NO_EPOCHS=epochs), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -251,10 +239,8 @@ def test_lightfm_functional( ) 
def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_values): notebook_path = notebooks["geoimc_quickstart"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + results = read_notebook(output_notebook) for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) @@ -265,15 +251,13 @@ def test_geoimc_functional(notebooks, output_notebook, kernel_name, expected_val @pytest.mark.skip(reason="xLearn pip package has installation incompatibilities") def test_xlearn_fm_functional(notebooks, output_notebook, kernel_name): notebook_path = notebooks["xlearn_fm_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(LEARNING_RATE=0.2, EPOCH=10), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["auc_score"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL) @@ -289,7 +273,7 @@ def test_benchmark_movielens_cpu( notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg ): notebook_path = notebooks["benchmark_movielens"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, diff --git a/tests/smoke/examples/test_notebooks_gpu.py b/tests/smoke/examples/test_notebooks_gpu.py index 082b8664b..6d77fe6b0 100644 --- a/tests/smoke/examples/test_notebooks_gpu.py +++ b/tests/smoke/examples/test_notebooks_gpu.py @@ -3,10 +3,9 @@ import pytest -import papermill as pm -import scrapbook as sb from recommenders.utils.gpu_utils import get_number_gpus +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.5 @@ -22,15 +21,13 @@ def test_gpu_vm(): @pytest.mark.gpu def 
test_ncf_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["ncf"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=256), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.0409234, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.1773, rel=TOL, abs=ABS_TOL) @@ -42,7 +39,7 @@ def test_ncf_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["ncf_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -50,9 +47,7 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name): TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1, BATCH_SIZE=1024 ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) # There is too much variability to do an approx equal, just adding top values assert results["map"] == pytest.approx(0.0370396, rel=TOL, abs=ABS_TOL) @@ -69,15 +64,13 @@ def test_ncf_deep_dive_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_fastai_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["fastai"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k", EPOCHS=1), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["rmse"] == pytest.approx(0.959352, rel=TOL, abs=ABS_TOL) assert results["mae"] == pytest.approx(0.766504, rel=TOL, abs=ABS_TOL) @@ -93,7 +86,7 @@ def 
test_fastai_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["xdeepfm_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -105,9 +98,7 @@ def test_xdeepfm_smoke(notebooks, output_notebook, kernel_name): RANDOM_SEED=42, ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_syn"]["auc"] == pytest.approx(0.5043, rel=TOL, abs=ABS_TOL) assert results["res_syn"]["logloss"] == pytest.approx(0.7046, rel=TOL, abs=ABS_TOL) @@ -130,12 +121,10 @@ def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp): "RANKING_METRICS": ["ndcg_at_k", "precision_at_k"], "RANDOM_SEED": 42, } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["rmse"] == pytest.approx(1.06034, rel=TOL, abs=ABS_TOL) assert results["mae"] == pytest.approx(0.876228, rel=TOL, abs=ABS_TOL) @@ -147,15 +136,13 @@ def test_wide_deep_smoke(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.gpu def test_naml_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["naml_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(epochs=1, seed=42, MIND_type="demo"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_syn"]["group_auc"] == pytest.approx( 0.5801, rel=TOL, abs=ABS_TOL @@ -167,15 +154,13 @@ def test_naml_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_nrms_smoke(notebooks, 
output_notebook, kernel_name): notebook_path = notebooks["nrms_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(epochs=1, seed=42, MIND_type="demo"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_syn"]["group_auc"] == pytest.approx( 0.5768, rel=TOL, abs=ABS_TOL @@ -187,15 +172,13 @@ def test_nrms_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_npa_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["npa_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(epochs=1, seed=42, MIND_type="demo"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_syn"]["group_auc"] == pytest.approx( 0.5861, rel=TOL, abs=ABS_TOL @@ -207,15 +190,13 @@ def test_npa_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_lstur_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["lstur_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(epochs=1, seed=40, MIND_type="demo"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_syn"]["group_auc"] == pytest.approx( 0.5977, rel=TOL, abs=ABS_TOL @@ -227,15 +208,13 @@ def test_lstur_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_cornac_bivae_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["cornac_bivae_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE="100k"), 
) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.146552, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.474124, rel=TOL, abs=ABS_TOL) diff --git a/tests/smoke/examples/test_notebooks_pyspark.py b/tests/smoke/examples/test_notebooks_pyspark.py index 2e521104a..6a0f02e27 100644 --- a/tests/smoke/examples/test_notebooks_pyspark.py +++ b/tests/smoke/examples/test_notebooks_pyspark.py @@ -4,8 +4,8 @@ import sys import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.05 @@ -18,16 +18,14 @@ @pytest.mark.notebooks def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_pyspark"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.0052, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.0463, rel=TOL, abs=ABS_TOL) @@ -46,14 +44,12 @@ def test_als_pyspark_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows") def test_mmlspark_lightgbm_criteo_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["mmlspark_lightgbm_criteo"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(DATA_SIZE="sample", NUM_ITERATIONS=50), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["auc"] == pytest.approx(0.65, rel=TOL, abs=ABS_TOL) diff --git 
a/tests/smoke/examples/test_notebooks_python.py b/tests/smoke/examples/test_notebooks_python.py index 0bd359ce3..a7463d9b1 100644 --- a/tests/smoke/examples/test_notebooks_python.py +++ b/tests/smoke/examples/test_notebooks_python.py @@ -3,8 +3,8 @@ import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.05 @@ -14,15 +14,13 @@ @pytest.mark.notebooks def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_single_node"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.110591, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.382461, rel=TOL, abs=ABS_TOL) @@ -33,15 +31,13 @@ def test_sar_single_node_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["baseline_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(TOP_K=10, MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["rmse"] == pytest.approx(1.054252, rel=TOL, abs=ABS_TOL) assert results["mae"] == pytest.approx(0.846033, rel=TOL, abs=ABS_TOL) @@ -56,15 +52,13 @@ def test_baseline_deep_dive_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["surprise_svd_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, 
kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["rmse"] == pytest.approx(0.96, rel=TOL, abs=ABS_TOL) assert results["mae"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL) @@ -80,15 +74,13 @@ def test_surprise_svd_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.skip(reason="VW pip package has installation incompatibilities") def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["vowpal_wabbit_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["rmse"] == pytest.approx(0.985920, rel=TOL, abs=ABS_TOL) assert results["mae"] == pytest.approx(0.71292, rel=TOL, abs=ABS_TOL) @@ -103,7 +95,7 @@ def test_vw_deep_dive_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): notebook_path = notebooks["lightgbm_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -116,9 +108,7 @@ def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): METRIC="auc", ), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["res_basic"]["auc"] == pytest.approx(0.7674, rel=TOL, abs=ABS_TOL) assert results["res_basic"]["logloss"] == pytest.approx( @@ -133,15 +123,13 @@ def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_cornac_bpr_smoke(notebooks, output_notebook, kernel_name): notebook_path = 
notebooks["cornac_bpr_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(MOVIELENS_DATA_SIZE="100k"), ) - results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ - "data" - ] + results = read_notebook(output_notebook) assert results["map"] == pytest.approx(0.1091, rel=TOL, abs=ABS_TOL) assert results["ndcg"] == pytest.approx(0.4034, rel=TOL, abs=ABS_TOL) diff --git a/tests/unit/examples/test_notebooks_gpu.py b/tests/unit/examples/test_notebooks_gpu.py index 45073daf5..a5e9b47ab 100644 --- a/tests/unit/examples/test_notebooks_gpu.py +++ b/tests/unit/examples/test_notebooks_gpu.py @@ -4,9 +4,9 @@ import os import pytest -import papermill as pm from recommenders.utils.gpu_utils import get_number_gpus +from recommenders.utils.notebook_utils import execute_notebook @pytest.mark.notebooks @@ -19,7 +19,7 @@ def test_gpu_vm(): @pytest.mark.gpu def test_fastai(notebooks, output_notebook, kernel_name): notebook_path = notebooks["fastai"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -31,7 +31,7 @@ def test_fastai(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_ncf(notebooks, output_notebook, kernel_name): notebook_path = notebooks["ncf"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -45,7 +45,7 @@ def test_ncf(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_ncf_deep_dive(notebooks, output_notebook, kernel_name): notebook_path = notebooks["ncf_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -59,7 +59,7 @@ def test_ncf_deep_dive(notebooks, output_notebook, kernel_name): @pytest.mark.gpu def test_xdeepfm(notebooks, output_notebook, kernel_name): notebook_path = notebooks["xdeepfm_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, 
output_notebook, kernel_name=kernel_name, @@ -89,7 +89,7 @@ def test_wide_deep(notebooks, output_notebook, kernel_name, tmp): "RATING_METRICS": ["rmse"], "RANKING_METRICS": ["ndcg_at_k"], } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) @@ -106,7 +106,7 @@ def test_wide_deep(notebooks, output_notebook, kernel_name, tmp): "RATING_METRICS": ["rsquared"], "RANKING_METRICS": ["map_at_k"], } - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=params ) @@ -115,7 +115,7 @@ def test_wide_deep(notebooks, output_notebook, kernel_name, tmp): @pytest.mark.gpu def test_dkn_quickstart(notebooks, output_notebook, kernel_name): notebook_path = notebooks["dkn_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, diff --git a/tests/unit/examples/test_notebooks_pyspark.py b/tests/unit/examples/test_notebooks_pyspark.py index 372fe6f23..991095f75 100644 --- a/tests/unit/examples/test_notebooks_pyspark.py +++ b/tests/unit/examples/test_notebooks_pyspark.py @@ -4,13 +4,13 @@ import sys import pytest -import papermill as pm from recommenders.utils.constants import ( DEFAULT_RATING_COL, DEFAULT_USER_COL, DEFAULT_ITEM_COL, ) +from recommenders.utils.notebook_utils import execute_notebook # This is a flaky test that can fail unexpectedly @@ -22,7 +22,7 @@ ) def test_als_pyspark_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_pyspark"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -39,7 +39,7 @@ def test_als_pyspark_runs(notebooks, output_notebook, kernel_name): @pytest.mark.spark def test_data_split_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["data_split"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, 
kernel_name=kernel_name) # This is a flaky test that can fail unexpectedly @@ -51,7 +51,7 @@ def test_data_split_runs(notebooks, output_notebook, kernel_name): ) def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["als_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -73,7 +73,7 @@ def test_als_deep_dive_runs(notebooks, output_notebook, kernel_name): ) def test_evaluation_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["evaluation"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) # This is a flaky test that can fail unexpectedly @@ -82,7 +82,7 @@ def test_evaluation_runs(notebooks, output_notebook, kernel_name): @pytest.mark.spark def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["evaluation_diversity"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -105,7 +105,7 @@ def test_evaluation_diversity_runs(notebooks, output_notebook, kernel_name): ) def test_spark_tuning(notebooks, output_notebook, kernel_name): notebook_path = notebooks["spark_tuning"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -125,7 +125,7 @@ def test_spark_tuning(notebooks, output_notebook, kernel_name): @pytest.mark.skipif(sys.platform == "win32", reason="Not implemented on Windows") def test_mmlspark_lightgbm_criteo_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["mmlspark_lightgbm_criteo"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, diff --git a/tests/unit/examples/test_notebooks_python.py b/tests/unit/examples/test_notebooks_python.py index ed3d494fd..810721483 100644 --- 
a/tests/unit/examples/test_notebooks_python.py +++ b/tests/unit/examples/test_notebooks_python.py @@ -4,8 +4,8 @@ import sys import pytest -import papermill as pm -import scrapbook as sb + +from recommenders.utils.notebook_utils import execute_notebook, read_notebook TOL = 0.05 ABS_TOL = 0.05 @@ -14,7 +14,7 @@ @pytest.mark.notebooks def test_template_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["template"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, parameters=dict(PM_VERSION=pm.__version__), @@ -30,25 +30,25 @@ def test_template_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_sar_single_node_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_single_node"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) @pytest.mark.notebooks def test_sar_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["sar_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) @pytest.mark.notebooks def test_baseline_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["baseline_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) @pytest.mark.notebooks def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["surprise_svd_deep_dive"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -59,7 +59,7 @@ def test_surprise_deep_dive_runs(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_lightgbm(notebooks, output_notebook, kernel_name): notebook_path = 
notebooks["lightgbm_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -77,14 +77,14 @@ def test_lightgbm(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks def test_cornac_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["cornac_bpr_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) @pytest.mark.notebooks @pytest.mark.experimental def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["rlrmc_quickstart"] - pm.execute_notebook( + execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, @@ -97,4 +97,4 @@ def test_rlrmc_quickstart_runs(notebooks, output_notebook, kernel_name): @pytest.mark.skip(reason="VW pip package has installation incompatibilities") def test_vw_deep_dive_runs(notebooks, output_notebook, kernel_name): notebook_path = notebooks["vowpal_wabbit_deep_dive"] - pm.execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) + execute_notebook(notebook_path, output_notebook, kernel_name=kernel_name) diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py index 755e09267..dc2b9e343 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.py +++ b/tests/unit/recommenders/utils/test_notebook_utils.py @@ -36,7 +36,7 @@ def test_is_jupyter(notebook_types, output_notebook, kernel_name): assert is_databricks() is False # Test on Jupyter notebook - pm.execute_notebook( + execute_notebook( notebook_types, output_notebook, kernel_name=kernel_name, From 70c068e421e7164624b2db458d2e6e26e26a5ec1 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 11:02:41 +0100 Subject: [PATCH 07/22] Replace papermill and scrapbook for new internal function Signed-off-by: miguelgfierro --- 
.../utils/test_notebook_utils.ipynb | 10 ++++------ .../recommenders/utils/test_notebook_utils.py | 18 +++++++++++------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/unit/recommenders/utils/test_notebook_utils.ipynb b/tests/unit/recommenders/utils/test_notebook_utils.ipynb index e9d0d72e4..f32ae0102 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.ipynb +++ b/tests/unit/recommenders/utils/test_notebook_utils.ipynb @@ -22,11 +22,9 @@ "metadata": {}, "outputs": [], "source": [ - "# set the environment path to find Recommenders\n", - "import sys\n", "\n", - "import scrapbook as sb\n", - "from recommenders.utils.notebook_utils import is_jupyter, is_databricks" + "from recommenders.utils.notebook_utils import is_jupyter, is_databricks\n", + "from recommenders.utils.notebook_utils import store_metadata\n" ] }, { @@ -72,8 +70,8 @@ } ], "source": [ - "sb.glue(\"is_jupyter\", is_jupyter())\n", - "sb.glue(\"is_databricks\", is_databricks())" + "store_metadata(\"is_jupyter\", is_jupyter())\n", + "store_metadata(\"is_databricks\", is_databricks())" ] }, { diff --git a/tests/unit/recommenders/utils/test_notebook_utils.py b/tests/unit/recommenders/utils/test_notebook_utils.py index dc2b9e343..225db115a 100644 --- a/tests/unit/recommenders/utils/test_notebook_utils.py +++ b/tests/unit/recommenders/utils/test_notebook_utils.py @@ -1,8 +1,8 @@ # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
-import nbclient import pytest +import nbclient import papermill as pm import scrapbook as sb from pathlib import Path @@ -41,12 +41,10 @@ def test_is_jupyter(notebook_types, output_notebook, kernel_name): output_notebook, kernel_name=kernel_name, ) - nb = sb.read_notebook(output_notebook) - df = nb.scraps.dataframe - result_is_jupyter = df.loc[df["name"] == "is_jupyter", "data"].values[0] - assert result_is_jupyter # is True not allowed - result_is_databricks = df.loc[df["name"] == "is_databricks", "data"].values[0] - assert not result_is_databricks + results = read_notebook(output_notebook) + + assert results["is_jupyter"] + assert not results["is_databricks"] @pytest.mark.spark @@ -56,6 +54,7 @@ def test_is_databricks(): pass +@pytest.mark.notebooks def test_notebook_execution_int(notebook_programmatic, output_notebook, kernel_name): execute_notebook( notebook_programmatic, @@ -68,6 +67,7 @@ def test_notebook_execution_int(notebook_programmatic, output_notebook, kernel_n assert results["response1"] == 8 +@pytest.mark.notebooks def test_notebook_execution_float(notebook_programmatic, output_notebook, kernel_name): execute_notebook( notebook_programmatic, @@ -80,6 +80,7 @@ def test_notebook_execution_float(notebook_programmatic, output_notebook, kernel assert results["response1"] == 3.5 +@pytest.mark.notebooks def test_notebook_execution_letter(notebook_programmatic, output_notebook, kernel_name): execute_notebook( notebook_programmatic, @@ -92,6 +93,7 @@ def test_notebook_execution_letter(notebook_programmatic, output_notebook, kerne assert results["response2"] is True +@pytest.mark.notebooks def test_notebook_execution_other_letter( notebook_programmatic, output_notebook, kernel_name ): @@ -106,6 +108,7 @@ def test_notebook_execution_other_letter( assert results["response2"] == "A" +@pytest.mark.notebooks def test_notebook_execution_value_error_fails( notebook_programmatic, output_notebook, kernel_name ): @@ -118,6 +121,7 @@ def 
test_notebook_execution_value_error_fails( ) +@pytest.mark.notebooks def test_notebook_execution_int_with_comment( notebook_programmatic, output_notebook, kernel_name ): From 54c2278962eb772e52e282e89b9c3a24cb284846 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 11:41:55 +0100 Subject: [PATCH 08/22] Update new programmatic execution code Signed-off-by: miguelgfierro --- examples/00_quick_start/als_movielens.ipynb | 12 +- examples/00_quick_start/dkn_MIND.ipynb | 10 +- .../00_quick_start/fastai_movielens.ipynb | 3 +- .../00_quick_start/geoimc_movielens.ipynb | 8 +- .../00_quick_start/lightgbm_tinycriteo.ipynb | 31 +++- examples/00_quick_start/lstur_MIND.ipynb | 11 +- examples/00_quick_start/naml_MIND.ipynb | 11 +- examples/00_quick_start/npa_MIND.ipynb | 11 +- examples/00_quick_start/nrms_MIND.ipynb | 9 +- examples/00_quick_start/rbm_movielens.ipynb | 11 +- examples/00_quick_start/rlrmc_movielens.ipynb | 50 ++---- examples/00_quick_start/sar_movielens.ipynb | 6 +- examples/00_quick_start/sasrec_amazon.ipynb | 18 +- .../sequential_recsys_amazondataset.ipynb | 22 +-- examples/00_quick_start/tfidf_covid.ipynb | 8 +- .../00_quick_start/wide_deep_movielens.ipynb | 2 +- examples/00_quick_start/xdeepfm_criteo.ipynb | 9 +- .../functional/examples/test_notebooks_gpu.py | 163 ++++++++---------- tests/smoke/examples/test_notebooks_python.py | 12 +- 19 files changed, 210 insertions(+), 197 deletions(-) diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index dc784f996..2e5b4d7f6 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -43,7 +43,9 @@ } ], "source": [ - "# set the environment path to find Recommenders\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "\n", "import sys\n", "import pyspark\n", "from pyspark.ml.recommendation import ALS\n", @@ -51,8 +53,6 @@ "from pyspark.sql import 
SparkSession\n", "from pyspark.sql.types import StructType, StructField\n", "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "from recommenders.utils.timer import Timer\n", "from recommenders.datasets import movielens\n", @@ -60,6 +60,7 @@ "from recommenders.datasets.spark_splitters import spark_random_split\n", "from recommenders.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", "from recommenders.utils.spark_utils import start_or_get_spark\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" @@ -768,9 +769,8 @@ } ], "source": [ + "# Record results for tests - ignore this cell\n", "if is_jupyter():\n", - " # Record results with papermill for tests\n", - " import scrapbook as sb\n", " store_metadata(\"map\", rank_eval.map_at_k())\n", " store_metadata(\"ndcg\", rank_eval.ndcg_at_k())\n", " store_metadata(\"precision\", rank_eval.precision_at_k())\n", @@ -815,4 +815,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/dkn_MIND.ipynb b/examples/00_quick_start/dkn_MIND.ipynb index 6ef941e3a..87e2acdaf 100644 --- a/examples/00_quick_start/dkn_MIND.ipynb +++ b/examples/00_quick_start/dkn_MIND.ipynb @@ -94,6 +94,7 @@ "from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources, prepare_hparams\n", "from recommenders.models.deeprec.models.dkn import DKN\n", "from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Tensorflow version: {tf.__version__}\")" @@ -345,7 +346,12 @@ "metadata": {}, "outputs": [], "source": [ - "store_metadata(\"res\", 
res)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"auc\", res[\"auc\"])\n", + "store_metadata(\"group_auc\", res[\"group_auc\"])\n", + "store_metadata(\"ndcg@5\", res[\"ndcg@5\"])\n", + "store_metadata(\"ndcg@10\", res[\"ndcg@10\"])\n", + "store_metadata(\"mean_mrr\", res[\"mean_mrr\"])\n" ] }, { @@ -395,4 +401,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index fd64b1f03..031f8b6be 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -61,6 +61,7 @@ "from recommenders.models.fastai.fastai_utils import cartesian_product, score\n", "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", @@ -914,7 +915,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", "store_metadata(\"precision\", eval_precision)\n", diff --git a/examples/00_quick_start/geoimc_movielens.ipynb b/examples/00_quick_start/geoimc_movielens.ipynb index f8518db12..2bb49fb01 100644 --- a/examples/00_quick_start/geoimc_movielens.ipynb +++ b/examples/00_quick_start/geoimc_movielens.ipynb @@ -31,9 +31,8 @@ "from recommenders.models.geoimc.geoimc_data import ML_100K\n", "from recommenders.models.geoimc.geoimc_algorithm import IMCProblem\n", "from recommenders.models.geoimc.geoimc_predict import Inferer\n", - "from recommenders.evaluation.python_evaluation import (\n", - " rmse, mae\n", - ")" + "from 
recommenders.evaluation.python_evaluation import rmse, mae\n", + "from recommenders.utils.notebook_utils import store_metadata" ] }, { @@ -287,6 +286,7 @@ "metadata": {}, "outputs": [], "source": [ + "# Record results for tests - ignore this cell\n", "store_metadata(\"rmse\", RMSE)\n", "store_metadata(\"mae\", MAE)" ] @@ -326,4 +326,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/lightgbm_tinycriteo.ipynb b/examples/00_quick_start/lightgbm_tinycriteo.ipynb index a62e7538a..bd0e4e746 100644 --- a/examples/00_quick_start/lightgbm_tinycriteo.ipynb +++ b/examples/00_quick_start/lightgbm_tinycriteo.ipynb @@ -59,6 +59,7 @@ "\n", "import recommenders.datasets.criteo as criteo\n", "import recommenders.models.lightgbm.lightgbm_utils as lgb_utils\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"LightGBM version: {}\".format(lgb.__version__))" @@ -719,8 +720,18 @@ "auc = roc_auc_score(np.asarray(test_y.reshape(-1)), np.asarray(test_preds))\n", "logloss = log_loss(np.asarray(test_y.reshape(-1)), np.asarray(test_preds), eps=1e-12)\n", "res_basic = {\"auc\": auc, \"logloss\": logloss}\n", - "print(res_basic)\n", - "sb.glue(\"res_basic\", res_basic)" + "print(res_basic)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Record results for tests - ignore this cell\n", + "store_metadata(\"auc_basic\", res_basic[\"auc\"])\n", + "store_metadata(\"logloss_basic\", res_basic[\"logloss\"])" ] }, { @@ -897,8 +908,18 @@ "logloss = log_loss(np.asarray(test_y.reshape(-1)), np.asarray(test_preds), eps=1e-12)\n", "res_optim = {\"auc\": auc, \"logloss\": logloss}\n", "\n", - "print(res_optim)\n", - "sb.glue(\"res_optim\", res_optim)" + "print(res_optim)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 
Record results for tests - ignore this cell\n", + "store_metadata(\"auc_opt\", res_optim[\"auc\"])\n", + "store_metadata(\"logloss_opt\", res_optim[\"logloss\"])" ] }, { @@ -979,4 +1000,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/lstur_MIND.ipynb b/examples/00_quick_start/lstur_MIND.ipynb index daf17b9ee..808d03e40 100644 --- a/examples/00_quick_start/lstur_MIND.ipynb +++ b/examples/00_quick_start/lstur_MIND.ipynb @@ -109,6 +109,7 @@ "from recommenders.models.newsrec.models.lstur import LSTURModel\n", "from recommenders.models.newsrec.io.mind_iterator import MINDIterator\n", "from recommenders.models.newsrec.newsrec_utils import get_mind_data_set\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Tensorflow version: {}\".format(tf.__version__))\n" @@ -136,7 +137,7 @@ "batch_size = 32\n", "\n", "# Options: demo, small, large\n", - "MIND_type = 'demo'" + "MIND_type = \"demo\"" ] }, { @@ -440,7 +441,11 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res_syn\", res_syn)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"group_auc\", res_syn['group_auc'])\n", + "store_metadata(\"mean_mrr\", res_syn['mean_mrr'])\n", + "store_metadata(\"ndcg@5\", res_syn['ndcg@5'])\n", + "store_metadata(\"ndcg@10\", res_syn['ndcg@10'])" ] }, { @@ -559,4 +564,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/naml_MIND.ipynb b/examples/00_quick_start/naml_MIND.ipynb index 741803a68..42ed0aaf9 100644 --- a/examples/00_quick_start/naml_MIND.ipynb +++ b/examples/00_quick_start/naml_MIND.ipynb @@ -94,8 +94,8 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import numpy as np\n", "import zipfile\n", "from tqdm import tqdm\n", @@ -109,6 +109,7 @@ "from recommenders.models.newsrec.models.naml import NAMLModel\n", "from 
recommenders.models.newsrec.io.mind_all_iterator import MINDAllIterator\n", "from recommenders.models.newsrec.newsrec_utils import get_mind_data_set\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Tensorflow version: {}\".format(tf.__version__))\n" @@ -437,7 +438,11 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res_syn\", res_syn)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"group_auc\", res_syn['group_auc'])\n", + "store_metadata(\"mean_mrr\", res_syn['mean_mrr'])\n", + "store_metadata(\"ndcg@5\", res_syn['ndcg@5'])\n", + "store_metadata(\"ndcg@10\", res_syn['ndcg@10'])" ] }, { @@ -556,4 +561,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/npa_MIND.ipynb b/examples/00_quick_start/npa_MIND.ipynb index cd68bcdd0..a8cfd7882 100644 --- a/examples/00_quick_start/npa_MIND.ipynb +++ b/examples/00_quick_start/npa_MIND.ipynb @@ -94,8 +94,8 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import numpy as np\n", "import zipfile\n", "from tqdm import tqdm\n", @@ -109,6 +109,7 @@ "from recommenders.models.newsrec.models.npa import NPAModel\n", "from recommenders.models.newsrec.io.mind_iterator import MINDIterator\n", "from recommenders.models.newsrec.newsrec_utils import get_mind_data_set\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Tensorflow version: {}\".format(tf.__version__))" @@ -417,7 +418,11 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res_syn\", res_syn)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"group_auc\", res_syn['group_auc'])\n", + "store_metadata(\"mean_mrr\", res_syn['mean_mrr'])\n", + "store_metadata(\"ndcg@5\", res_syn['ndcg@5'])\n", + "store_metadata(\"ndcg@10\", res_syn['ndcg@10'])" ] }, { @@ 
-534,4 +539,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/nrms_MIND.ipynb b/examples/00_quick_start/nrms_MIND.ipynb index d9a7cc93a..d66564c0d 100644 --- a/examples/00_quick_start/nrms_MIND.ipynb +++ b/examples/00_quick_start/nrms_MIND.ipynb @@ -94,8 +94,8 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import numpy as np\n", "import zipfile\n", "from tqdm import tqdm\n", @@ -109,6 +109,7 @@ "from recommenders.models.newsrec.models.nrms import NRMSModel\n", "from recommenders.models.newsrec.io.mind_iterator import MINDIterator\n", "from recommenders.models.newsrec.newsrec_utils import get_mind_data_set\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Tensorflow version: {}\".format(tf.__version__))\n" @@ -434,7 +435,11 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res_syn\", res_syn)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"group_auc\", res_syn['group_auc'])\n", + "store_metadata(\"mean_mrr\", res_syn['mean_mrr'])\n", + "store_metadata(\"ndcg@5\", res_syn['ndcg@5'])\n", + "store_metadata(\"ndcg@10\", res_syn['ndcg@10'])" ] }, { diff --git a/examples/00_quick_start/rbm_movielens.ipynb b/examples/00_quick_start/rbm_movielens.ipynb index 70df21ac3..17e5e2cbe 100644 --- a/examples/00_quick_start/rbm_movielens.ipynb +++ b/examples/00_quick_start/rbm_movielens.ipynb @@ -65,9 +65,7 @@ } ], "source": [ - "# set the environment path to find Recommenders\n", "import sys\n", - "\n", "import pandas as pd\n", "import numpy as np\n", "import scrapbook as sb\n", @@ -81,15 +79,16 @@ "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.plot import line_graph\n", + "from recommenders.utils.notebook_utils import store_metadata\n", 
"\n", "#For interactive mode only\n", "%load_ext autoreload\n", "%autoreload 2\n", "%matplotlib inline\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Pandas version: {}\".format(pd.__version__))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Pandas version: {pd.__version__}\")\n", + "print(f\"Tensorflow version: {tf.__version__})" ] }, { @@ -778,7 +777,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_100k['MAP'][0])\n", "store_metadata(\"ndcg\", eval_100k['nDCG@k'][0])\n", "store_metadata(\"precision\", eval_100k['Precision@k'][0])\n", diff --git a/examples/00_quick_start/rlrmc_movielens.ipynb b/examples/00_quick_start/rlrmc_movielens.ipynb index ed78974d8..6ec3e6a86 100644 --- a/examples/00_quick_start/rlrmc_movielens.ipynb +++ b/examples/00_quick_start/rlrmc_movielens.ipynb @@ -22,34 +22,6 @@ "This notebook provides an example of how to utilize and evaluate RLRMC implementation in **recommenders**." 
] }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import sys\n", - "import time\n", - "import pandas as pd\n", - "\n", - "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.datasets.python_splitters import python_stratified_split\n", - "from recommenders.datasets import movielens\n", - "from recommenders.models.rlrmc.RLRMCdataset import RLRMCdataset \n", - "from recommenders.models.rlrmc.RLRMCalgorithm import RLRMCalgorithm \n", - "# Pymanopt installation is required via\n", - "# pip install pymanopt \n", - "from recommenders.evaluation.python_evaluation import (\n", - " rmse, mae\n", - ")\n", - "\n", - "# import logging\n", - "\n", - "# %load_ext autoreload\n", - "# %autoreload 2" - ] - }, { "cell_type": "code", "execution_count": 2, @@ -66,8 +38,24 @@ } ], "source": [ - "print(\"Pandas version: {}\".format(pd.__version__))\n", - "print(\"System version: {}\".format(sys.version))\n" + "import sys\n", + "import time\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from recommenders.datasets.python_splitters import python_random_split\n", + "from recommenders.datasets.python_splitters import python_stratified_split\n", + "from recommenders.datasets import movielens\n", + "from recommenders.models.rlrmc.RLRMCdataset import RLRMCdataset \n", + "from recommenders.models.rlrmc.RLRMCalgorithm import RLRMCalgorithm \n", + "from recommenders.evaluation.python_evaluation import rmse, mae\n", + "from recommenders.utils.notebook_utils import store_metadata\n", + "\n", + "print(f\"Pandas version: {pd.__version__}\")\n", + "print(f\"System version: {sys.version}\")\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" ] }, { @@ -331,4 +319,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/sar_movielens.ipynb b/examples/00_quick_start/sar_movielens.ipynb index 
9fe09c6ad..09243e6fd 100644 --- a/examples/00_quick_start/sar_movielens.ipynb +++ b/examples/00_quick_start/sar_movielens.ipynb @@ -60,7 +60,6 @@ "import logging\n", "import numpy as np\n", "import pandas as pd\n", - "import scrapbook as sb\n", "from sklearn.preprocessing import minmax_scale\n", "\n", "from recommenders.utils.timer import Timer\n", @@ -79,6 +78,7 @@ " rsquared,\n", " exp_var\n", ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", @@ -772,7 +772,7 @@ }, "outputs": [], "source": [ - "# Record results with papermill for tests - ignore this cell\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", "store_metadata(\"precision\", eval_precision)\n", @@ -804,4 +804,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/sasrec_amazon.ipynb b/examples/00_quick_start/sasrec_amazon.ipynb index 164fb365f..2e44fbe25 100644 --- a/examples/00_quick_start/sasrec_amazon.ipynb +++ b/examples/00_quick_start/sasrec_amazon.ipynb @@ -60,11 +60,9 @@ "import re\n", "import sys\n", "import os\n", - "import scrapbook as sb\n", "from tempfile import TemporaryDirectory\n", "import numpy as np\n", "import pandas as pd \n", - "\n", "from collections import defaultdict\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", @@ -72,17 +70,13 @@ "from recommenders.utils.timer import Timer\n", "from recommenders.datasets.amazon_reviews import get_review_data\n", "from recommenders.datasets.split_utils import filter_k_core\n", - "\n", - "# Transformer Based Models\n", "from recommenders.models.sasrec.model import SASREC\n", "from recommenders.models.sasrec.ssept import SSEPT\n", - "\n", - "# Sampler for sequential prediction\n", "from recommenders.models.sasrec.sampler import WarpSampler\n", "from 
recommenders.models.sasrec.util import SASRecDataSet\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Tensorflow version: {tf.__version__}\")" ] }, { @@ -438,11 +432,9 @@ } ], "source": [ - "# Record results with papermill for tests - ignore this cell\n", - "# sb.glue(\"res_syn\", res_syn)\n", - "\n", - "sb.glue(\"ndcg@10\", t_test[0])\n", - "sb.glue(\"Hit@10\", t_test[1])" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"ndcg@10\", t_test[0])\n", + "store_metadata(\"Hit@10\", t_test[1])" ] }, { diff --git a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb index 8d22ba461..36efe26ea 100644 --- a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb +++ b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb @@ -61,11 +61,9 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import logging\n", - "import papermill as pm\n", - "import scrapbook as sb\n", "from tempfile import TemporaryDirectory\n", "import numpy as np\n", "import tensorflow.compat.v1 as tf\n", @@ -78,22 +76,19 @@ ")\n", "from recommenders.datasets.amazon_reviews import download_and_extract, data_preprocessing\n", "from recommenders.datasets.download_utils import maybe_download\n", - "\n", - "\n", "from recommenders.models.deeprec.models.sequential.sli_rec import SLI_RECModel as SeqModel\n", "#### to use the other model, use one of the following lines:\n", "# from recommenders.models.deeprec.models.sequential.asvd import A2SVDModel as SeqModel\n", "# from recommenders.models.deeprec.models.sequential.caser import CaserModel as SeqModel\n", "# from recommenders.models.deeprec.models.sequential.gru import GRUModel as SeqModel\n", "# from recommenders.models.deeprec.models.sequential.sum import SUMModel as SeqModel\n", - "\n", "#from 
recommenders.models.deeprec.models.sequential.nextitnet import NextItNetModel\n", - "\n", "from recommenders.models.deeprec.io.sequential_iterator import SequentialIterator\n", "#from recommenders.models.deeprec.io.nextitnet_iterator import NextItNetIterator\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))\n", + "print(f\"System version: {sys.version}\")\n", + "print(f\"Tensorflow version: {tf.__version__}\")\n", "\n" ] }, @@ -402,7 +397,14 @@ "metadata": {}, "outputs": [], "source": [ - "sb.glue(\"res_syn\", res_syn)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"auc\", res_syn[\"auc\"])\n", + "store_metadata(\"logloss\", res_syn[\"logloss\"])\n", + "store_metadata(\"mean_mrr\", res_syn[\"mean_mrr\"])\n", + "store_metadata(\"ndcg@2\", res_syn[\"ndcg@2\"])\n", + "store_metadata(\"ndcg@4\", res_syn[\"ndcg@4\"])\n", + "store_metadata(\"ndcg@6\", res_syn[\"ndcg@6\"])\n", + "store_metadata(\"group_auc\", res_syn[\"group_auc\"])\n" ] }, { diff --git a/examples/00_quick_start/tfidf_covid.ipynb b/examples/00_quick_start/tfidf_covid.ipynb index 5193d5776..35ee16285 100644 --- a/examples/00_quick_start/tfidf_covid.ipynb +++ b/examples/00_quick_start/tfidf_covid.ipynb @@ -37,12 +37,12 @@ ], "source": [ "import sys\n", - "# Import functions\n", + "\n", "from recommenders.datasets import covid_utils\n", "from recommenders.models.tfidf.tfidf_utils import TfidfRecommender\n", "\n", "# Print version\n", - "print(\"System version: {}\".format(sys.version))" + "print(f\"System version: {sys.version}\")" ] }, { @@ -102,7 +102,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -1229,4 +1229,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/00_quick_start/wide_deep_movielens.ipynb b/examples/00_quick_start/wide_deep_movielens.ipynb index bacac0122..119528dc9 100644 --- a/examples/00_quick_start/wide_deep_movielens.ipynb +++ b/examples/00_quick_start/wide_deep_movielens.ipynb @@ -62,7 +62,6 @@ "import itertools\n", "import numpy as np\n", "import pandas as pd\n", - "import scrapbook as sb\n", "import sklearn.preprocessing\n", "from tempfile import TemporaryDirectory\n", "import tensorflow as tf\n", @@ -82,6 +81,7 @@ "from recommenders.datasets.python_splitters import python_random_split\n", "import recommenders.evaluation.python_evaluation as evaluator\n", "import recommenders.models.wide_deep.wide_deep_utils as wide_deep\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Tensorflow version: {tf.__version__}\")\n", diff --git a/examples/00_quick_start/xdeepfm_criteo.ipynb b/examples/00_quick_start/xdeepfm_criteo.ipynb index 9479ab99d..14ac0d923 100644 --- a/examples/00_quick_start/xdeepfm_criteo.ipynb +++ b/examples/00_quick_start/xdeepfm_criteo.ipynb @@ -48,7 +48,6 @@ "source": [ "import os\n", "import sys\n", - "import scrapbook as sb\n", "from tempfile import TemporaryDirectory\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", @@ -56,9 +55,10 @@ "from recommenders.models.deeprec.deeprec_utils import download_deeprec_resources, prepare_hparams\n", "from recommenders.models.deeprec.models.xDeepFM import XDeepFMModel\n", "from recommenders.models.deeprec.io.iterator import FFMTextIterator\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Tensorflow 
version: {tf.__version__}\")" ] }, { @@ -331,6 +331,7 @@ } ], "source": [ + "# Record results for tests - ignore this cell\n", "store_metadata(\"auc\", result[\"auc\"])\n", "store_metadata(\"logloss\", result[\"logloss\"])" ] @@ -379,4 +380,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/tests/functional/examples/test_notebooks_gpu.py b/tests/functional/examples/test_notebooks_gpu.py index fbfde453a..50e3f3bdb 100644 --- a/tests/functional/examples/test_notebooks_gpu.py +++ b/tests/functional/examples/test_notebooks_gpu.py @@ -247,7 +247,7 @@ def test_wide_deep_functional( os.path.join("tests", "resources", "deeprec", "slirec"), 10, 400, - {"res_syn": {"auc": 0.7183, "logloss": 0.6045}}, + {"auc": 0.7183, "logloss": 0.6045}, 42, ) ], @@ -277,13 +277,11 @@ def test_slirec_quickstart_functional( ) results = read_notebook(output_notebook) - for key, value in expected_values.items(): - assert results[key]["auc"] == pytest.approx(value["auc"], rel=TOL, abs=ABS_TOL) - - ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss - # assert results[key]["logloss"] == pytest.approx( - # value["logloss"], rel=TOL, abs=ABS_TOL - # ) + assert results["auc"] == pytest.approx(expected_values["auc"], rel=TOL, abs=ABS_TOL) + ## disable logloss check, because so far SLi-Rec uses ranking loss, not a point-wise loss + # assert results["logloss"] == pytest.approx( + # expected_values["logloss"], rel=TOL, abs=ABS_TOL + # ) @pytest.mark.gpu @@ -297,12 +295,10 @@ def test_slirec_quickstart_functional( 42, "demo", { - "res_syn": { - "group_auc": 0.6217, - "mean_mrr": 0.2783, - "ndcg@5": 0.3024, - "ndcg@10": 0.3719, - } + "group_auc": 0.6217, + "mean_mrr": 0.2783, + "ndcg@5": 0.3024, + "ndcg@10": 0.3719, }, ) ], @@ -330,19 +326,18 @@ def test_nrms_quickstart_functional( ) results = read_notebook(output_notebook) - for key, value in expected_values.items(): - assert results[key]["group_auc"] == pytest.approx( -
value["group_auc"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["mean_mrr"] == pytest.approx( - value["mean_mrr"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@10"] == pytest.approx( - value["ndcg@10"], rel=TOL, abs=ABS_TOL - ) + assert results["group_auc"] == pytest.approx( + expected_values["group_auc"], rel=TOL, abs=ABS_TOL + ) + assert results["mean_mrr"] == pytest.approx( + expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL + ) @pytest.mark.gpu @@ -356,12 +351,10 @@ def test_nrms_quickstart_functional( 42, "demo", { - "res_syn": { - "group_auc": 0.6436, - "mean_mrr": 0.2990, - "ndcg@5": 0.3297, - "ndcg@10": 0.3933, - } + "group_auc": 0.6436, + "mean_mrr": 0.2990, + "ndcg@5": 0.3297, + "ndcg@10": 0.3933, }, ) ], @@ -389,19 +382,18 @@ def test_naml_quickstart_functional( ) results = read_notebook(output_notebook) - for key, value in expected_values.items(): - assert results[key]["group_auc"] == pytest.approx( - value["group_auc"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["mean_mrr"] == pytest.approx( - value["mean_mrr"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@10"] == pytest.approx( - value["ndcg@10"], rel=TOL, abs=ABS_TOL - ) + assert results["group_auc"] == pytest.approx( + expected_values["group_auc"], rel=TOL, abs=ABS_TOL + ) + assert results["mean_mrr"] == pytest.approx( + expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL + ) @pytest.mark.gpu @@ 
-415,12 +407,10 @@ def test_naml_quickstart_functional( 42, "demo", { - "res_syn": { - "group_auc": 0.6444, - "mean_mrr": 0.2983, - "ndcg@5": 0.3287, - "ndcg@10": 0.3938, - } + "group_auc": 0.6444, + "mean_mrr": 0.2983, + "ndcg@5": 0.3287, + "ndcg@10": 0.3938, }, ) ], @@ -448,19 +438,18 @@ def test_lstur_quickstart_functional( ) results = read_notebook(output_notebook) - for key, value in expected_values.items(): - assert results[key]["group_auc"] == pytest.approx( - value["group_auc"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["mean_mrr"] == pytest.approx( - value["mean_mrr"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@10"] == pytest.approx( - value["ndcg@10"], rel=TOL, abs=ABS_TOL - ) + assert results["group_auc"] == pytest.approx( + expected_values["group_auc"], rel=TOL, abs=ABS_TOL + ) + assert results["mean_mrr"] == pytest.approx( + expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL + ) @pytest.mark.gpu @@ -474,12 +463,10 @@ def test_lstur_quickstart_functional( 42, "demo", { - "res_syn": { - "group_auc": 0.6035, - "mean_mrr": 0.2765, - "ndcg@5": 0.2977, - "ndcg@10": 0.3637, - } + "group_auc": 0.6035, + "mean_mrr": 0.2765, + "ndcg@5": 0.2977, + "ndcg@10": 0.3637, }, ) ], @@ -507,19 +494,18 @@ def test_npa_quickstart_functional( ) results = read_notebook(output_notebook) - for key, value in expected_values.items(): - assert results[key]["group_auc"] == pytest.approx( - value["group_auc"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["mean_mrr"] == pytest.approx( - value["mean_mrr"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@5"] == pytest.approx( - value["ndcg@5"], rel=TOL, abs=ABS_TOL - ) - assert results[key]["ndcg@10"] == pytest.approx( -
value["ndcg@10"], rel=TOL, abs=ABS_TOL - ) + assert results["group_auc"] == pytest.approx( + expected_values["group_auc"], rel=TOL, abs=ABS_TOL + ) + assert results["mean_mrr"] == pytest.approx( + expected_values["mean_mrr"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@5"] == pytest.approx( + expected_values["ndcg@5"], rel=TOL, abs=ABS_TOL + ) + assert results["ndcg@10"] == pytest.approx( + expected_values["ndcg@10"], rel=TOL, abs=ABS_TOL + ) @pytest.mark.gpu @@ -589,10 +575,10 @@ def test_dkn_quickstart_functional(notebooks, output_notebook, kernel_name): ) results = read_notebook(output_notebook) - assert results["res"]["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL) - assert results["res"]["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL) - assert results["res"]["ndcg@5"] == pytest.approx(0.1735, rel=TOL, abs=ABS_TOL) - assert results["res"]["ndcg@10"] == pytest.approx(0.2301, rel=TOL, abs=ABS_TOL) + assert results["auc"] == pytest.approx(0.5651, rel=TOL, abs=ABS_TOL) + assert results["mean_mrr"] == pytest.approx(0.1639, rel=TOL, abs=ABS_TOL) + assert results["ndcg@5"] == pytest.approx(0.1735, rel=TOL, abs=ABS_TOL) + assert results["ndcg@10"] == pytest.approx(0.2301, rel=TOL, abs=ABS_TOL) @pytest.mark.gpu @@ -697,6 +683,7 @@ def test_benchmark_movielens_gpu( parameters=dict(data_sizes=size, algorithms=algos), ) results = read_notebook(output_notebook) + assert len(results["results"]) == 4 for i, value in enumerate(results["results"]): assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL) diff --git a/tests/smoke/examples/test_notebooks_python.py b/tests/smoke/examples/test_notebooks_python.py index a7463d9b1..c7c6df3aa 100644 --- a/tests/smoke/examples/test_notebooks_python.py +++ b/tests/smoke/examples/test_notebooks_python.py @@ -110,14 +110,10 @@ def test_lightgbm_quickstart_smoke(notebooks, output_notebook, kernel_name): ) results = read_notebook(output_notebook) - assert results["res_basic"]["auc"] == 
pytest.approx(0.7674, rel=TOL, abs=ABS_TOL) - assert results["res_basic"]["logloss"] == pytest.approx( - 0.4669, rel=TOL, abs=ABS_TOL - ) - assert results["res_optim"]["auc"] == pytest.approx(0.7757, rel=TOL, abs=ABS_TOL) - assert results["res_optim"]["logloss"] == pytest.approx( - 0.4607, rel=TOL, abs=ABS_TOL - ) + assert results["auc_basic"] == pytest.approx(0.7674, rel=TOL, abs=ABS_TOL) + assert results["logloss_basic"] == pytest.approx(0.4669, rel=TOL, abs=ABS_TOL) + assert results["auc_opt"] == pytest.approx(0.7757, rel=TOL, abs=ABS_TOL) + assert results["logloss_opt"] == pytest.approx(0.4607, rel=TOL, abs=ABS_TOL) @pytest.mark.notebooks From 055e5f059cd77e7fbc6a414bb63eef171723e8bc Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 31 Oct 2023 12:21:56 +0100 Subject: [PATCH 09/22] Update new programmatic execution code Signed-off-by: miguelgfierro --- examples/00_quick_start/als_movielens.ipynb | 2 +- .../als_deep_dive.ipynb | 32 +- .../baseline_deep_dive.ipynb | 37 +- .../cornac_bivae_deep_dive.ipynb | 23 +- .../cornac_bpr_deep_dive.ipynb | 14 +- .../lightgcn_deep_dive.ipynb | 12 +- .../multi_vae_deep_dive.ipynb | 15 +- .../ncf_deep_dive.ipynb | 2 +- .../rbm_deep_dive.ipynb | 12 +- .../sar_deep_dive.ipynb | 5 +- .../surprise_svd_deep_dive.ipynb | 94 +- .../dkn_deep_dive.ipynb | 16 +- .../mmlspark_lightgbm_criteo.ipynb | 13 +- .../vowpal_wabbit_deep_dive.ipynb | 50 +- examples/02_model_hybrid/fm_deep_dive.ipynb | 1094 ++--------------- .../02_model_hybrid/lightfm_deep_dive.ipynb | 40 +- .../als_movielens_diversity_metrics.ipynb | 27 +- .../azureml_hyperdrive_surprise_svd.ipynb | 3 +- .../azureml_hyperdrive_wide_and_deep.ipynb | 3 +- .../nni_ncf.ipynb | 3 +- .../nni_surprise_svd.ipynb | 3 +- .../tuning_spark_als.ipynb | 518 ++++---- examples/06_benchmarks/movielens.ipynb | 19 +- 23 files changed, 556 insertions(+), 1481 deletions(-) diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index 
2e5b4d7f6..e485a216d 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -62,7 +62,7 @@ "from recommenders.utils.spark_utils import start_or_get_spark\n", "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", + "print(f\"System version: {sys.version}\")\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" ] }, diff --git a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb index 81d6f81d7..dceff16a6 100644 --- a/examples/02_model_collaborative_filtering/als_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/als_deep_dive.ipynb @@ -102,16 +102,14 @@ } ], "source": [ - "# set the environment path to find Recommenders\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "\n", "import sys\n", - "import pandas as pd\n", "from matplotlib import pyplot as plt\n", "import numpy as np\n", - "import seaborn as sns\n", - "import sys\n", "import pandas as pd\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "import seaborn as sns\n", "\n", "import pyspark\n", "from pyspark.sql import SparkSession\n", @@ -128,9 +126,9 @@ "from recommenders.tuning.parameter_sweep import generate_param_grid\n", "from recommenders.datasets.spark_splitters import spark_random_split\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Pandas version: {}\".format(pd.__version__))\n", - "print(\"PySpark version: {}\".format(pyspark.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Pandas version: {pd.__version__}\")\n", + "print(f\"PySpark version: {pyspark.__version__}\")" ] }, { @@ -458,9 +456,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", - "[Stage 481:> (0 + 1) / 1]\r", - "\r", + "\r\n", + "[Stage 481:> (0 
+ 1) / 1]\r\n", + "\r\n", " \r" ] } @@ -511,7 +509,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -659,7 +657,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEGCAYAAABmXi5tAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAi60lEQVR4nO3de5xN9f7H8ddn7z0XlyEz5C45dTouHZVLcgqdnFAJEemUSHeFyiWnoptEN+midBLRjVKEilOc5JfGpTiik2unGUNyZ4yZ2fv7+2Nvk8GMqezZmfV+Ph770V7f9V1rfb5W+z1rvntZzDmHiIiUfL5YFyAiIsVDgS8i4hEKfBERj1Dgi4h4hAJfRMQjArEuoCA7u1+o24dOYBXf/S7WJcivtGtg81iXIL9BmeFTraB1usIXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMCsS6gJCh18yDizm6G272TPYOuP2J9oNFfKNW1F4QcLhRk/2vPEfzvSvyn/IFS19+JlS4DoSBZ771OzqJ5edsldu1NXLOWEApxYO4Msj+eVpzDKtHaXNyKp556CL/Px/hX32TU48/nW1+rVnX+Oe4pKlZKZsf2nfTo2Zf09AwaNqzP88+OIKlcWYLBICMee5apU2fkbffwQ4Pp3PkygsEgL730Gs89P57u3TsxcMBtmBl79+yjzx1DWLFiVXEPuUSKv+JWAmc0wu3bxf4xdx+x3l+3MfGtr8I5B6Eg2bMmEPr+WwCsfEUSOt2ClU8BIGvio7idW/HVaUB8u2sxf4Bg+nqy3xsLoVCxjitaFPjHQfa/PyL74/cofduQo67PXbmUPUsXAuCrVYcyfYexZ8B1uAMHyBw7gtDmdKxCCknDXyJ3RSoucx/xLdviSzmZPXdfB85h5U4qxhGVbD6fjzHPDKftJd1JS8tg0Rez+WDmHFavXpPXZ9TIoUx6/R0mTZrKha3+wvBHhtCzV18yM/fT8/p+rF27gapVK5O66EPmzJnPrl27ua5HV2rUqEb9Bi1wzlGpUjhINm74gb9e1IWdO3fRts2FvPjCSJqf3z5Wwy9RcpfNJ3fRRyR0uf2o64PrVrJ/9QAArHItErvfxf7R/QFI6HI72fOnEVq3AuITwYXAjITOfcga/xBuWwZxF3UjcHYrcpd+Wkwjii5N6RwHwW9X4PbuLrjDgay8t5aQCDgAQpvTCG1OB8Dt2IbbvTMv2ONbX07WtIngwn3d7p3RKN2TmjY5m3XrNrJhw//IyclhypTpXN6+Tb4+deuezrx54R/S8+Yv5PL2FwOwZs161q7dAEBGxhZ+3LotL9hvubkHjwx/Onw1CWzdug2ALxYtYefOXQAs+nIZ1atXjf4gPSK0cTUuc2/BHbIP+ezFJ+Z9nqxSDfD5w2F/sF9ONpRKgmAublsGAMG1y/HXPzdq9Re3qAS+mZU3s8fM7Fsz225m28xsdaTtpGgc8/curvH5JD0xkTKDRpD50qgj1vv/8CcIBAht2QSAr3I14s67kLLDX6TM4MfwVale3CWXWNWqV+GHtE15y2npGVSrViVfnxUrVtGpYzsAOnZsR7lySSQnV8jXp0njs4iPj2Pduo0A1Kl
Tm65XXs6iL2Yzc8YkTjvt1COOfX2vq/jo43lHtEv0+Os1pVT/0ST2GMKBaWMB8FWsisvaR8LVA0jsM4q4tteC+SBzN/j8+KrXASDQ4Dx85SvGsvzjKlpX+FOAHUAr51yycy4FuDDSNiVKx/xdy1nyOXsGXMe+J+8n8cr88/x2UjKlbxtC5osjf74CiYuHnGz23nsL2Z/OovTNg2JRtmcNGvwwLVo0Y3Hqx7S4oBlpaRkEg8G89VWqnMyECWO44Ya78q7oExLiyco6QLPzLuGf49/gn+OezLfPVi2b06tXd4b849FiHYvXBVelsn90f7JeH0V8627hRp8ff+26ZH/4Gllj78FX4WQC57QC4MDbo4m/pCeJt47AHdiPcyVj/h6iF/i1nXMjnXObDzY45zY750YCpxS0kZndZGZLzGzJhLWbCup2Qgt+uwLfyVWxpHLhhlKlKTNoBFlvv0Jw7eq8fqFtW8lJXQBAzuIF+GvViUW5JdKm9M3UrFEtb7lG9aps2rQ5X5+MjC1c2fVGmjRtw/1DRwKwa1d42i4pqSwzpr/G/UNH8mXqsrxt0tIzeO/92QC8//6HnHlm3bx1Z55Zl5defJwrOl/P9u07ojY2KVho42osuTKUTsLt3kYoYyNux48QChFcvRhftfBvZKEfviPr5aFkjR1CaOMq3E8lJ4uiFfjfm9kgM6t8sMHMKpvZYOCHgjZyzo1zzjV2zjXueVq1grqdcHyVfx6Lv/bpWFwcbs9u8Acoc9fD5CyYQ07qZ/m2yVnyOYH6ZwMQqNuQYEZasdZcki1e8jWnnXYqtWvXJC4ujq5dO/DBzDn5+qSkVMDMALhn8B1MmPgWAHFxcbw79RUmT36HadNm5dtmxoyPaNWyOQAtW5zHd2vWA1CzZjWmvv0yPXv1Y02kTYqHJf88VeerdioE4iBzD6G0dZBYGkqHL7x8dRoQ+jHyGSsTuRjzB4hr0ZGc1LnFXXbUROsunW7APcC/zezkSNsWYAZwZZSOGTOl77iPQN2zsKTylHtuClnvTICAH4Dsf31AXNMWxLdoA7m5uOwD7BvzEABx57Ui8Kc/4ytbjvgWbQHIfPExgt+v48CMNyh9+30ktOuCy9pP5rgnYjW8EicYDNKv/33MnvUGfp+PCRPfZtWq73hg2ACWLF3OzJlzadmyOcMfHoLDsWDBIu7oey8AV17ZngsuOJfklAr06NEVgN433Mny5d8wctTzTJr4HP363ci+vZncfMtAAO67905SUirw7LPhqZzc3FyanXdJbAZfwiR07YevTn2sdBKlBr1IzidTwB/+7OWmziVQ/1wCZ7fEhYKQk82Bt54Ob+hCZH84iVK9hwJGcNN6cpd8AkDcBR0InHEOmI+c1I8JrV8Zo9Edf3Zw/rHYDmjWyzn36rH67ex+YfEWJsdVxXe/i3UJ8ivtGtg81iXIb1Bm+FQraF0sbst8MAbHFBHxvKhM6ZjZioJWAZULWCciIlEUrTn8ykAbwrdhHsqA/4vSMUVEpBDRCvyZQFnn3NeHrzCz+VE6poiIFCIqge+c613IuqujcUwRESmcnqUjIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeEQg1gUUJHHw3bEuQX4De/fmWJcgIofRFb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPKJIgW9mCUdpSz7+5YiISLQU9Qp
/mpnFHVwws6rA3OiUJCIi0VDUwH8fmGJmfjOrDXwMDIlWUSIicvwFitLJOfeymcUTDv7awM3Ouf+LYl0iInKcFRr4ZnbXoYtALeBroJmZNXPOPRXF2kRE5Dg61hV+0mHL0wpo97ShY9/is2WrSS5XlmlPDiyw38q1/6PH/c8yst81/K1ZQwCefn0mC5atBuCmzq1p2/xsAO5/4U2WrFpPUulEAB667Sr+VLt6lEfiHRdf3IqnnnoIv8/H+Fff5PHHn8+3vlat6rw87ikqVUpm+/adXNezL+npGTRsWJ/nnh1BUrmyhIJBRjz2LFOnzgCgVau/MGrk/cTFx/HVsv9w4013EwwG8/bZuFFDFiyYwd+vuY1p02YV63hLqvgrbiVwRiPcvl3sH3P3Eev9dRsT3/oqnHMQCpI9awKh778FwMpXJKHTLVj5FACyJj6K27kVX50GxLe7FvMHCKavJ/u9sRAKFeu4oqXQwHfOPVhchZzIOrRsQvc253Pv828W2CcYCjH6jVmc9+c/5rV9tmwV325IY8qou8jOyeWGB8dy/ll1KRsJ+buuuSzvB4McPz6fjzHPDKfdJd1JS8tg0RezmTlzDqtXr8nrM3LkUCa//g6TJk2lVau/MPyRIfTs1ZfMzP30ur4fa9duoGrVyny56EPmzJnP7t17GP/KaNq07caaNesZNmwAPa69klcnvJV3zEcfvZe5c/8dq2GXSLnL5pO76CMSutx+1PXBdSvZv3oAAFa5Fond72L/6P4AJHS5nez50witWwHxieBCYEZC5z5kjX8Ity2DuIu6ETi7FblLPy2mEUVXUW/L/KOZjTOzOWb26cFXIf3bHvK+vJm9YmYrzOwNM6t8PAr/PWlU7w+UK1u60D5vfvg5rc89k+TyZfPa1qdt4Zy6fyDg91M6MYHTT6nKwuXfRrtcz2va5GzWrdvIhg3/Iycnh7enTKd9+zb5+tStezrz5i0EYP78hbRvfzEAa9asZ+3aDQBkZGxh69ZtVKqUQkpKBbKzs1mzZj0A//rXZ3TqdEne/m7vcz3vvTeLrVu3FccQPSO0cTUuc2/BHbKz8t5afCI4F35fqQb4/OGwP9gvJxtKJUEwF7ctA4Dg2uX4658btfqLW1Hv0pkKfAXcBww85FWQRw95/ySQAbQHFgMv/fIyT2xbtu/i08X/oevfmudr/+Mp1fi/r79l/4Fsduzey+Jv1rL5p515659960O6DHyCxydOJzsnt5irLrmqVa9CWtqmvOX09AyqV6uSr8+KFavo1LEdAB07tqNcuSSSkyvk69Ok8VnExcexbt1GfvppO4FAgEbn/BmAzldcSs2a1cLHq1aFDh3a8uJLr0VzWFIAf72mlOo/msQeQzgwbSwAvopVcVn7SLh6AIl9RhHX9lowH2TuBp8fX/U6AAQanIevfMVYln9cFekuHSDXOTf2Vx6jsXPurMj7p83suoI6mtlNwE0Az93Xh96d2xbU9YTy+IT36X/1Zfh8+X++Nm94Bt+s+4Hr7n+WCuXK0PD0U/BH+vTtfikVT0oiJzfIQ+OmMn76p9zS5eJYlO9Jgwc/zDPPPEKPHl1ZsGARaWkZ+ebjq1Q5mVcnjKH39f3D88PANdfcxhNPPEBCQjxz//UZwWB43vfJJx/kH/94NK+fFK/gqlT2r0rFV7su8a27kfXqw+Dz469dl/3PDcTt+omEbncSOCc8dXPg7dHEX9ITAnEE1yzHuZIxfw9FD/wPzOw24D3gwMFG59z2AvqfHLnDx4ByZmbu5//bC/ytwjk3DhgHkPX1zBLz6fhmfRqDx0wCYMfufSz46lv8fh9/bXImN17RmhuvaA3APWMmc0q1SgBUqlAOgPi4AB1aNWHizPkxqb0k2pS+mRo1quUtV69elfRNm/P1ycjYQteuNwJQpkxpOnW6lF27dgOQlFSWGdNfY+jQkXyZuixvm0VfLuXCv14BQOvWLTj99PBVYqNz/szkyS8AULFiMm3b/pXc3Fx
mzPg4eoOUI4Q2rsaSK0PpJNzubYQyNuJ2/AhAcPVifDVPh6UQ+uE7sl4eCoD/tD/jq1g1lmUfV0UN/INX5YdO4zigTgH9X+bnO3kmAhWBrWZWhfBtnZ7y4XP35r2//4U3aXFOPf7a5EyCoRB79u3npKQyfPf9Jr77PoNH+oS/1N26YzeVKpTDOce8xSs5rWaVgnYvv9DiJV9z2mmnUrt2TdLTN9Otaweu7dEnX5+UlAps374T5xyDB9/BhInhL1/j4uJ4Z+orTJ78zhF32lSqlMLWrduIj49n4IA+jHhsDAB/POO8vD6v/PNpZs3+l8K+mFhyFdz28A9zX7VTIRAHmXsI7d8HiaWhdDnI3I2vTgNC6evCG5UpB/t2gz9AXIuOZM+fVsgRTixF/YtXp/6SnRZ0d49zbrOZzfsl+zoRDH5mEktWrWPnnn387daHuPXKNuRGfv0/fN7+ULm5QXoNC98OWKZUAo/ecTUBvx+AIc++zo7de3EOzqhdjftv7BL9gXhEMBikX//7mDXrDfw+HxMmvs2qVd8xbNgAli5dzsyZc2nZsjmPPDwEh+PzBYu4o2/4h/aVV7bnggvOJSWlAj16dAWg9w13snz5N9x9161ccmlrfD4f4156jfnzF8ZymJ6Q0LUfvjr1sdJJlBr0IjmfTIHIZyg3dS6B+ucSOLslLhSEnGwOvPV0eEMXIvvDSZTqPRQwgpvWk7vkEwDiLuhA4IxzwHzkpH5MaP3KGI3u+LOiziuaWQOgHpB4sM0594u/hTKz/znnah2rX0ma0vGipKY3x7oE+ZV2Diz4IkV+/8oMn2oFrSvSFb6ZDQNaEQ782UA74HPgqIFvZisK2hVQ4m7LFBE5ERR1Dr8L0BD4yjnXK3Iv/eRC+lcG2gA7Dms3QM/gERGJgaIGfpZzLmRmuWZWDvgRqFlI/5lAWefc14evMLP5v7hKERH5zY4Z+GZmwAozO4nw3TdLgb3AFwVt45zrXci6q395mSIi8lsdM/Cdc87MmjrndgIvmtlHQDnnXEHz9CIi8jtU1EcrLDOzJgDOuY0KexGRE09R5/DPBf5uZt8D+wh/+eqcc3+OWmUiInJcFTXw2xy7i4iI/J4V9W/afh/tQkREJLqKOocvIiInOAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHhEINYFFMRfo16sS5DfwMW6ABE5gq7wRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfGIQKwLKAnue/QpPluYSnKFk3h/8otHrE9dtoK+9zxI9apVAGjdsjm3Xv/3vPXBYJBuvftycqWKvPD4gwAMfmAk33y7hkAgQIN6f2TYoL7EBXS6jpc2F7fiqacewu/zMf7VNxn1+PP51teqVZ1/jnuKipWS2bF9Jz169iU9PYOGDevz/LMjSCpXlmAwyIjHnmXq1BkAzP90GmWTygJwcqUUFi/5ms5detOyxXlMe3c8Gzb+AMD778/mkeGji3W8JVX8FbcSOKMRbt8u9o+5+4j1/rqNiW99Fc45CAXJnjWB0PffAmDlK5LQ6RasfAoAWRMfxe3ciq9OA+LbXYv5AwTT15P93lgIhYp1XNGiBDkOOl7yN67ufDn/ePiJAvuc07BBXpgfbvLU6dSpXYu9+zLz2i69+EIeGzYIgEEPjOTdDz7iqk6XHd/CPcrn8zHmmeG0vaQ7aWkZLPpiNh/
MnMPq1Wvy+owaOZRJr7/DpElTubDVXxj+yBB69upLZuZ+el7fj7VrN1C1amVSF33InDnz2bVrN63+ekXe9lPeHseMD+bkLX/+eSodOl1XrOP0gtxl88ld9BEJXW4/6vrgupXsXz0AAKtci8Tud7F/dH8AErrcTvb8aYTWrYD4RHAhMCOhcx+yxj+E25ZB3EXdCJzditylnxbTiKIrKlM6ZlbezB4zs2/NbLuZbTOz1ZG2k6JxzFhqfNaZlC+X9Ku23fzjVj77v1Q6t2+Tr71F86aYGWbGmXXPYMuPPx2PUgVo2uRs1q3byIYN/yMnJ4cpU6Zz+WF//nXrns68eQsBmDd/IZe3vxiANWvWs3btBgAyMrbw49ZtVKqUkm/bpKSyXNjqL0yf/lExjMbbQhtX4zL3FtwhOyvvrcUngnPh95VqgM8fDvuD/XKyoVQSBHNx2zIACK5djr/+uVGrv7hFaw5/CrADaOWcS3bOpQAXRtqmROmYv2vLV67miutu45a772ft+u/z2kc+8xJ33dYbs6OfipzcXD74+BPOP7dxcZVa4lWrXoUf0jblLaelZ1CtWpV8fVasWEWnju0A6NixHeXKJZGcXCFfnyaNzyI+Po516zbma+/QoS2fzlvInj0/B1GzZo1YumQuM2dMol69Px7nEUlh/PWaUqr/aBJ7DOHAtLEA+CpWxWXtI+HqAST2GUVc22vBfJC5G3x+fNXrABBocB6+8hVjWf5xFa3Ar+2cG+mc23ywwTm32Tk3EjglSsf83ap3xh+Y++5Epk18gas7t6fvkIcAmL/wS5IrnET9P51e4LaPPPE8jRo2oNFZDYqrXAEGDX6YFi2asTj1Y1pc0Iy0tAyCwWDe+ipVTmbChDHccMNd4fnhQ1zVtQNvvf1+3vKyr/5DndOa0qjx33j+hVd5d+r44hqGAMFVqewf3Z+s10cR37pbuNHnx1+7LtkfvkbW2HvwVTiZwDmtADjw9mjiL+lJ4q0jcAf241zJmL+H6AX+92Y2yMwqH2wws8pmNhj4oaCNzOwmM1tiZkv++dqbUSqt+JUtU4bSpUsB4ama3NxcduzcxVcrVjH/80Vc3Pk6Bg57jNSlyxn84Ki87V4Y/zo7du5iUN+bYlV6ibQpfTM1a1TLW65RvSqbNm3O1ycjYwtXdr2RJk3bcP/QkQDs2rUbCE/ZzJj+GvcPHcmXqcvybZeSUoEmTc5m9uxP8tr27NnLvsj3Mx9+9ClxcQFSUvL/tiDRF9q4GkuuDKWTcLu3EcrYiNvxI4RCBFcvxlft1HC/H74j6+WhZI0dQmjjKtxPm46x5xNHtL607QbcA/w7EvoO2ALMALoWtJFzbhwwDiDnp/WuoH4nmp+2bScluQJmxn9W/ZeQc5xUvhx33tqLO2/tBYTv5Jnw5ruMjHxR+86Mj1j45VJeGTMCn093zx5Pi5d8zWmnnUrt2jVJT99M164duLZHn3x9UlIqsH37Tpxz3DP4DiZMfAuAuLg43p36CpMnv8O0abOO2HfnKy5j1ux/ceDAgby2ypUrsWXLViA8DeTz+di2bUcURygHWXIV3PbwD3NftVMhEAeZewjt3weJpaF0Ocjcja9OA0Lp68IblSkH+3aDP0Bci45kz58WwxEcX1EJfOfcDjN7FZgLLHLO5U1mmllboER9mzVw2GMs/moFO3fu5qKO13Bb72vJzc0FoFunS5kz73Pefm8W/oCfxPh4Hn/wHsys0H0+/MSzVK18Mn+/6S7gyFs55dcLBoP0638fs2e9gd/nY8LEt1m16jseGDaAJUuXM3PmXFq2bM7wh4fgcCxYsIg7+t4LwJVXtueCC84lOaUCPXqEr11633Any5d/A0C3rpcfcYtn5ysu5eabe5CbGyRrfxZ/v+a24h1wCZbQtR++OvWx0kmUGvQiOZ9MAb8fgNzUuQTqn0vg7Ja4UBBysjnw1tPhDV2I7A8nUar3UMAIblpP7pLwb2VxF3QgcMY5YD5yUj8mtH5
ljEZ3/Nnh84/HZadmfYE+wGrgLKCfc256ZN0y59w5x9pHSbrC96JS1S6IdQnyK+0a2DzWJchvUGb41AKvJqM1pXMj0Mg5t9fMagPvmFlt59wzQOGXtiIiEhXRCnzfwWkc59xGM2tFOPRPQYEvIhIT0fo2cIuZnXVwIRL+lwEVgTOjdEwRESlEtAK/B5DvPjfnXK5zrgfQIkrHFBGRQkTrLp20QtYtjMYxRUSkcLrBW0TEIxT4IiIeocAXEfEIBb6IiEco8EVEPEKBLyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+iIhHKPBFRDxCgS8i4hEKfBERj1Dgi4h4hAJfRMQjFPgiIh6hwBcR8QgFvoiIRyjwRUQ8QoEvIuIRCnwREY9Q4IuIeIQCX0TEIxT4IiIeocAXEfEIc87FugZPMrObnHPjYl2H/Do6fycuL587XeHHzk2xLkB+E52/E5dnz50CX0TEIxT4IiIeocCPHU/OIZYgOn8nLs+eO31pKyLiEbrCFxHxCAW+iIhHKPCPAzNra2b/NbO1ZnbPUdYnmNnbkfVfmlntQ9YNibT/18zaHNI+3sx+NLOVxTQMOUwRzmsLM1tmZrlm1iUWNcrRHevzY2FjIud2hZmdU9w1xoIC/zcyMz/wPNAOqAd0N7N6h3XrDexwzp0GPA2MjGxbD7gKqA+0BV6I7A9gQqRNYqCI5/V/QE/gjeKtTopgAoV/ftoBp0deNwFji6GmmFPg/3ZNgbXOufXOuWzgLaDDYX06ABMj798BLjIzi7S/5Zw74JzbAKyN7A/n3GfA9uIYgBzVMc+rc26jc24FEIpFgVKwInx+OgCvubBFwElmVrV4qosdBf5vVx344ZDltEjbUfs453KBXUBKEbeV2NC5Kdk8eX4V+CIiHqHA/+3SgZqHLNeItB21j5kFgPLAtiJuK7Ghc1OyefL8KvB/u8XA6WZ2qpnFE/4SdsZhfWYA10XedwE+deG/8TYDuCpyF8+phL9ASi2muqVwRTmvcuKaAfSI3K3TDNjlnMuIdVHRFoh1ASc651yumd0OfAz4gfHOuW/M7CFgiXNuBvAKMMnM1hL+IumqyLbfmNkUYBWQC/RxzgUBzOxNoBVQ0czSgGHOuVeKeXieVZTzamZNgPeACkB7M3vQOVc/hmVLxNE+P0AcgHPuRWA2cAnhGyUygV6xqbR46dEKIiIeoSkdERGPUOCLiHiEAl9ExCMU+CIiHqHAFxHxCAW+yO+Qmf0j1jVIyaPbMuWEEXngnDnnfhcPKzMz/8G/NxGFfe91zpX9vdQjJYOu8OV3zcxqR55J/xqwEqhpZgPNbHHkOeYPHtL3/kjfz83sTTMbcIx99zSz6WY238zWmNmwQ9a9b2ZLzewbM7vpkPa9ZvakmS0HzjOzoZFaVprZuMgPJSL7fNrMlpjZajNrYmbTIsd55JD9XWNmqWb2tZm9ZGZ+M3sMKBVpe72gfker57j8oUvJ5ZzTS6/f7QuoTfjxw80iyxcT/keojfAFy0ygBdAE+BpIBJKANcCAY+y7J5BB+MmlpQj/QGkcWZcc+e/B9pTIsgO6HrKP5EPeTwLaR97PB0ZG3vcDNgFVgQTCT2ZMAeoCHwBxkX4vAD0i7/cest/C+uWrRy+9Cnvp0QpyIvjehZ9ZDuHAvxj4KrJclvAziJKA6c65LCDLzD4o4r7nOue2AZjZNOB8YAnQ18w6RfrUjBxjGxAE3j1k+wvNbBBQGkgGviEczvDzs3f+A3zjIs9qMbP1kX2eDzQCFkd+MSgF/HiUGi8qpN/h9YgUSIEvJ4J9h7w3YIRz7qVDO5hZ/1+578O/xHJm1gpoDZznnMs0s/mEf3MAyHI/P+8okfDVdmPn3A9m9sAh/QAORP4bOuT9weVAZCwTnXNDjlFjYf3y6hE5Fs3hy4nmY+B6MysLYGbVzexkYCHhB5glRtZdVsT9/c3Mks2sFNAxsp/yhP9Jykwz+xPQrIBtD4b7T5F
j/tJ/1/YToEukfiJ1nBJZl2NmcUXoJ1JkusKXE4pzbo6Z1QW+iExv7AWucc4tNrMZwApgC+FplF0AZnZLZNsXj7LLVMJTIjWAyc65JWb2H+AWM1sN/BdYdJTtcM7tNLOXCc/xbyb8SOVfMpZVZnYfMMfMfEAO0Af4nvD3FCvMbJlz7u+F9BMpMt2WKSWGmZV1zu01s9LAZ8BNzrllhfTvSXg65vbiqlEklnSFLyXJODOrR3iqZWJhYS/iRbrCFxHxCH1pKyLiEQp8ERGPUOCLiHiEAl9ExCMU+CIiHvH/Lufbq4FeUs0AAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -750,7 +748,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -807,7 +805,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -886,4 +884,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb index 4df1b961c..f8b627a54 100644 --- a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb @@ -67,22 +67,26 @@ ], "source": [ "import sys\n", - "\n", "import itertools\n", "import pandas as pd\n", - "import scrapbook as sb\n", "\n", - "from recommenders.utils.notebook_utils import is_jupyter\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", "from recommenders.datasets.pandas_df_utils import filter_by\n", "from recommenders.evaluation.python_evaluation import (\n", - " rmse, mae, rsquared, exp_var,\n", - " map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", + " rmse,\n", + " mae,\n", + " rsquared,\n", + " exp_var,\n", + " map_at_k,\n", + " ndcg_at_k,\n", + " precision_at_k,\n", + " recall_at_k,\n", ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")\n", - "print(f\"Pandas version: {pd.__version__}\")" + "print(f\"Pandas version: {pd.__version__}\")\n" ] }, { @@ -861,16 +865,15 @@ } ], "source": [ - "if is_jupyter():\n", - " # Record results with papermill and scrapbook for tests\n", - " store_metadata(\"map\", eval_map)\n", - " store_metadata(\"ndcg\", eval_ndcg)\n", - " store_metadata(\"precision\", eval_precision)\n", - " store_metadata(\"recall\", eval_recall)\n", - " store_metadata(\"rmse\", eval_rmse)\n", - " store_metadata(\"mae\", eval_mae)\n", - " store_metadata(\"exp_var\", 
eval_exp_var)\n", - " store_metadata(\"rsquared\", eval_rsquared)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"map\", eval_map)\n", + "store_metadata(\"ndcg\", eval_ndcg)\n", + "store_metadata(\"precision\", eval_precision)\n", + "store_metadata(\"recall\", eval_recall)\n", + "store_metadata(\"rmse\", eval_rmse)\n", + "store_metadata(\"mae\", eval_mae)\n", + "store_metadata(\"exp_var\", eval_exp_var)\n", + "store_metadata(\"rsquared\", eval_rsquared)" ] }, { @@ -906,4 +909,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb index 8d177b07a..c2ff20337 100644 --- a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb @@ -44,23 +44,28 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import torch\n", "import cornac\n", - "import papermill as pm\n", - "import scrapbook as sb\n", "import pandas as pd\n", + "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.models.cornac.cornac_utils import predict_ranking\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", + "from recommenders.evaluation.python_evaluation import (\n", + " map_at_k,\n", + " ndcg_at_k,\n", + " precision_at_k,\n", + " recall_at_k,\n", + ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"PyTorch version: {}\".format(torch.__version__))\n", - "print(\"Cornac version: {}\".format(cornac.__version__))" + "print(f\"System version: 
{sys.version}\")\n", + "print(f\"PyTorch version: {torch.__version__}\")\n", + "print(f\"Cornac version: {cornac.__version__}\")\n" ] }, { @@ -593,7 +598,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", "store_metadata(\"precision\", eval_precision)\n", @@ -647,4 +652,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb index 50666e82d..fb0253a2f 100644 --- a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb @@ -42,21 +42,21 @@ } ], "source": [ - "import sys\n", "import os\n", + "import sys\n", "import cornac\n", - "import papermill as pm\n", - "import scrapbook as sb\n", "import pandas as pd\n", + "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.models.cornac.cornac_utils import predict_ranking\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.constants import SEED\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Cornac version: {}\".format(cornac.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Cornac version: {cornac.__version__}\")" ] }, { @@ -574,7 +574,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", 
"store_metadata(\"precision\", eval_precision)\n", @@ -615,4 +615,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb index bcc9992e4..63fca3805 100644 --- a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb @@ -52,7 +52,6 @@ "source": [ "import sys\n", "import os\n", - "import scrapbook as sb\n", "import pandas as pd\n", "import numpy as np\n", "import tensorflow as tf\n", @@ -66,10 +65,11 @@ "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.utils.constants import SEED as DEFAULT_SEED\n", "from recommenders.models.deeprec.deeprec_utils import prepare_hparams\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Pandas version: {}\".format(pd.__version__))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Pandas version: {pd.__version__}\")\n", + "print(f\"Tensorflow version: {tf.__version__}\")" ] }, { @@ -732,7 +732,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", "store_metadata(\"precision\", eval_precision)\n", @@ -822,4 +822,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb index 7f7dcee6d..0c6d33d26 100644 --- a/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb +++ 
b/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb @@ -74,30 +74,27 @@ "import os\n", "import numpy as np\n", "import pandas as pd\n", - "import papermill as pm\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import seaborn as sns\n", "sns.set()\n", "import tensorflow as tf\n", "import keras\n", + "from tempfile import TemporaryDirectory\n", "\n", "from recommenders.utils.timer import Timer\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.split_utils import min_rating_filter_pandas\n", "from recommenders.datasets.python_splitters import numpy_stratified_split\n", "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", - "\n", "from recommenders.datasets.sparse import AffinityMatrix\n", "from recommenders.utils.python_utils import binarize\n", "from recommenders.models.vae.multinomial_vae import Mult_VAE\n", "\n", - "from tempfile import TemporaryDirectory\n", - "\n", - "print(\"System version: {}\".format(sys.version))\n", - "print(\"Pandas version: {}\".format(pd.__version__))\n", - "print(\"Tensorflow version: {}\".format(tf.__version__))\n", - "print(\"Keras version: {}\".format(keras.__version__))" + "print(f\"System version: {sys.version}\")\n", + "print(f\"Pandas version: {pd.__version__}\")\n", + "print(f\"Tensorflow version: {tf.__version__}\")\n", + "print(f\"Keras version: {keras.__version__}\")" ] }, { @@ -1869,4 +1866,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb index 93ee20717..55c2a27d2 100644 --- a/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/ncf_deep_dive.ipynb @@ -1069,7 +1069,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", 
"store_metadata(\"map\", eval_map)\n", "store_metadata(\"ndcg\", eval_ndcg)\n", "store_metadata(\"precision\", eval_precision)\n", diff --git a/examples/02_model_collaborative_filtering/rbm_deep_dive.ipynb b/examples/02_model_collaborative_filtering/rbm_deep_dive.ipynb index 67495e628..9484ef0df 100644 --- a/examples/02_model_collaborative_filtering/rbm_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/rbm_deep_dive.ipynb @@ -74,13 +74,7 @@ } ], "source": [ - "from __future__ import print_function\n", - "from __future__ import absolute_import\n", - "from __future__ import division\n", - "\n", - "# set the environment path to find Recommenders\n", "import sys\n", - "\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline \n", @@ -90,16 +84,12 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel(logging.ERROR)\n", "\n", - "#RBM \n", "from recommenders.models.rbm.rbm import RBM\n", "from recommenders.datasets.python_splitters import numpy_stratified_split\n", "from recommenders.datasets.sparse import AffinityMatrix\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.plot import line_graph\n", - "\n", - "#Evaluation libraries\n", "from recommenders.datasets import movielens \n", - "\n", "from recommenders.evaluation.python_evaluation import (\n", " map_at_k,\n", " ndcg_at_k,\n", @@ -1154,4 +1144,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb b/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb index 3d89f31d0..d25baf4b4 100644 --- a/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/sar_deep_dive.ipynb @@ -109,18 +109,17 @@ "metadata": {}, "outputs": [], "source": [ - "# set the environment path to find Recommenders\n", "import sys\n", "import logging\n", "import scipy\n", "import numpy as np\n", "import pandas as pd\n", - 
"import scrapbook as sb\n", "\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from recommenders.models.sar import SAR\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Pandas version: {pd.__version__}\")\n", @@ -553,4 +552,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb index 7c35b3d00..005d0f8ab 100644 --- a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb @@ -101,18 +101,31 @@ "import os\n", "import sys\n", "import surprise\n", - "import scrapbook as sb\n", "import pandas as pd\n", "\n", "from recommenders.utils.timer import Timer\n", "from recommenders.datasets import movielens\n", "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", - " recall_at_k, get_top_k_items)\n", - "from recommenders.models.surprise.surprise_utils import predict, compute_ranking_predictions\n", + "from recommenders.evaluation.python_evaluation import (\n", + " rmse,\n", + " mae,\n", + " rsquared,\n", + " exp_var,\n", + " map_at_k,\n", + " ndcg_at_k,\n", + " precision_at_k,\n", + " recall_at_k,\n", + " get_top_k_items,\n", + ")\n", + "from recommenders.models.surprise.surprise_utils import (\n", + " predict,\n", + " compute_ranking_predictions,\n", + ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", + "\n", "\n", "print(f\"System version: {sys.version}\")\n", - 
"print(f\"Surprise version: {surprise.__version__}\")" + "print(f\"Surprise version: {surprise.__version__}\")\n" ] }, { @@ -129,7 +142,7 @@ "TOP_K = 10\n", "\n", "# Select MovieLens data size: 100k, 1m, 10m, or 20m\n", - "MOVIELENS_DATA_SIZE = '100k'" + "MOVIELENS_DATA_SIZE = \"100k\"\n" ] }, { @@ -228,11 +241,10 @@ ], "source": [ "data = movielens.load_pandas_df(\n", - " size=MOVIELENS_DATA_SIZE,\n", - " header=[\"userID\", \"itemID\", \"rating\"]\n", + " size=MOVIELENS_DATA_SIZE, header=[\"userID\", \"itemID\", \"rating\"]\n", ")\n", "\n", - "data.head()" + "data.head()\n" ] }, { @@ -252,7 +264,7 @@ "metadata": {}, "outputs": [], "source": [ - "train, test = python_random_split(data, 0.75)" + "train, test = python_random_split(data, 0.75)\n" ] }, { @@ -284,8 +296,10 @@ "# 'reader' is being used to get rating scale (for MovieLens, the scale is [1, 5]).\n", "# 'rating_scale' parameter can be used instead for the later version of surprise lib:\n", "# https://github.com/NicolasHug/Surprise/blob/master/surprise/dataset.py\n", - "train_set = surprise.Dataset.load_from_df(train, reader=surprise.Reader('ml-100k')).build_full_trainset()\n", - "train_set" + "train_set = surprise.Dataset.load_from_df(\n", + " train, reader=surprise.Reader(\"ml-100k\")\n", + ").build_full_trainset()\n", + "train_set\n" ] }, { @@ -351,7 +365,7 @@ "with Timer() as train_time:\n", " svd.fit(train_set)\n", "\n", - "print(f\"Took {train_time.interval} seconds for training.\")" + "print(f\"Took {train_time.interval} seconds for training.\")\n" ] }, { @@ -444,8 +458,8 @@ } ], "source": [ - "predictions = predict(svd, test, usercol='userID', itemcol='itemID')\n", - "predictions.head()" + "predictions = predict(svd, test, usercol=\"userID\", itemcol=\"itemID\")\n", + "predictions.head()\n" ] }, { @@ -472,9 +486,11 @@ ], "source": [ "with Timer() as test_time:\n", - " all_predictions = compute_ranking_predictions(svd, train, usercol='userID', itemcol='itemID', remove_seen=True)\n", - " \n", - 
"print(f\"Took {test_time.interval} seconds for prediction.\")" + " all_predictions = compute_ranking_predictions(\n", + " svd, train, usercol=\"userID\", itemcol=\"itemID\", remove_seen=True\n", + " )\n", + "\n", + "print(f\"Took {test_time.interval} seconds for prediction.\")\n" ] }, { @@ -558,7 +574,7 @@ } ], "source": [ - "all_predictions.head()" + "all_predictions.head()\n" ] }, { @@ -603,23 +619,31 @@ "eval_rsquared = rsquared(test, predictions)\n", "eval_exp_var = exp_var(test, predictions)\n", "\n", - "eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", - "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", - "eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", - "eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)\n", + "eval_map = map_at_k(test, all_predictions, col_prediction=\"prediction\", k=TOP_K)\n", + "eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction=\"prediction\", k=TOP_K)\n", + "eval_precision = precision_at_k(\n", + " test, all_predictions, col_prediction=\"prediction\", k=TOP_K\n", + ")\n", + "eval_recall = recall_at_k(test, all_predictions, col_prediction=\"prediction\", k=TOP_K)\n", "\n", "\n", - "print(\"RMSE:\\t\\t%f\" % eval_rmse,\n", - " \"MAE:\\t\\t%f\" % eval_mae,\n", - " \"rsquared:\\t%f\" % eval_rsquared,\n", - " \"exp var:\\t%f\" % eval_exp_var, sep='\\n')\n", + "print(\n", + " \"RMSE:\\t\\t%f\" % eval_rmse,\n", + " \"MAE:\\t\\t%f\" % eval_mae,\n", + " \"rsquared:\\t%f\" % eval_rsquared,\n", + " \"exp var:\\t%f\" % eval_exp_var,\n", + " sep=\"\\n\",\n", + ")\n", "\n", - "print('----')\n", + "print(\"----\")\n", "\n", - "print(\"MAP:\\t\\t%f\" % eval_map,\n", - " \"NDCG:\\t\\t%f\" % eval_ndcg,\n", - " \"Precision@K:\\t%f\" % eval_precision,\n", - " \"Recall@K:\\t%f\" % eval_recall, sep='\\n')" + "print(\n", + " \"MAP:\\t\\t%f\" % eval_map,\n", + " \"NDCG:\\t\\t%f\" % 
eval_ndcg,\n", + " \"Precision@K:\\t%f\" % eval_precision,\n", + " \"Recall@K:\\t%f\" % eval_recall,\n", + " sep=\"\\n\",\n", + ")\n" ] }, { @@ -809,7 +833,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"rmse\", eval_rmse)\n", "store_metadata(\"mae\", eval_mae)\n", "store_metadata(\"rsquared\", eval_rsquared)\n", @@ -819,7 +843,7 @@ "store_metadata(\"precision\", eval_precision)\n", "store_metadata(\"recall\", eval_recall)\n", "store_metadata(\"train_time\", train_time.interval)\n", - "store_metadata(\"test_time\", test_time.interval)" + "store_metadata(\"test_time\", test_time.interval)\n" ] }, { @@ -856,4 +880,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_content_based_filtering/dkn_deep_dive.ipynb b/examples/02_model_content_based_filtering/dkn_deep_dive.ipynb index ff9d397e5..98442c0bd 100644 --- a/examples/02_model_content_based_filtering/dkn_deep_dive.ipynb +++ b/examples/02_model_content_based_filtering/dkn_deep_dive.ipynb @@ -136,13 +136,11 @@ } ], "source": [ - "import sys\n", - "\n", "import os\n", + "import sys\n", "from tempfile import TemporaryDirectory\n", "import logging\n", - "import papermill as pm\n", - "import scrapbook as sb\n", + "\n", "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", @@ -158,6 +156,7 @@ "from recommenders.models.deeprec.deeprec_utils import prepare_hparams\n", "from recommenders.models.deeprec.models.dkn import DKN\n", "from recommenders.models.deeprec.io.dkn_iterator import DKNTextIterator\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Tensorflow version: {tf.__version__}\")" @@ -466,7 +465,12 @@ } ], "source": [ - "sb.glue(\"res\", res)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"auc\", res[\"auc\"])\n", + 
"store_metadata(\"group_auc\", res[\"group_auc\"])\n", + "store_metadata(\"ndcg@5\", res[\"ndcg@5\"])\n", + "store_metadata(\"ndcg@10\", res[\"ndcg@10\"])\n", + "store_metadata(\"mean_mrr\", res[\"mean_mrr\"])" ] }, { @@ -558,4 +562,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb index 92ae786ee..ed42529ff 100644 --- a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb +++ b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb @@ -60,25 +60,20 @@ } ], "source": [ + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "\n", "import os\n", "import sys\n", "\n", - "\n", "import pyspark\n", "from pyspark.ml import PipelineModel\n", "from pyspark.ml.feature import FeatureHasher\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", - "import papermill as pm\n", - "import scrapbook as sb\n", - "\n", "\n", "from recommenders.utils.notebook_utils import is_databricks\n", "from recommenders.utils.spark_utils import start_or_get_spark\n", "from recommenders.datasets.criteo import load_spark_df\n", "from recommenders.datasets.spark_splitters import spark_random_split\n", - "\n", - "# Setup MML Spark\n", "from recommenders.utils.spark_utils import MMLSPARK_REPO, MMLSPARK_PACKAGE\n", "\n", "# On Spark >3.0.0,<3.2.0, the following should be set:\n", @@ -440,7 +435,7 @@ } ], "source": [ - "# Record results with papermill for tests\n", + "# Record results for tests - ignore this cell\n", "store_metadata(\"auc\", auc)" ] }, diff --git a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb index 441c33876..26540bcdb 100644 --- 
a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb +++ b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb @@ -75,25 +75,30 @@ } ], "source": [ - "import sys\n", - "\n", "import os\n", + "import sys\n", + "import pandas as pd\n", "from subprocess import run\n", "from tempfile import TemporaryDirectory\n", "from time import process_time\n", "\n", - "import pandas as pd\n", - "import papermill as pm\n", - "import scrapbook as sb\n", - "\n", - "from recommenders.utils.notebook_utils import is_jupyter\n", "from recommenders.datasets.movielens import load_pandas_df\n", "from recommenders.datasets.python_splitters import python_random_split\n", - "from recommenders.evaluation.python_evaluation import (rmse, mae, exp_var, rsquared, get_top_k_items,\n", - " map_at_k, ndcg_at_k, precision_at_k, recall_at_k)\n", + "from recommenders.evaluation.python_evaluation import (\n", + " rmse,\n", + " mae,\n", + " exp_var,\n", + " rsquared,\n", + " get_top_k_items,\n", + " map_at_k,\n", + " ndcg_at_k,\n", + " precision_at_k,\n", + " recall_at_k,\n", + ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", - "print(\"Pandas version: {}\".format(pd.__version__))" + "print(\"Pandas version: {}\".format(pd.__version__))\n" ] }, { @@ -1316,18 +1321,17 @@ } ], "source": [ - "# record results for testing\n", - "if is_jupyter():\n", - " store_metadata('rmse', saved_result['RMSE'])\n", - " store_metadata('mae', saved_result['MAE'])\n", - " store_metadata('rsquared', saved_result['R2'])\n", - " store_metadata('exp_var', saved_result['Explained Variance'])\n", - " store_metadata(\"train_time\", saved_result['Train Time (ms)'])\n", - " store_metadata(\"test_time\", test_time)\n", - " store_metadata('map', rank_metrics['MAP'])\n", - " store_metadata('ndcg', rank_metrics['NDCG'])\n", - " store_metadata('precision', rank_metrics['Precision'])\n", - " store_metadata('recall', 
rank_metrics['Recall'])" + "# Record results for tests - ignore this cell\n", + "store_metadata('rmse', saved_result['RMSE'])\n", + "store_metadata('mae', saved_result['MAE'])\n", + "store_metadata('rsquared', saved_result['R2'])\n", + "store_metadata('exp_var', saved_result['Explained Variance'])\n", + "store_metadata(\"train_time\", saved_result['Train Time (ms)'])\n", + "store_metadata(\"test_time\", test_time)\n", + "store_metadata('map', rank_metrics['MAP'])\n", + "store_metadata('ndcg', rank_metrics['NDCG'])\n", + "store_metadata('precision', rank_metrics['Precision'])\n", + "store_metadata('recall', rank_metrics['Recall'])" ] }, { @@ -1374,4 +1378,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_hybrid/fm_deep_dive.ipynb b/examples/02_model_hybrid/fm_deep_dive.ipynb index 5920b021f..04d782e5a 100644 --- a/examples/02_model_hybrid/fm_deep_dive.ipynb +++ b/examples/02_model_hybrid/fm_deep_dive.ipynb @@ -238,10 +238,8 @@ } ], "source": [ - "import sys\n", "import os\n", - "import papermill as pm\n", - "import scrapbook as sb\n", + "import sys\n", "from tempfile import TemporaryDirectory\n", "import xlearn as xl\n", "from sklearn.metrics import roc_auc_score\n", @@ -256,6 +254,7 @@ "from recommenders.datasets.download_utils import maybe_download, unzip_file\n", "from recommenders.tuning.parameter_sweep import generate_param_grid\n", "from recommenders.datasets.pandas_df_utils import LibffmConverter\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Xlearn version: {}\".format(xl.__version__))" @@ -370,17 +369,19 @@ } ], "source": [ - "df_feature_original = pd.DataFrame({\n", - " 'rating': [1, 0, 0, 1, 1],\n", - " 'field1': ['xxx1', 'xxx2', 'xxx4', 'xxx4', 'xxx4'],\n", - " 'field2': [3, 4, 5, 6, 7],\n", - " 'field3': [1.0, 2.0, 3.0, 4.0, 5.0],\n", - " 'field4': ['1', '2', '3', '4', '5']\n", - "})\n", + 
"df_feature_original = pd.DataFrame(\n", + " {\n", + " \"rating\": [1, 0, 0, 1, 1],\n", + " \"field1\": [\"xxx1\", \"xxx2\", \"xxx4\", \"xxx4\", \"xxx4\"],\n", + " \"field2\": [3, 4, 5, 6, 7],\n", + " \"field3\": [1.0, 2.0, 3.0, 4.0, 5.0],\n", + " \"field4\": [\"1\", \"2\", \"3\", \"4\", \"5\"],\n", + " }\n", + ")\n", "\n", - "converter = LibffmConverter().fit(df_feature_original, col_rating='rating')\n", + "converter = LibffmConverter().fit(df_feature_original, col_rating=\"rating\")\n", "df_out = converter.transform(df_feature_original)\n", - "df_out" + "df_out\n" ] }, { @@ -397,7 +398,11 @@ } ], "source": [ - "print('There are in total {0} fields and {1} features.'.format(converter.field_count, converter.feature_count))" + "print(\n", + " \"There are in total {0} fields and {1} features.\".format(\n", + " converter.field_count, converter.feature_count\n", + " )\n", + ")\n" ] }, { @@ -421,11 +426,11 @@ "LEARNING_RATE = 0.2\n", "LAMBDA = 0.002\n", "EPOCH = 10\n", - "OPT_METHOD = \"sgd\" # options are \"sgd\", \"adagrad\" and \"ftrl\"\n", + "OPT_METHOD = \"sgd\" # options are \"sgd\", \"adagrad\" and \"ftrl\"\n", "\n", "# The metrics for binary classification options are \"acc\", \"prec\", \"f1\" and \"auc\"\n", "# for regression, options are \"rmse\", \"mae\", \"mape\"\n", - "METRIC = \"auc\" \n" + "METRIC = \"auc\"\n" ] }, { @@ -460,9 +465,11 @@ "model_file = os.path.join(data_path, MODEL_FILE_NAME)\n", "output_file = os.path.join(data_path, OUTPUT_FILE_NAME)\n", "\n", - "assets_url = \"https://recodatasets.z20.web.core.windows.net/deeprec/xdeepfmresources.zip\"\n", + "assets_url = (\n", + " \"https://recodatasets.z20.web.core.windows.net/deeprec/xdeepfmresources.zip\"\n", + ")\n", "assets_file = maybe_download(assets_url, work_directory=data_path)\n", - "unzip_file(assets_file, data_path)" + "unzip_file(assets_file, data_path)\n" ] }, { @@ -494,8 +501,8 @@ ], "source": [ "# Training task\n", - "ffm_model = xl.create_ffm() # Use field-aware factorization machine 
(ffm)\n", - "ffm_model.setTrain(train_file) # Set the path of training dataset\n", + "ffm_model = xl.create_ffm() # Use field-aware factorization machine (ffm)\n", + "ffm_model.setTrain(train_file) # Set the path of training dataset\n", "ffm_model.setValidate(valid_file) # Set the path of validation dataset\n", "\n", "# Parameters:\n", @@ -505,19 +512,20 @@ "# 3. evaluation metric: auc\n", "# 4. number of epochs: 10\n", "# 5. optimization method: sgd\n", - "param = {\"task\":\"binary\", \n", - " \"lr\": LEARNING_RATE, \n", - " \"lambda\": LAMBDA, \n", - " \"metric\": METRIC,\n", - " \"epoch\": EPOCH,\n", - " \"opt\": OPT_METHOD\n", - " }\n", + "param = {\n", + " \"task\": \"binary\",\n", + " \"lr\": LEARNING_RATE,\n", + " \"lambda\": LAMBDA,\n", + " \"metric\": METRIC,\n", + " \"epoch\": EPOCH,\n", + " \"opt\": OPT_METHOD,\n", + "}\n", "\n", "# Start to train\n", "# The trained model will be stored in model.out\n", "with Timer() as time_train:\n", " ffm_model.fit(param, model_file)\n", - "print(f\"Training time: {time_train}\")" + "print(f\"Training time: {time_train}\")\n" ] }, { @@ -536,13 +544,13 @@ "source": [ "# Prediction task\n", "ffm_model.setTest(test_file) # Set the path of test dataset\n", - "ffm_model.setSigmoid() # Convert output to 0-1\n", + "ffm_model.setSigmoid() # Convert output to 0-1\n", "\n", "# Start to predict\n", "# The output result will be stored in output.txt\n", "with Timer() as time_predict:\n", " ffm_model.predict(model_file, output_file)\n", - "print(f\"Prediction time: {time_predict}\")" + "print(f\"Prediction time: {time_predict}\")\n" ] }, { @@ -552,24 +560,6 @@ "The output are the predicted labels (i.e., 1 or 0) for the testing data set. AUC score is calculated to evaluate the model performance." 
] }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "with open(output_file) as f:\n", - " predictions = f.readlines()\n", - "\n", - "with open(test_file) as f:\n", - " truths = f.readlines()\n", - "\n", - "truths = np.array([float(truth.split(' ')[0]) for truth in truths])\n", - "predictions = np.array([float(prediction.strip('')) for prediction in predictions])\n", - "\n", - "auc_score = roc_auc_score(truths, predictions)" - ] - }, { "cell_type": "code", "execution_count": 14, @@ -587,7 +577,18 @@ } ], "source": [ - "auc_score" + "with open(output_file) as f:\n", + " predictions = f.readlines()\n", + "\n", + "with open(test_file) as f:\n", + " truths = f.readlines()\n", + "\n", + "truths = np.array([float(truth.split(\" \")[0]) for truth in truths])\n", + "predictions = np.array([float(prediction.strip(\"\")) for prediction in predictions])\n", + "\n", + "auc_score = roc_auc_score(truths, predictions)\n", + "\n", + "print(auc_score)\n" ] }, { @@ -606,7 +607,7 @@ } ], "source": [ - "store_metadata('auc_score', auc_score)" + "store_metadata(\"auc_score\", auc_score)\n" ] }, { @@ -651,12 +652,9 @@ "metadata": {}, "outputs": [], "source": [ - "param_dict = {\n", - " \"lr\": [0.0001, 0.001, 0.01],\n", - " \"lambda\": [0.001, 0.01, 0.1]\n", - "}\n", + "param_dict = {\"lr\": [0.0001, 0.001, 0.01], \"lambda\": [0.001, 0.01, 0.1]}\n", "\n", - "param_grid = generate_param_grid(param_dict)" + "param_grid = generate_param_grid(param_dict)\n" ] }, { @@ -669,13 +667,13 @@ "\n", "with Timer() as time_tune:\n", " for param in param_grid:\n", - " ffm_model = xl.create_ffm() \n", - " ffm_model.setTrain(train_file) \n", + " ffm_model = xl.create_ffm()\n", + " ffm_model.setTrain(train_file)\n", " ffm_model.setValidate(valid_file)\n", " ffm_model.fit(param, model_file)\n", "\n", - " ffm_model.setTest(test_file) \n", - " ffm_model.setSigmoid() \n", + " ffm_model.setTest(test_file)\n", + " ffm_model.setSigmoid()\n", " 
ffm_model.predict(model_file, output_file)\n", "\n", " with open(output_file) as f:\n", @@ -684,10 +682,12 @@ " with open(test_file) as f:\n", " truths = f.readlines()\n", "\n", - " truths = np.array([float(truth.split(' ')[0]) for truth in truths])\n", - " predictions = np.array([float(prediction.strip('')) for prediction in predictions])\n", + " truths = np.array([float(truth.split(\" \")[0]) for truth in truths])\n", + " predictions = np.array(\n", + " [float(prediction.strip(\"\")) for prediction in predictions]\n", + " )\n", "\n", - " auc_scores.append(roc_auc_score(truths, predictions))" + " auc_scores.append(roc_auc_score(truths, predictions))\n" ] }, { @@ -704,7 +704,7 @@ } ], "source": [ - "print('Tuning by grid search takes {0:.2} min'.format(time_tune.interval / 60))" + "print(\"Tuning by grid search takes {0:.2} min\".format(time_tune.interval / 60))\n" ] }, { @@ -781,15 +781,17 @@ } ], "source": [ - "auc_scores = [float('%.4f' % x) for x in auc_scores]\n", - "auc_scores_array = np.reshape(auc_scores, (len(param_dict[\"lr\"]), len(param_dict[\"lambda\"]))) \n", + "auc_scores = [float(\"%.4f\" % x) for x in auc_scores]\n", + "auc_scores_array = np.reshape(\n", + " auc_scores, (len(param_dict[\"lr\"]), len(param_dict[\"lambda\"]))\n", + ")\n", "\n", "auc_df = pd.DataFrame(\n", - " data=auc_scores_array, \n", - " index=pd.Index(param_dict[\"lr\"], name=\"LR\"), \n", - " columns=pd.Index(param_dict[\"lambda\"], name=\"Lambda\")\n", + " data=auc_scores_array,\n", + " index=pd.Index(param_dict[\"lr\"], name=\"LR\"),\n", + " columns=pd.Index(param_dict[\"lambda\"], name=\"Lambda\"),\n", ")\n", - "auc_df" + "auc_df\n" ] }, { @@ -799,955 +801,7 @@ "outputs": [ { "data": { - "application/javascript": [ - "/* Put everything inside the global mpl namespace */\n", - "/* global mpl */\n", - "window.mpl = {};\n", - "\n", - "mpl.get_websocket_type = function () {\n", - " if (typeof WebSocket !== 'undefined') {\n", - " return WebSocket;\n", - " } else if (typeof 
MozWebSocket !== 'undefined') {\n", - " return MozWebSocket;\n", - " } else {\n", - " alert(\n", - " 'Your browser does not have WebSocket support. ' +\n", - " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", - " 'Firefox 4 and 5 are also supported but you ' +\n", - " 'have to enable WebSockets in about:config.'\n", - " );\n", - " }\n", - "};\n", - "\n", - "mpl.figure = function (figure_id, websocket, ondownload, parent_element) {\n", - " this.id = figure_id;\n", - "\n", - " this.ws = websocket;\n", - "\n", - " this.supports_binary = this.ws.binaryType !== undefined;\n", - "\n", - " if (!this.supports_binary) {\n", - " var warnings = document.getElementById('mpl-warnings');\n", - " if (warnings) {\n", - " warnings.style.display = 'block';\n", - " warnings.textContent =\n", - " 'This browser does not support binary websocket messages. ' +\n", - " 'Performance may be slow.';\n", - " }\n", - " }\n", - "\n", - " this.imageObj = new Image();\n", - "\n", - " this.context = undefined;\n", - " this.message = undefined;\n", - " this.canvas = undefined;\n", - " this.rubberband_canvas = undefined;\n", - " this.rubberband_context = undefined;\n", - " this.format_dropdown = undefined;\n", - "\n", - " this.image_mode = 'full';\n", - "\n", - " this.root = document.createElement('div');\n", - " this.root.setAttribute('style', 'display: inline-block');\n", - " this._root_extra_style(this.root);\n", - "\n", - " parent_element.appendChild(this.root);\n", - "\n", - " this._init_header(this);\n", - " this._init_canvas(this);\n", - " this._init_toolbar(this);\n", - "\n", - " var fig = this;\n", - "\n", - " this.waiting = false;\n", - "\n", - " this.ws.onopen = function () {\n", - " fig.send_message('supports_binary', { value: fig.supports_binary });\n", - " fig.send_message('send_image_mode', {});\n", - " if (fig.ratio !== 1) {\n", - " fig.send_message('set_dpi_ratio', { dpi_ratio: fig.ratio });\n", - " }\n", - " fig.send_message('refresh', {});\n", - " };\n", - "\n", - " 
this.imageObj.onload = function () {\n", - " if (fig.image_mode === 'full') {\n", - " // Full images could contain transparency (where diff images\n", - " // almost always do), so we need to clear the canvas so that\n", - " // there is no ghosting.\n", - " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", - " }\n", - " fig.context.drawImage(fig.imageObj, 0, 0);\n", - " };\n", - "\n", - " this.imageObj.onunload = function () {\n", - " fig.ws.close();\n", - " };\n", - "\n", - " this.ws.onmessage = this._make_on_message_function(this);\n", - "\n", - " this.ondownload = ondownload;\n", - "};\n", - "\n", - "mpl.figure.prototype._init_header = function () {\n", - " var titlebar = document.createElement('div');\n", - " titlebar.classList =\n", - " 'ui-dialog-titlebar ui-widget-header ui-corner-all ui-helper-clearfix';\n", - " var titletext = document.createElement('div');\n", - " titletext.classList = 'ui-dialog-title';\n", - " titletext.setAttribute(\n", - " 'style',\n", - " 'width: 100%; text-align: center; padding: 3px;'\n", - " );\n", - " titlebar.appendChild(titletext);\n", - " this.root.appendChild(titlebar);\n", - " this.header = titletext;\n", - "};\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function (_canvas_div) {};\n", - "\n", - "mpl.figure.prototype._root_extra_style = function (_canvas_div) {};\n", - "\n", - "mpl.figure.prototype._init_canvas = function () {\n", - " var fig = this;\n", - "\n", - " var canvas_div = (this.canvas_div = document.createElement('div'));\n", - " canvas_div.setAttribute(\n", - " 'style',\n", - " 'border: 1px solid #ddd;' +\n", - " 'box-sizing: content-box;' +\n", - " 'clear: both;' +\n", - " 'min-height: 1px;' +\n", - " 'min-width: 1px;' +\n", - " 'outline: 0;' +\n", - " 'overflow: hidden;' +\n", - " 'position: relative;' +\n", - " 'resize: both;'\n", - " );\n", - "\n", - " function on_keyboard_event_closure(name) {\n", - " return function (event) {\n", - " return fig.key_event(event, name);\n", - " 
};\n", - " }\n", - "\n", - " canvas_div.addEventListener(\n", - " 'keydown',\n", - " on_keyboard_event_closure('key_press')\n", - " );\n", - " canvas_div.addEventListener(\n", - " 'keyup',\n", - " on_keyboard_event_closure('key_release')\n", - " );\n", - "\n", - " this._canvas_extra_style(canvas_div);\n", - " this.root.appendChild(canvas_div);\n", - "\n", - " var canvas = (this.canvas = document.createElement('canvas'));\n", - " canvas.classList.add('mpl-canvas');\n", - " canvas.setAttribute('style', 'box-sizing: content-box;');\n", - "\n", - " this.context = canvas.getContext('2d');\n", - "\n", - " var backingStore =\n", - " this.context.backingStorePixelRatio ||\n", - " this.context.webkitBackingStorePixelRatio ||\n", - " this.context.mozBackingStorePixelRatio ||\n", - " this.context.msBackingStorePixelRatio ||\n", - " this.context.oBackingStorePixelRatio ||\n", - " this.context.backingStorePixelRatio ||\n", - " 1;\n", - "\n", - " this.ratio = (window.devicePixelRatio || 1) / backingStore;\n", - "\n", - " var rubberband_canvas = (this.rubberband_canvas = document.createElement(\n", - " 'canvas'\n", - " ));\n", - " rubberband_canvas.setAttribute(\n", - " 'style',\n", - " 'box-sizing: content-box; position: absolute; left: 0; top: 0; z-index: 1;'\n", - " );\n", - "\n", - " // Apply a ponyfill if ResizeObserver is not implemented by browser.\n", - " if (this.ResizeObserver === undefined) {\n", - " if (window.ResizeObserver !== undefined) {\n", - " this.ResizeObserver = window.ResizeObserver;\n", - " } else {\n", - " var obs = _JSXTOOLS_RESIZE_OBSERVER({});\n", - " this.ResizeObserver = obs.ResizeObserver;\n", - " }\n", - " }\n", - "\n", - " this.resizeObserverInstance = new this.ResizeObserver(function (entries) {\n", - " var nentries = entries.length;\n", - " for (var i = 0; i < nentries; i++) {\n", - " var entry = entries[i];\n", - " var width, height;\n", - " if (entry.contentBoxSize) {\n", - " if (entry.contentBoxSize instanceof Array) {\n", - " // Chrome 84 
implements new version of spec.\n", - " width = entry.contentBoxSize[0].inlineSize;\n", - " height = entry.contentBoxSize[0].blockSize;\n", - " } else {\n", - " // Firefox implements old version of spec.\n", - " width = entry.contentBoxSize.inlineSize;\n", - " height = entry.contentBoxSize.blockSize;\n", - " }\n", - " } else {\n", - " // Chrome <84 implements even older version of spec.\n", - " width = entry.contentRect.width;\n", - " height = entry.contentRect.height;\n", - " }\n", - "\n", - " // Keep the size of the canvas and rubber band canvas in sync with\n", - " // the canvas container.\n", - " if (entry.devicePixelContentBoxSize) {\n", - " // Chrome 84 implements new version of spec.\n", - " canvas.setAttribute(\n", - " 'width',\n", - " entry.devicePixelContentBoxSize[0].inlineSize\n", - " );\n", - " canvas.setAttribute(\n", - " 'height',\n", - " entry.devicePixelContentBoxSize[0].blockSize\n", - " );\n", - " } else {\n", - " canvas.setAttribute('width', width * fig.ratio);\n", - " canvas.setAttribute('height', height * fig.ratio);\n", - " }\n", - " canvas.setAttribute(\n", - " 'style',\n", - " 'width: ' + width + 'px; height: ' + height + 'px;'\n", - " );\n", - "\n", - " rubberband_canvas.setAttribute('width', width);\n", - " rubberband_canvas.setAttribute('height', height);\n", - "\n", - " // And update the size in Python. 
We ignore the initial 0/0 size\n", - " // that occurs as the element is placed into the DOM, which should\n", - " // otherwise not happen due to the minimum size styling.\n", - " if (fig.ws.readyState == 1 && width != 0 && height != 0) {\n", - " fig.request_resize(width, height);\n", - " }\n", - " }\n", - " });\n", - " this.resizeObserverInstance.observe(canvas_div);\n", - "\n", - " function on_mouse_event_closure(name) {\n", - " return function (event) {\n", - " return fig.mouse_event(event, name);\n", - " };\n", - " }\n", - "\n", - " rubberband_canvas.addEventListener(\n", - " 'mousedown',\n", - " on_mouse_event_closure('button_press')\n", - " );\n", - " rubberband_canvas.addEventListener(\n", - " 'mouseup',\n", - " on_mouse_event_closure('button_release')\n", - " );\n", - " // Throttle sequential mouse events to 1 every 20ms.\n", - " rubberband_canvas.addEventListener(\n", - " 'mousemove',\n", - " on_mouse_event_closure('motion_notify')\n", - " );\n", - "\n", - " rubberband_canvas.addEventListener(\n", - " 'mouseenter',\n", - " on_mouse_event_closure('figure_enter')\n", - " );\n", - " rubberband_canvas.addEventListener(\n", - " 'mouseleave',\n", - " on_mouse_event_closure('figure_leave')\n", - " );\n", - "\n", - " canvas_div.addEventListener('wheel', function (event) {\n", - " if (event.deltaY < 0) {\n", - " event.step = 1;\n", - " } else {\n", - " event.step = -1;\n", - " }\n", - " on_mouse_event_closure('scroll')(event);\n", - " });\n", - "\n", - " canvas_div.appendChild(canvas);\n", - " canvas_div.appendChild(rubberband_canvas);\n", - "\n", - " this.rubberband_context = rubberband_canvas.getContext('2d');\n", - " this.rubberband_context.strokeStyle = '#000000';\n", - "\n", - " this._resize_canvas = function (width, height, forward) {\n", - " if (forward) {\n", - " canvas_div.style.width = width + 'px';\n", - " canvas_div.style.height = height + 'px';\n", - " }\n", - " };\n", - "\n", - " // Disable right mouse context menu.\n", - " 
this.rubberband_canvas.addEventListener('contextmenu', function (_e) {\n", - " event.preventDefault();\n", - " return false;\n", - " });\n", - "\n", - " function set_focus() {\n", - " canvas.focus();\n", - " canvas_div.focus();\n", - " }\n", - "\n", - " window.setTimeout(set_focus, 100);\n", - "};\n", - "\n", - "mpl.figure.prototype._init_toolbar = function () {\n", - " var fig = this;\n", - "\n", - " var toolbar = document.createElement('div');\n", - " toolbar.classList = 'mpl-toolbar';\n", - " this.root.appendChild(toolbar);\n", - "\n", - " function on_click_closure(name) {\n", - " return function (_event) {\n", - " return fig.toolbar_button_onclick(name);\n", - " };\n", - " }\n", - "\n", - " function on_mouseover_closure(tooltip) {\n", - " return function (event) {\n", - " if (!event.currentTarget.disabled) {\n", - " return fig.toolbar_button_onmouseover(tooltip);\n", - " }\n", - " };\n", - " }\n", - "\n", - " fig.buttons = {};\n", - " var buttonGroup = document.createElement('div');\n", - " buttonGroup.classList = 'mpl-button-group';\n", - " for (var toolbar_ind in mpl.toolbar_items) {\n", - " var name = mpl.toolbar_items[toolbar_ind][0];\n", - " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", - " var image = mpl.toolbar_items[toolbar_ind][2];\n", - " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", - "\n", - " if (!name) {\n", - " /* Instead of a spacer, we start a new button group. 
*/\n", - " if (buttonGroup.hasChildNodes()) {\n", - " toolbar.appendChild(buttonGroup);\n", - " }\n", - " buttonGroup = document.createElement('div');\n", - " buttonGroup.classList = 'mpl-button-group';\n", - " continue;\n", - " }\n", - "\n", - " var button = (fig.buttons[name] = document.createElement('button'));\n", - " button.classList = 'mpl-widget';\n", - " button.setAttribute('role', 'button');\n", - " button.setAttribute('aria-disabled', 'false');\n", - " button.addEventListener('click', on_click_closure(method_name));\n", - " button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n", - "\n", - " var icon_img = document.createElement('img');\n", - " icon_img.src = '_images/' + image + '.png';\n", - " icon_img.srcset = '_images/' + image + '_large.png 2x';\n", - " icon_img.alt = tooltip;\n", - " button.appendChild(icon_img);\n", - "\n", - " buttonGroup.appendChild(button);\n", - " }\n", - "\n", - " if (buttonGroup.hasChildNodes()) {\n", - " toolbar.appendChild(buttonGroup);\n", - " }\n", - "\n", - " var fmt_picker = document.createElement('select');\n", - " fmt_picker.classList = 'mpl-widget';\n", - " toolbar.appendChild(fmt_picker);\n", - " this.format_dropdown = fmt_picker;\n", - "\n", - " for (var ind in mpl.extensions) {\n", - " var fmt = mpl.extensions[ind];\n", - " var option = document.createElement('option');\n", - " option.selected = fmt === mpl.default_extension;\n", - " option.innerHTML = fmt;\n", - " fmt_picker.appendChild(option);\n", - " }\n", - "\n", - " var status_bar = document.createElement('span');\n", - " status_bar.classList = 'mpl-message';\n", - " toolbar.appendChild(status_bar);\n", - " this.message = status_bar;\n", - "};\n", - "\n", - "mpl.figure.prototype.request_resize = function (x_pixels, y_pixels) {\n", - " // Request matplotlib to resize the figure. 
Matplotlib will then trigger a resize in the client,\n", - " // which will in turn request a refresh of the image.\n", - " this.send_message('resize', { width: x_pixels, height: y_pixels });\n", - "};\n", - "\n", - "mpl.figure.prototype.send_message = function (type, properties) {\n", - " properties['type'] = type;\n", - " properties['figure_id'] = this.id;\n", - " this.ws.send(JSON.stringify(properties));\n", - "};\n", - "\n", - "mpl.figure.prototype.send_draw_message = function () {\n", - " if (!this.waiting) {\n", - " this.waiting = true;\n", - " this.ws.send(JSON.stringify({ type: 'draw', figure_id: this.id }));\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_save = function (fig, _msg) {\n", - " var format_dropdown = fig.format_dropdown;\n", - " var format = format_dropdown.options[format_dropdown.selectedIndex].value;\n", - " fig.ondownload(fig, format);\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_resize = function (fig, msg) {\n", - " var size = msg['size'];\n", - " if (size[0] !== fig.canvas.width || size[1] !== fig.canvas.height) {\n", - " fig._resize_canvas(size[0], size[1], msg['forward']);\n", - " fig.send_message('refresh', {});\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_rubberband = function (fig, msg) {\n", - " var x0 = msg['x0'] / fig.ratio;\n", - " var y0 = (fig.canvas.height - msg['y0']) / fig.ratio;\n", - " var x1 = msg['x1'] / fig.ratio;\n", - " var y1 = (fig.canvas.height - msg['y1']) / fig.ratio;\n", - " x0 = Math.floor(x0) + 0.5;\n", - " y0 = Math.floor(y0) + 0.5;\n", - " x1 = Math.floor(x1) + 0.5;\n", - " y1 = Math.floor(y1) + 0.5;\n", - " var min_x = Math.min(x0, x1);\n", - " var min_y = Math.min(y0, y1);\n", - " var width = Math.abs(x1 - x0);\n", - " var height = Math.abs(y1 - y0);\n", - "\n", - " fig.rubberband_context.clearRect(\n", - " 0,\n", - " 0,\n", - " fig.canvas.width / fig.ratio,\n", - " fig.canvas.height / fig.ratio\n", - " );\n", - "\n", - " fig.rubberband_context.strokeRect(min_x, 
min_y, width, height);\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_figure_label = function (fig, msg) {\n", - " // Updates the figure title.\n", - " fig.header.textContent = msg['label'];\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_cursor = function (fig, msg) {\n", - " var cursor = msg['cursor'];\n", - " switch (cursor) {\n", - " case 0:\n", - " cursor = 'pointer';\n", - " break;\n", - " case 1:\n", - " cursor = 'default';\n", - " break;\n", - " case 2:\n", - " cursor = 'crosshair';\n", - " break;\n", - " case 3:\n", - " cursor = 'move';\n", - " break;\n", - " }\n", - " fig.rubberband_canvas.style.cursor = cursor;\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_message = function (fig, msg) {\n", - " fig.message.textContent = msg['message'];\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_draw = function (fig, _msg) {\n", - " // Request the server to send over a new figure.\n", - " fig.send_draw_message();\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_image_mode = function (fig, msg) {\n", - " fig.image_mode = msg['mode'];\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_history_buttons = function (fig, msg) {\n", - " for (var key in msg) {\n", - " if (!(key in fig.buttons)) {\n", - " continue;\n", - " }\n", - " fig.buttons[key].disabled = !msg[key];\n", - " fig.buttons[key].setAttribute('aria-disabled', !msg[key]);\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_navigate_mode = function (fig, msg) {\n", - " if (msg['mode'] === 'PAN') {\n", - " fig.buttons['Pan'].classList.add('active');\n", - " fig.buttons['Zoom'].classList.remove('active');\n", - " } else if (msg['mode'] === 'ZOOM') {\n", - " fig.buttons['Pan'].classList.remove('active');\n", - " fig.buttons['Zoom'].classList.add('active');\n", - " } else {\n", - " fig.buttons['Pan'].classList.remove('active');\n", - " fig.buttons['Zoom'].classList.remove('active');\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.updated_canvas_event = function () 
{\n", - " // Called whenever the canvas gets updated.\n", - " this.send_message('ack', {});\n", - "};\n", - "\n", - "// A function to construct a web socket function for onmessage handling.\n", - "// Called in the figure constructor.\n", - "mpl.figure.prototype._make_on_message_function = function (fig) {\n", - " return function socket_on_message(evt) {\n", - " if (evt.data instanceof Blob) {\n", - " /* FIXME: We get \"Resource interpreted as Image but\n", - " * transferred with MIME type text/plain:\" errors on\n", - " * Chrome. But how to set the MIME type? It doesn't seem\n", - " * to be part of the websocket stream */\n", - " evt.data.type = 'image/png';\n", - "\n", - " /* Free the memory for the previous frames */\n", - " if (fig.imageObj.src) {\n", - " (window.URL || window.webkitURL).revokeObjectURL(\n", - " fig.imageObj.src\n", - " );\n", - " }\n", - "\n", - " fig.imageObj.src = (window.URL || window.webkitURL).createObjectURL(\n", - " evt.data\n", - " );\n", - " fig.updated_canvas_event();\n", - " fig.waiting = false;\n", - " return;\n", - " } else if (\n", - " typeof evt.data === 'string' &&\n", - " evt.data.slice(0, 21) === 'data:image/png;base64'\n", - " ) {\n", - " fig.imageObj.src = evt.data;\n", - " fig.updated_canvas_event();\n", - " fig.waiting = false;\n", - " return;\n", - " }\n", - "\n", - " var msg = JSON.parse(evt.data);\n", - " var msg_type = msg['type'];\n", - "\n", - " // Call the \"handle_{type}\" callback, which takes\n", - " // the figure and JSON message as its only arguments.\n", - " try {\n", - " var callback = fig['handle_' + msg_type];\n", - " } catch (e) {\n", - " console.log(\n", - " \"No handler for the '\" + msg_type + \"' message type: \",\n", - " msg\n", - " );\n", - " return;\n", - " }\n", - "\n", - " if (callback) {\n", - " try {\n", - " // console.log(\"Handling '\" + msg_type + \"' message: \", msg);\n", - " callback(fig, msg);\n", - " } catch (e) {\n", - " console.log(\n", - " \"Exception inside the 'handler_\" + msg_type 
+ \"' callback:\",\n", - " e,\n", - " e.stack,\n", - " msg\n", - " );\n", - " }\n", - " }\n", - " };\n", - "};\n", - "\n", - "// from http://stackoverflow.com/questions/1114465/getting-mouse-location-in-canvas\n", - "mpl.findpos = function (e) {\n", - " //this section is from http://www.quirksmode.org/js/events_properties.html\n", - " var targ;\n", - " if (!e) {\n", - " e = window.event;\n", - " }\n", - " if (e.target) {\n", - " targ = e.target;\n", - " } else if (e.srcElement) {\n", - " targ = e.srcElement;\n", - " }\n", - " if (targ.nodeType === 3) {\n", - " // defeat Safari bug\n", - " targ = targ.parentNode;\n", - " }\n", - "\n", - " // pageX,Y are the mouse positions relative to the document\n", - " var boundingRect = targ.getBoundingClientRect();\n", - " var x = e.pageX - (boundingRect.left + document.body.scrollLeft);\n", - " var y = e.pageY - (boundingRect.top + document.body.scrollTop);\n", - "\n", - " return { x: x, y: y };\n", - "};\n", - "\n", - "/*\n", - " * return a copy of an object with only non-object keys\n", - " * we need this to avoid circular references\n", - " * http://stackoverflow.com/a/24161582/3208463\n", - " */\n", - "function simpleKeys(original) {\n", - " return Object.keys(original).reduce(function (obj, key) {\n", - " if (typeof original[key] !== 'object') {\n", - " obj[key] = original[key];\n", - " }\n", - " return obj;\n", - " }, {});\n", - "}\n", - "\n", - "mpl.figure.prototype.mouse_event = function (event, name) {\n", - " var canvas_pos = mpl.findpos(event);\n", - "\n", - " if (name === 'button_press') {\n", - " this.canvas.focus();\n", - " this.canvas_div.focus();\n", - " }\n", - "\n", - " var x = canvas_pos.x * this.ratio;\n", - " var y = canvas_pos.y * this.ratio;\n", - "\n", - " this.send_message(name, {\n", - " x: x,\n", - " y: y,\n", - " button: event.button,\n", - " step: event.step,\n", - " guiEvent: simpleKeys(event),\n", - " });\n", - "\n", - " /* This prevents the web browser from automatically changing to\n", - " * 
the text insertion cursor when the button is pressed. We want\n", - " * to control all of the cursor setting manually through the\n", - " * 'cursor' event from matplotlib */\n", - " event.preventDefault();\n", - " return false;\n", - "};\n", - "\n", - "mpl.figure.prototype._key_event_extra = function (_event, _name) {\n", - " // Handle any extra behaviour associated with a key event\n", - "};\n", - "\n", - "mpl.figure.prototype.key_event = function (event, name) {\n", - " // Prevent repeat events\n", - " if (name === 'key_press') {\n", - " if (event.which === this._key) {\n", - " return;\n", - " } else {\n", - " this._key = event.which;\n", - " }\n", - " }\n", - " if (name === 'key_release') {\n", - " this._key = null;\n", - " }\n", - "\n", - " var value = '';\n", - " if (event.ctrlKey && event.which !== 17) {\n", - " value += 'ctrl+';\n", - " }\n", - " if (event.altKey && event.which !== 18) {\n", - " value += 'alt+';\n", - " }\n", - " if (event.shiftKey && event.which !== 16) {\n", - " value += 'shift+';\n", - " }\n", - "\n", - " value += 'k';\n", - " value += event.which.toString();\n", - "\n", - " this._key_event_extra(event, name);\n", - "\n", - " this.send_message(name, { key: value, guiEvent: simpleKeys(event) });\n", - " return false;\n", - "};\n", - "\n", - "mpl.figure.prototype.toolbar_button_onclick = function (name) {\n", - " if (name === 'download') {\n", - " this.handle_save(this, null);\n", - " } else {\n", - " this.send_message('toolbar_button', { name: name });\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.toolbar_button_onmouseover = function (tooltip) {\n", - " this.message.textContent = tooltip;\n", - "};\n", - "\n", - "///////////////// REMAINING CONTENT GENERATED BY embed_js.py /////////////////\n", - "// prettier-ignore\n", - "var _JSXTOOLS_RESIZE_OBSERVER=function(A){var t,i=new WeakMap,n=new WeakMap,a=new WeakMap,r=new WeakMap,o=new Set;function s(e){if(!(this instanceof s))throw new TypeError(\"Constructor requires 'new' 
operator\");i.set(this,e)}function h(){throw new TypeError(\"Function is not a constructor\")}function c(e,t,i,n){e=0 in arguments?Number(arguments[0]):0,t=1 in arguments?Number(arguments[1]):0,i=2 in arguments?Number(arguments[2]):0,n=3 in arguments?Number(arguments[3]):0,this.right=(this.x=this.left=e)+(this.width=i),this.bottom=(this.y=this.top=t)+(this.height=n),Object.freeze(this)}function d(){t=requestAnimationFrame(d);var s=new WeakMap,p=new Set;o.forEach((function(t){r.get(t).forEach((function(i){var r=t instanceof window.SVGElement,o=a.get(t),d=r?0:parseFloat(o.paddingTop),f=r?0:parseFloat(o.paddingRight),l=r?0:parseFloat(o.paddingBottom),u=r?0:parseFloat(o.paddingLeft),g=r?0:parseFloat(o.borderTopWidth),m=r?0:parseFloat(o.borderRightWidth),w=r?0:parseFloat(o.borderBottomWidth),b=u+f,F=d+l,v=(r?0:parseFloat(o.borderLeftWidth))+m,W=g+w,y=r?0:t.offsetHeight-W-t.clientHeight,E=r?0:t.offsetWidth-v-t.clientWidth,R=b+v,z=F+W,M=r?t.width:parseFloat(o.width)-R-E,O=r?t.height:parseFloat(o.height)-z-y;if(n.has(t)){var k=n.get(t);if(k[0]===M&&k[1]===O)return}n.set(t,[M,O]);var S=Object.create(h.prototype);S.target=t,S.contentRect=new c(u,d,M,O),s.has(i)||(s.set(i,[]),p.add(i)),s.get(i).push(S)}))})),p.forEach((function(e){i.get(e).call(e,s.get(e),e)}))}return s.prototype.observe=function(i){if(i instanceof window.Element){r.has(i)||(r.set(i,new Set),o.add(i),a.set(i,window.getComputedStyle(i)));var n=r.get(i);n.has(this)||n.add(this),cancelAnimationFrame(t),t=requestAnimationFrame(d)}},s.prototype.unobserve=function(i){if(i instanceof window.Element&&r.has(i)){var n=r.get(i);n.has(this)&&(n.delete(this),n.size||(r.delete(i),o.delete(i))),n.size||r.delete(i),o.size||cancelAnimationFrame(t)}},A.DOMRectReadOnly=c,A.ResizeObserver=s,A.ResizeObserverEntry=h,A}; // eslint-disable-line\n", - "mpl.toolbar_items = [[\"Home\", \"Reset original view\", \"fa fa-home icon-home\", \"home\"], [\"Back\", \"Back to previous view\", \"fa fa-arrow-left icon-arrow-left\", \"back\"], 
[\"Forward\", \"Forward to next view\", \"fa fa-arrow-right icon-arrow-right\", \"forward\"], [\"\", \"\", \"\", \"\"], [\"Pan\", \"Left button pans, Right button zooms\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-arrows icon-move\", \"pan\"], [\"Zoom\", \"Zoom to rectangle\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-square-o icon-check-empty\", \"zoom\"], [\"\", \"\", \"\", \"\"], [\"Download\", \"Download plot\", \"fa fa-floppy-o icon-save\", \"download\"]];\n", - "\n", - "mpl.extensions = [\"eps\", \"jpeg\", \"pdf\", \"png\", \"ps\", \"raw\", \"svg\", \"tif\"];\n", - "\n", - "mpl.default_extension = \"png\";/* global mpl */\n", - "\n", - "var comm_websocket_adapter = function (comm) {\n", - " // Create a \"websocket\"-like object which calls the given IPython comm\n", - " // object with the appropriate methods. Currently this is a non binary\n", - " // socket, so there is still some room for performance tuning.\n", - " var ws = {};\n", - "\n", - " ws.close = function () {\n", - " comm.close();\n", - " };\n", - " ws.send = function (m) {\n", - " //console.log('sending', m);\n", - " comm.send(m);\n", - " };\n", - " // Register the callback with on_msg.\n", - " comm.on_msg(function (msg) {\n", - " //console.log('receiving', msg['content']['data'], msg);\n", - " // Pass the mpl event to the overridden (by mpl) onmessage function.\n", - " ws.onmessage(msg['content']['data']);\n", - " });\n", - " return ws;\n", - "};\n", - "\n", - "mpl.mpl_figure_comm = function (comm, msg) {\n", - " // This is the function which gets called when the mpl process\n", - " // starts-up an IPython Comm through the \"matplotlib\" channel.\n", - "\n", - " var id = msg.content.data.id;\n", - " // Get hold of the div created by the display call when the Comm\n", - " // socket was opened in Python.\n", - " var element = document.getElementById(id);\n", - " var ws_proxy = comm_websocket_adapter(comm);\n", - "\n", - " function ondownload(figure, _format) {\n", - " 
window.open(figure.canvas.toDataURL());\n", - " }\n", - "\n", - " var fig = new mpl.figure(id, ws_proxy, ondownload, element);\n", - "\n", - " // Call onopen now - mpl needs it, as it is assuming we've passed it a real\n", - " // web socket which is closed, not our websocket->open comm proxy.\n", - " ws_proxy.onopen();\n", - "\n", - " fig.parent_element = element;\n", - " fig.cell_info = mpl.find_output_cell(\"
\");\n", - " if (!fig.cell_info) {\n", - " console.error('Failed to find cell for figure', id, fig);\n", - " return;\n", - " }\n", - " fig.cell_info[0].output_area.element.on(\n", - " 'cleared',\n", - " { fig: fig },\n", - " fig._remove_fig_handler\n", - " );\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_close = function (fig, msg) {\n", - " var width = fig.canvas.width / fig.ratio;\n", - " fig.cell_info[0].output_area.element.off(\n", - " 'cleared',\n", - " fig._remove_fig_handler\n", - " );\n", - " fig.resizeObserverInstance.unobserve(fig.canvas_div);\n", - "\n", - " // Update the output cell to use the data from the current canvas.\n", - " fig.push_to_output();\n", - " var dataURL = fig.canvas.toDataURL();\n", - " // Re-enable the keyboard manager in IPython - without this line, in FF,\n", - " // the notebook keyboard shortcuts fail.\n", - " IPython.keyboard_manager.enable();\n", - " fig.parent_element.innerHTML =\n", - " '';\n", - " fig.close_ws(fig, msg);\n", - "};\n", - "\n", - "mpl.figure.prototype.close_ws = function (fig, msg) {\n", - " fig.send_message('closing', msg);\n", - " // fig.ws.close()\n", - "};\n", - "\n", - "mpl.figure.prototype.push_to_output = function (_remove_interactive) {\n", - " // Turn the data on the canvas into data in the output cell.\n", - " var width = this.canvas.width / this.ratio;\n", - " var dataURL = this.canvas.toDataURL();\n", - " this.cell_info[1]['text/html'] =\n", - " '';\n", - "};\n", - "\n", - "mpl.figure.prototype.updated_canvas_event = function () {\n", - " // Tell IPython that the notebook contents must change.\n", - " IPython.notebook.set_dirty(true);\n", - " this.send_message('ack', {});\n", - " var fig = this;\n", - " // Wait a second, then push the new image to the DOM so\n", - " // that it is saved nicely (might be nice to debounce this).\n", - " setTimeout(function () {\n", - " fig.push_to_output();\n", - " }, 1000);\n", - "};\n", - "\n", - "mpl.figure.prototype._init_toolbar = function () {\n", - " var 
fig = this;\n", - "\n", - " var toolbar = document.createElement('div');\n", - " toolbar.classList = 'btn-toolbar';\n", - " this.root.appendChild(toolbar);\n", - "\n", - " function on_click_closure(name) {\n", - " return function (_event) {\n", - " return fig.toolbar_button_onclick(name);\n", - " };\n", - " }\n", - "\n", - " function on_mouseover_closure(tooltip) {\n", - " return function (event) {\n", - " if (!event.currentTarget.disabled) {\n", - " return fig.toolbar_button_onmouseover(tooltip);\n", - " }\n", - " };\n", - " }\n", - "\n", - " fig.buttons = {};\n", - " var buttonGroup = document.createElement('div');\n", - " buttonGroup.classList = 'btn-group';\n", - " var button;\n", - " for (var toolbar_ind in mpl.toolbar_items) {\n", - " var name = mpl.toolbar_items[toolbar_ind][0];\n", - " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", - " var image = mpl.toolbar_items[toolbar_ind][2];\n", - " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", - "\n", - " if (!name) {\n", - " /* Instead of a spacer, we start a new button group. 
*/\n", - " if (buttonGroup.hasChildNodes()) {\n", - " toolbar.appendChild(buttonGroup);\n", - " }\n", - " buttonGroup = document.createElement('div');\n", - " buttonGroup.classList = 'btn-group';\n", - " continue;\n", - " }\n", - "\n", - " button = fig.buttons[name] = document.createElement('button');\n", - " button.classList = 'btn btn-default';\n", - " button.href = '#';\n", - " button.title = name;\n", - " button.innerHTML = '';\n", - " button.addEventListener('click', on_click_closure(method_name));\n", - " button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n", - " buttonGroup.appendChild(button);\n", - " }\n", - "\n", - " if (buttonGroup.hasChildNodes()) {\n", - " toolbar.appendChild(buttonGroup);\n", - " }\n", - "\n", - " // Add the status bar.\n", - " var status_bar = document.createElement('span');\n", - " status_bar.classList = 'mpl-message pull-right';\n", - " toolbar.appendChild(status_bar);\n", - " this.message = status_bar;\n", - "\n", - " // Add the close button to the window.\n", - " var buttongrp = document.createElement('div');\n", - " buttongrp.classList = 'btn-group inline pull-right';\n", - " button = document.createElement('button');\n", - " button.classList = 'btn btn-mini btn-primary';\n", - " button.href = '#';\n", - " button.title = 'Stop Interaction';\n", - " button.innerHTML = '';\n", - " button.addEventListener('click', function (_evt) {\n", - " fig.handle_close(fig, {});\n", - " });\n", - " button.addEventListener(\n", - " 'mouseover',\n", - " on_mouseover_closure('Stop Interaction')\n", - " );\n", - " buttongrp.appendChild(button);\n", - " var titlebar = this.root.querySelector('.ui-dialog-titlebar');\n", - " titlebar.insertBefore(buttongrp, titlebar.firstChild);\n", - "};\n", - "\n", - "mpl.figure.prototype._remove_fig_handler = function (event) {\n", - " var fig = event.data.fig;\n", - " if (event.target !== this) {\n", - " // Ignore bubbled events from children.\n", - " return;\n", - " }\n", - " fig.close_ws(fig, 
{});\n", - "};\n", - "\n", - "mpl.figure.prototype._root_extra_style = function (el) {\n", - " el.style.boxSizing = 'content-box'; // override notebook setting of border-box.\n", - "};\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function (el) {\n", - " // this is important to make the div 'focusable\n", - " el.setAttribute('tabindex', 0);\n", - " // reach out to IPython and tell the keyboard manager to turn it's self\n", - " // off when our div gets focus\n", - "\n", - " // location in version 3\n", - " if (IPython.notebook.keyboard_manager) {\n", - " IPython.notebook.keyboard_manager.register_events(el);\n", - " } else {\n", - " // location in version 2\n", - " IPython.keyboard_manager.register_events(el);\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype._key_event_extra = function (event, _name) {\n", - " var manager = IPython.notebook.keyboard_manager;\n", - " if (!manager) {\n", - " manager = IPython.keyboard_manager;\n", - " }\n", - "\n", - " // Check for shift+enter\n", - " if (event.shiftKey && event.which === 13) {\n", - " this.canvas_div.blur();\n", - " // select the cell after this one\n", - " var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n", - " IPython.notebook.select(index + 1);\n", - " }\n", - "};\n", - "\n", - "mpl.figure.prototype.handle_save = function (fig, _msg) {\n", - " fig.ondownload(fig, null);\n", - "};\n", - "\n", - "mpl.find_output_cell = function (html_output) {\n", - " // Return the cell and output element which can be found *uniquely* in the notebook.\n", - " // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n", - " // IPython event is triggered only after the cells have been serialised, which for\n", - " // our purposes (turning an active figure into a static one), is too late.\n", - " var cells = IPython.notebook.get_cells();\n", - " var ncells = cells.length;\n", - " for (var i = 0; i < ncells; i++) {\n", - " var cell = cells[i];\n", - " if (cell.cell_type 
=== 'code') {\n", - " for (var j = 0; j < cell.output_area.outputs.length; j++) {\n", - " var data = cell.output_area.outputs[j];\n", - " if (data.data) {\n", - " // IPython >= 3 moved mimebundle to data attribute of output\n", - " data = data.data;\n", - " }\n", - " if (data['text/html'] === html_output) {\n", - " return [cell, data, j];\n", - " }\n", - " }\n", - " }\n", - " }\n", - "};\n", - "\n", - "// Register the function which deals with the matplotlib target/channel.\n", - "// The kernel may be null if the page has been refreshed.\n", - "if (IPython.notebook.kernel !== null) {\n", - " IPython.notebook.kernel.comm_manager.register_target(\n", - " 'matplotlib',\n", - " mpl.mpl_figure_comm\n", - " );\n", - "}\n" - ], + "application/javascript": "/* Put everything inside the global mpl namespace */\n/* global mpl */\nwindow.mpl = {};\n\nmpl.get_websocket_type = function () {\n if (typeof WebSocket !== 'undefined') {\n return WebSocket;\n } else if (typeof MozWebSocket !== 'undefined') {\n return MozWebSocket;\n } else {\n alert(\n 'Your browser does not have WebSocket support. ' +\n 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n 'Firefox 4 and 5 are also supported but you ' +\n 'have to enable WebSockets in about:config.'\n );\n }\n};\n\nmpl.figure = function (figure_id, websocket, ondownload, parent_element) {\n this.id = figure_id;\n\n this.ws = websocket;\n\n this.supports_binary = this.ws.binaryType !== undefined;\n\n if (!this.supports_binary) {\n var warnings = document.getElementById('mpl-warnings');\n if (warnings) {\n warnings.style.display = 'block';\n warnings.textContent =\n 'This browser does not support binary websocket messages. 
' +\n 'Performance may be slow.';\n }\n }\n\n this.imageObj = new Image();\n\n this.context = undefined;\n this.message = undefined;\n this.canvas = undefined;\n this.rubberband_canvas = undefined;\n this.rubberband_context = undefined;\n this.format_dropdown = undefined;\n\n this.image_mode = 'full';\n\n this.root = document.createElement('div');\n this.root.setAttribute('style', 'display: inline-block');\n this._root_extra_style(this.root);\n\n parent_element.appendChild(this.root);\n\n this._init_header(this);\n this._init_canvas(this);\n this._init_toolbar(this);\n\n var fig = this;\n\n this.waiting = false;\n\n this.ws.onopen = function () {\n fig.send_message('supports_binary', { value: fig.supports_binary });\n fig.send_message('send_image_mode', {});\n if (fig.ratio !== 1) {\n fig.send_message('set_dpi_ratio', { dpi_ratio: fig.ratio });\n }\n fig.send_message('refresh', {});\n };\n\n this.imageObj.onload = function () {\n if (fig.image_mode === 'full') {\n // Full images could contain transparency (where diff images\n // almost always do), so we need to clear the canvas so that\n // there is no ghosting.\n fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n }\n fig.context.drawImage(fig.imageObj, 0, 0);\n };\n\n this.imageObj.onunload = function () {\n fig.ws.close();\n };\n\n this.ws.onmessage = this._make_on_message_function(this);\n\n this.ondownload = ondownload;\n};\n\nmpl.figure.prototype._init_header = function () {\n var titlebar = document.createElement('div');\n titlebar.classList =\n 'ui-dialog-titlebar ui-widget-header ui-corner-all ui-helper-clearfix';\n var titletext = document.createElement('div');\n titletext.classList = 'ui-dialog-title';\n titletext.setAttribute(\n 'style',\n 'width: 100%; text-align: center; padding: 3px;'\n );\n titlebar.appendChild(titletext);\n this.root.appendChild(titlebar);\n this.header = titletext;\n};\n\nmpl.figure.prototype._canvas_extra_style = function (_canvas_div) 
{};\n\nmpl.figure.prototype._root_extra_style = function (_canvas_div) {};\n\nmpl.figure.prototype._init_canvas = function () {\n var fig = this;\n\n var canvas_div = (this.canvas_div = document.createElement('div'));\n canvas_div.setAttribute(\n 'style',\n 'border: 1px solid #ddd;' +\n 'box-sizing: content-box;' +\n 'clear: both;' +\n 'min-height: 1px;' +\n 'min-width: 1px;' +\n 'outline: 0;' +\n 'overflow: hidden;' +\n 'position: relative;' +\n 'resize: both;'\n );\n\n function on_keyboard_event_closure(name) {\n return function (event) {\n return fig.key_event(event, name);\n };\n }\n\n canvas_div.addEventListener(\n 'keydown',\n on_keyboard_event_closure('key_press')\n );\n canvas_div.addEventListener(\n 'keyup',\n on_keyboard_event_closure('key_release')\n );\n\n this._canvas_extra_style(canvas_div);\n this.root.appendChild(canvas_div);\n\n var canvas = (this.canvas = document.createElement('canvas'));\n canvas.classList.add('mpl-canvas');\n canvas.setAttribute('style', 'box-sizing: content-box;');\n\n this.context = canvas.getContext('2d');\n\n var backingStore =\n this.context.backingStorePixelRatio ||\n this.context.webkitBackingStorePixelRatio ||\n this.context.mozBackingStorePixelRatio ||\n this.context.msBackingStorePixelRatio ||\n this.context.oBackingStorePixelRatio ||\n this.context.backingStorePixelRatio ||\n 1;\n\n this.ratio = (window.devicePixelRatio || 1) / backingStore;\n\n var rubberband_canvas = (this.rubberband_canvas = document.createElement(\n 'canvas'\n ));\n rubberband_canvas.setAttribute(\n 'style',\n 'box-sizing: content-box; position: absolute; left: 0; top: 0; z-index: 1;'\n );\n\n // Apply a ponyfill if ResizeObserver is not implemented by browser.\n if (this.ResizeObserver === undefined) {\n if (window.ResizeObserver !== undefined) {\n this.ResizeObserver = window.ResizeObserver;\n } else {\n var obs = _JSXTOOLS_RESIZE_OBSERVER({});\n this.ResizeObserver = obs.ResizeObserver;\n }\n }\n\n this.resizeObserverInstance = new 
this.ResizeObserver(function (entries) {\n var nentries = entries.length;\n for (var i = 0; i < nentries; i++) {\n var entry = entries[i];\n var width, height;\n if (entry.contentBoxSize) {\n if (entry.contentBoxSize instanceof Array) {\n // Chrome 84 implements new version of spec.\n width = entry.contentBoxSize[0].inlineSize;\n height = entry.contentBoxSize[0].blockSize;\n } else {\n // Firefox implements old version of spec.\n width = entry.contentBoxSize.inlineSize;\n height = entry.contentBoxSize.blockSize;\n }\n } else {\n // Chrome <84 implements even older version of spec.\n width = entry.contentRect.width;\n height = entry.contentRect.height;\n }\n\n // Keep the size of the canvas and rubber band canvas in sync with\n // the canvas container.\n if (entry.devicePixelContentBoxSize) {\n // Chrome 84 implements new version of spec.\n canvas.setAttribute(\n 'width',\n entry.devicePixelContentBoxSize[0].inlineSize\n );\n canvas.setAttribute(\n 'height',\n entry.devicePixelContentBoxSize[0].blockSize\n );\n } else {\n canvas.setAttribute('width', width * fig.ratio);\n canvas.setAttribute('height', height * fig.ratio);\n }\n canvas.setAttribute(\n 'style',\n 'width: ' + width + 'px; height: ' + height + 'px;'\n );\n\n rubberband_canvas.setAttribute('width', width);\n rubberband_canvas.setAttribute('height', height);\n\n // And update the size in Python. 
We ignore the initial 0/0 size\n // that occurs as the element is placed into the DOM, which should\n // otherwise not happen due to the minimum size styling.\n if (fig.ws.readyState == 1 && width != 0 && height != 0) {\n fig.request_resize(width, height);\n }\n }\n });\n this.resizeObserverInstance.observe(canvas_div);\n\n function on_mouse_event_closure(name) {\n return function (event) {\n return fig.mouse_event(event, name);\n };\n }\n\n rubberband_canvas.addEventListener(\n 'mousedown',\n on_mouse_event_closure('button_press')\n );\n rubberband_canvas.addEventListener(\n 'mouseup',\n on_mouse_event_closure('button_release')\n );\n // Throttle sequential mouse events to 1 every 20ms.\n rubberband_canvas.addEventListener(\n 'mousemove',\n on_mouse_event_closure('motion_notify')\n );\n\n rubberband_canvas.addEventListener(\n 'mouseenter',\n on_mouse_event_closure('figure_enter')\n );\n rubberband_canvas.addEventListener(\n 'mouseleave',\n on_mouse_event_closure('figure_leave')\n );\n\n canvas_div.addEventListener('wheel', function (event) {\n if (event.deltaY < 0) {\n event.step = 1;\n } else {\n event.step = -1;\n }\n on_mouse_event_closure('scroll')(event);\n });\n\n canvas_div.appendChild(canvas);\n canvas_div.appendChild(rubberband_canvas);\n\n this.rubberband_context = rubberband_canvas.getContext('2d');\n this.rubberband_context.strokeStyle = '#000000';\n\n this._resize_canvas = function (width, height, forward) {\n if (forward) {\n canvas_div.style.width = width + 'px';\n canvas_div.style.height = height + 'px';\n }\n };\n\n // Disable right mouse context menu.\n this.rubberband_canvas.addEventListener('contextmenu', function (_e) {\n event.preventDefault();\n return false;\n });\n\n function set_focus() {\n canvas.focus();\n canvas_div.focus();\n }\n\n window.setTimeout(set_focus, 100);\n};\n\nmpl.figure.prototype._init_toolbar = function () {\n var fig = this;\n\n var toolbar = document.createElement('div');\n toolbar.classList = 'mpl-toolbar';\n 
this.root.appendChild(toolbar);\n\n function on_click_closure(name) {\n return function (_event) {\n return fig.toolbar_button_onclick(name);\n };\n }\n\n function on_mouseover_closure(tooltip) {\n return function (event) {\n if (!event.currentTarget.disabled) {\n return fig.toolbar_button_onmouseover(tooltip);\n }\n };\n }\n\n fig.buttons = {};\n var buttonGroup = document.createElement('div');\n buttonGroup.classList = 'mpl-button-group';\n for (var toolbar_ind in mpl.toolbar_items) {\n var name = mpl.toolbar_items[toolbar_ind][0];\n var tooltip = mpl.toolbar_items[toolbar_ind][1];\n var image = mpl.toolbar_items[toolbar_ind][2];\n var method_name = mpl.toolbar_items[toolbar_ind][3];\n\n if (!name) {\n /* Instead of a spacer, we start a new button group. */\n if (buttonGroup.hasChildNodes()) {\n toolbar.appendChild(buttonGroup);\n }\n buttonGroup = document.createElement('div');\n buttonGroup.classList = 'mpl-button-group';\n continue;\n }\n\n var button = (fig.buttons[name] = document.createElement('button'));\n button.classList = 'mpl-widget';\n button.setAttribute('role', 'button');\n button.setAttribute('aria-disabled', 'false');\n button.addEventListener('click', on_click_closure(method_name));\n button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n\n var icon_img = document.createElement('img');\n icon_img.src = '_images/' + image + '.png';\n icon_img.srcset = '_images/' + image + '_large.png 2x';\n icon_img.alt = tooltip;\n button.appendChild(icon_img);\n\n buttonGroup.appendChild(button);\n }\n\n if (buttonGroup.hasChildNodes()) {\n toolbar.appendChild(buttonGroup);\n }\n\n var fmt_picker = document.createElement('select');\n fmt_picker.classList = 'mpl-widget';\n toolbar.appendChild(fmt_picker);\n this.format_dropdown = fmt_picker;\n\n for (var ind in mpl.extensions) {\n var fmt = mpl.extensions[ind];\n var option = document.createElement('option');\n option.selected = fmt === mpl.default_extension;\n option.innerHTML = fmt;\n 
fmt_picker.appendChild(option);\n }\n\n var status_bar = document.createElement('span');\n status_bar.classList = 'mpl-message';\n toolbar.appendChild(status_bar);\n this.message = status_bar;\n};\n\nmpl.figure.prototype.request_resize = function (x_pixels, y_pixels) {\n // Request matplotlib to resize the figure. Matplotlib will then trigger a resize in the client,\n // which will in turn request a refresh of the image.\n this.send_message('resize', { width: x_pixels, height: y_pixels });\n};\n\nmpl.figure.prototype.send_message = function (type, properties) {\n properties['type'] = type;\n properties['figure_id'] = this.id;\n this.ws.send(JSON.stringify(properties));\n};\n\nmpl.figure.prototype.send_draw_message = function () {\n if (!this.waiting) {\n this.waiting = true;\n this.ws.send(JSON.stringify({ type: 'draw', figure_id: this.id }));\n }\n};\n\nmpl.figure.prototype.handle_save = function (fig, _msg) {\n var format_dropdown = fig.format_dropdown;\n var format = format_dropdown.options[format_dropdown.selectedIndex].value;\n fig.ondownload(fig, format);\n};\n\nmpl.figure.prototype.handle_resize = function (fig, msg) {\n var size = msg['size'];\n if (size[0] !== fig.canvas.width || size[1] !== fig.canvas.height) {\n fig._resize_canvas(size[0], size[1], msg['forward']);\n fig.send_message('refresh', {});\n }\n};\n\nmpl.figure.prototype.handle_rubberband = function (fig, msg) {\n var x0 = msg['x0'] / fig.ratio;\n var y0 = (fig.canvas.height - msg['y0']) / fig.ratio;\n var x1 = msg['x1'] / fig.ratio;\n var y1 = (fig.canvas.height - msg['y1']) / fig.ratio;\n x0 = Math.floor(x0) + 0.5;\n y0 = Math.floor(y0) + 0.5;\n x1 = Math.floor(x1) + 0.5;\n y1 = Math.floor(y1) + 0.5;\n var min_x = Math.min(x0, x1);\n var min_y = Math.min(y0, y1);\n var width = Math.abs(x1 - x0);\n var height = Math.abs(y1 - y0);\n\n fig.rubberband_context.clearRect(\n 0,\n 0,\n fig.canvas.width / fig.ratio,\n fig.canvas.height / fig.ratio\n );\n\n fig.rubberband_context.strokeRect(min_x, 
min_y, width, height);\n};\n\nmpl.figure.prototype.handle_figure_label = function (fig, msg) {\n // Updates the figure title.\n fig.header.textContent = msg['label'];\n};\n\nmpl.figure.prototype.handle_cursor = function (fig, msg) {\n var cursor = msg['cursor'];\n switch (cursor) {\n case 0:\n cursor = 'pointer';\n break;\n case 1:\n cursor = 'default';\n break;\n case 2:\n cursor = 'crosshair';\n break;\n case 3:\n cursor = 'move';\n break;\n }\n fig.rubberband_canvas.style.cursor = cursor;\n};\n\nmpl.figure.prototype.handle_message = function (fig, msg) {\n fig.message.textContent = msg['message'];\n};\n\nmpl.figure.prototype.handle_draw = function (fig, _msg) {\n // Request the server to send over a new figure.\n fig.send_draw_message();\n};\n\nmpl.figure.prototype.handle_image_mode = function (fig, msg) {\n fig.image_mode = msg['mode'];\n};\n\nmpl.figure.prototype.handle_history_buttons = function (fig, msg) {\n for (var key in msg) {\n if (!(key in fig.buttons)) {\n continue;\n }\n fig.buttons[key].disabled = !msg[key];\n fig.buttons[key].setAttribute('aria-disabled', !msg[key]);\n }\n};\n\nmpl.figure.prototype.handle_navigate_mode = function (fig, msg) {\n if (msg['mode'] === 'PAN') {\n fig.buttons['Pan'].classList.add('active');\n fig.buttons['Zoom'].classList.remove('active');\n } else if (msg['mode'] === 'ZOOM') {\n fig.buttons['Pan'].classList.remove('active');\n fig.buttons['Zoom'].classList.add('active');\n } else {\n fig.buttons['Pan'].classList.remove('active');\n fig.buttons['Zoom'].classList.remove('active');\n }\n};\n\nmpl.figure.prototype.updated_canvas_event = function () {\n // Called whenever the canvas gets updated.\n this.send_message('ack', {});\n};\n\n// A function to construct a web socket function for onmessage handling.\n// Called in the figure constructor.\nmpl.figure.prototype._make_on_message_function = function (fig) {\n return function socket_on_message(evt) {\n if (evt.data instanceof Blob) {\n /* FIXME: We get \"Resource 
interpreted as Image but\n * transferred with MIME type text/plain:\" errors on\n * Chrome. But how to set the MIME type? It doesn't seem\n * to be part of the websocket stream */\n evt.data.type = 'image/png';\n\n /* Free the memory for the previous frames */\n if (fig.imageObj.src) {\n (window.URL || window.webkitURL).revokeObjectURL(\n fig.imageObj.src\n );\n }\n\n fig.imageObj.src = (window.URL || window.webkitURL).createObjectURL(\n evt.data\n );\n fig.updated_canvas_event();\n fig.waiting = false;\n return;\n } else if (\n typeof evt.data === 'string' &&\n evt.data.slice(0, 21) === 'data:image/png;base64'\n ) {\n fig.imageObj.src = evt.data;\n fig.updated_canvas_event();\n fig.waiting = false;\n return;\n }\n\n var msg = JSON.parse(evt.data);\n var msg_type = msg['type'];\n\n // Call the \"handle_{type}\" callback, which takes\n // the figure and JSON message as its only arguments.\n try {\n var callback = fig['handle_' + msg_type];\n } catch (e) {\n console.log(\n \"No handler for the '\" + msg_type + \"' message type: \",\n msg\n );\n return;\n }\n\n if (callback) {\n try {\n // console.log(\"Handling '\" + msg_type + \"' message: \", msg);\n callback(fig, msg);\n } catch (e) {\n console.log(\n \"Exception inside the 'handler_\" + msg_type + \"' callback:\",\n e,\n e.stack,\n msg\n );\n }\n }\n };\n};\n\n// from http://stackoverflow.com/questions/1114465/getting-mouse-location-in-canvas\nmpl.findpos = function (e) {\n //this section is from http://www.quirksmode.org/js/events_properties.html\n var targ;\n if (!e) {\n e = window.event;\n }\n if (e.target) {\n targ = e.target;\n } else if (e.srcElement) {\n targ = e.srcElement;\n }\n if (targ.nodeType === 3) {\n // defeat Safari bug\n targ = targ.parentNode;\n }\n\n // pageX,Y are the mouse positions relative to the document\n var boundingRect = targ.getBoundingClientRect();\n var x = e.pageX - (boundingRect.left + document.body.scrollLeft);\n var y = e.pageY - (boundingRect.top + 
document.body.scrollTop);\n\n return { x: x, y: y };\n};\n\n/*\n * return a copy of an object with only non-object keys\n * we need this to avoid circular references\n * http://stackoverflow.com/a/24161582/3208463\n */\nfunction simpleKeys(original) {\n return Object.keys(original).reduce(function (obj, key) {\n if (typeof original[key] !== 'object') {\n obj[key] = original[key];\n }\n return obj;\n }, {});\n}\n\nmpl.figure.prototype.mouse_event = function (event, name) {\n var canvas_pos = mpl.findpos(event);\n\n if (name === 'button_press') {\n this.canvas.focus();\n this.canvas_div.focus();\n }\n\n var x = canvas_pos.x * this.ratio;\n var y = canvas_pos.y * this.ratio;\n\n this.send_message(name, {\n x: x,\n y: y,\n button: event.button,\n step: event.step,\n guiEvent: simpleKeys(event),\n });\n\n /* This prevents the web browser from automatically changing to\n * the text insertion cursor when the button is pressed. We want\n * to control all of the cursor setting manually through the\n * 'cursor' event from matplotlib */\n event.preventDefault();\n return false;\n};\n\nmpl.figure.prototype._key_event_extra = function (_event, _name) {\n // Handle any extra behaviour associated with a key event\n};\n\nmpl.figure.prototype.key_event = function (event, name) {\n // Prevent repeat events\n if (name === 'key_press') {\n if (event.which === this._key) {\n return;\n } else {\n this._key = event.which;\n }\n }\n if (name === 'key_release') {\n this._key = null;\n }\n\n var value = '';\n if (event.ctrlKey && event.which !== 17) {\n value += 'ctrl+';\n }\n if (event.altKey && event.which !== 18) {\n value += 'alt+';\n }\n if (event.shiftKey && event.which !== 16) {\n value += 'shift+';\n }\n\n value += 'k';\n value += event.which.toString();\n\n this._key_event_extra(event, name);\n\n this.send_message(name, { key: value, guiEvent: simpleKeys(event) });\n return false;\n};\n\nmpl.figure.prototype.toolbar_button_onclick = function (name) {\n if (name === 'download') {\n 
this.handle_save(this, null);\n } else {\n this.send_message('toolbar_button', { name: name });\n }\n};\n\nmpl.figure.prototype.toolbar_button_onmouseover = function (tooltip) {\n this.message.textContent = tooltip;\n};\n\n///////////////// REMAINING CONTENT GENERATED BY embed_js.py /////////////////\n// prettier-ignore\nvar _JSXTOOLS_RESIZE_OBSERVER=function(A){var t,i=new WeakMap,n=new WeakMap,a=new WeakMap,r=new WeakMap,o=new Set;function s(e){if(!(this instanceof s))throw new TypeError(\"Constructor requires 'new' operator\");i.set(this,e)}function h(){throw new TypeError(\"Function is not a constructor\")}function c(e,t,i,n){e=0 in arguments?Number(arguments[0]):0,t=1 in arguments?Number(arguments[1]):0,i=2 in arguments?Number(arguments[2]):0,n=3 in arguments?Number(arguments[3]):0,this.right=(this.x=this.left=e)+(this.width=i),this.bottom=(this.y=this.top=t)+(this.height=n),Object.freeze(this)}function d(){t=requestAnimationFrame(d);var s=new WeakMap,p=new Set;o.forEach((function(t){r.get(t).forEach((function(i){var r=t instanceof window.SVGElement,o=a.get(t),d=r?0:parseFloat(o.paddingTop),f=r?0:parseFloat(o.paddingRight),l=r?0:parseFloat(o.paddingBottom),u=r?0:parseFloat(o.paddingLeft),g=r?0:parseFloat(o.borderTopWidth),m=r?0:parseFloat(o.borderRightWidth),w=r?0:parseFloat(o.borderBottomWidth),b=u+f,F=d+l,v=(r?0:parseFloat(o.borderLeftWidth))+m,W=g+w,y=r?0:t.offsetHeight-W-t.clientHeight,E=r?0:t.offsetWidth-v-t.clientWidth,R=b+v,z=F+W,M=r?t.width:parseFloat(o.width)-R-E,O=r?t.height:parseFloat(o.height)-z-y;if(n.has(t)){var k=n.get(t);if(k[0]===M&&k[1]===O)return}n.set(t,[M,O]);var S=Object.create(h.prototype);S.target=t,S.contentRect=new c(u,d,M,O),s.has(i)||(s.set(i,[]),p.add(i)),s.get(i).push(S)}))})),p.forEach((function(e){i.get(e).call(e,s.get(e),e)}))}return s.prototype.observe=function(i){if(i instanceof window.Element){r.has(i)||(r.set(i,new Set),o.add(i),a.set(i,window.getComputedStyle(i)));var 
n=r.get(i);n.has(this)||n.add(this),cancelAnimationFrame(t),t=requestAnimationFrame(d)}},s.prototype.unobserve=function(i){if(i instanceof window.Element&&r.has(i)){var n=r.get(i);n.has(this)&&(n.delete(this),n.size||(r.delete(i),o.delete(i))),n.size||r.delete(i),o.size||cancelAnimationFrame(t)}},A.DOMRectReadOnly=c,A.ResizeObserver=s,A.ResizeObserverEntry=h,A}; // eslint-disable-line\nmpl.toolbar_items = [[\"Home\", \"Reset original view\", \"fa fa-home icon-home\", \"home\"], [\"Back\", \"Back to previous view\", \"fa fa-arrow-left icon-arrow-left\", \"back\"], [\"Forward\", \"Forward to next view\", \"fa fa-arrow-right icon-arrow-right\", \"forward\"], [\"\", \"\", \"\", \"\"], [\"Pan\", \"Left button pans, Right button zooms\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-arrows icon-move\", \"pan\"], [\"Zoom\", \"Zoom to rectangle\\nx/y fixes axis, CTRL fixes aspect\", \"fa fa-square-o icon-check-empty\", \"zoom\"], [\"\", \"\", \"\", \"\"], [\"Download\", \"Download plot\", \"fa fa-floppy-o icon-save\", \"download\"]];\n\nmpl.extensions = [\"eps\", \"jpeg\", \"pdf\", \"png\", \"ps\", \"raw\", \"svg\", \"tif\"];\n\nmpl.default_extension = \"png\";/* global mpl */\n\nvar comm_websocket_adapter = function (comm) {\n // Create a \"websocket\"-like object which calls the given IPython comm\n // object with the appropriate methods. 
Currently this is a non binary\n // socket, so there is still some room for performance tuning.\n var ws = {};\n\n ws.close = function () {\n comm.close();\n };\n ws.send = function (m) {\n //console.log('sending', m);\n comm.send(m);\n };\n // Register the callback with on_msg.\n comm.on_msg(function (msg) {\n //console.log('receiving', msg['content']['data'], msg);\n // Pass the mpl event to the overridden (by mpl) onmessage function.\n ws.onmessage(msg['content']['data']);\n });\n return ws;\n};\n\nmpl.mpl_figure_comm = function (comm, msg) {\n // This is the function which gets called when the mpl process\n // starts-up an IPython Comm through the \"matplotlib\" channel.\n\n var id = msg.content.data.id;\n // Get hold of the div created by the display call when the Comm\n // socket was opened in Python.\n var element = document.getElementById(id);\n var ws_proxy = comm_websocket_adapter(comm);\n\n function ondownload(figure, _format) {\n window.open(figure.canvas.toDataURL());\n }\n\n var fig = new mpl.figure(id, ws_proxy, ondownload, element);\n\n // Call onopen now - mpl needs it, as it is assuming we've passed it a real\n // web socket which is closed, not our websocket->open comm proxy.\n ws_proxy.onopen();\n\n fig.parent_element = element;\n fig.cell_info = mpl.find_output_cell(\"
\");\n if (!fig.cell_info) {\n console.error('Failed to find cell for figure', id, fig);\n return;\n }\n fig.cell_info[0].output_area.element.on(\n 'cleared',\n { fig: fig },\n fig._remove_fig_handler\n );\n};\n\nmpl.figure.prototype.handle_close = function (fig, msg) {\n var width = fig.canvas.width / fig.ratio;\n fig.cell_info[0].output_area.element.off(\n 'cleared',\n fig._remove_fig_handler\n );\n fig.resizeObserverInstance.unobserve(fig.canvas_div);\n\n // Update the output cell to use the data from the current canvas.\n fig.push_to_output();\n var dataURL = fig.canvas.toDataURL();\n // Re-enable the keyboard manager in IPython - without this line, in FF,\n // the notebook keyboard shortcuts fail.\n IPython.keyboard_manager.enable();\n fig.parent_element.innerHTML =\n '';\n fig.close_ws(fig, msg);\n};\n\nmpl.figure.prototype.close_ws = function (fig, msg) {\n fig.send_message('closing', msg);\n // fig.ws.close()\n};\n\nmpl.figure.prototype.push_to_output = function (_remove_interactive) {\n // Turn the data on the canvas into data in the output cell.\n var width = this.canvas.width / this.ratio;\n var dataURL = this.canvas.toDataURL();\n this.cell_info[1]['text/html'] =\n '';\n};\n\nmpl.figure.prototype.updated_canvas_event = function () {\n // Tell IPython that the notebook contents must change.\n IPython.notebook.set_dirty(true);\n this.send_message('ack', {});\n var fig = this;\n // Wait a second, then push the new image to the DOM so\n // that it is saved nicely (might be nice to debounce this).\n setTimeout(function () {\n fig.push_to_output();\n }, 1000);\n};\n\nmpl.figure.prototype._init_toolbar = function () {\n var fig = this;\n\n var toolbar = document.createElement('div');\n toolbar.classList = 'btn-toolbar';\n this.root.appendChild(toolbar);\n\n function on_click_closure(name) {\n return function (_event) {\n return fig.toolbar_button_onclick(name);\n };\n }\n\n function on_mouseover_closure(tooltip) {\n return function (event) {\n if 
(!event.currentTarget.disabled) {\n return fig.toolbar_button_onmouseover(tooltip);\n }\n };\n }\n\n fig.buttons = {};\n var buttonGroup = document.createElement('div');\n buttonGroup.classList = 'btn-group';\n var button;\n for (var toolbar_ind in mpl.toolbar_items) {\n var name = mpl.toolbar_items[toolbar_ind][0];\n var tooltip = mpl.toolbar_items[toolbar_ind][1];\n var image = mpl.toolbar_items[toolbar_ind][2];\n var method_name = mpl.toolbar_items[toolbar_ind][3];\n\n if (!name) {\n /* Instead of a spacer, we start a new button group. */\n if (buttonGroup.hasChildNodes()) {\n toolbar.appendChild(buttonGroup);\n }\n buttonGroup = document.createElement('div');\n buttonGroup.classList = 'btn-group';\n continue;\n }\n\n button = fig.buttons[name] = document.createElement('button');\n button.classList = 'btn btn-default';\n button.href = '#';\n button.title = name;\n button.innerHTML = '';\n button.addEventListener('click', on_click_closure(method_name));\n button.addEventListener('mouseover', on_mouseover_closure(tooltip));\n buttonGroup.appendChild(button);\n }\n\n if (buttonGroup.hasChildNodes()) {\n toolbar.appendChild(buttonGroup);\n }\n\n // Add the status bar.\n var status_bar = document.createElement('span');\n status_bar.classList = 'mpl-message pull-right';\n toolbar.appendChild(status_bar);\n this.message = status_bar;\n\n // Add the close button to the window.\n var buttongrp = document.createElement('div');\n buttongrp.classList = 'btn-group inline pull-right';\n button = document.createElement('button');\n button.classList = 'btn btn-mini btn-primary';\n button.href = '#';\n button.title = 'Stop Interaction';\n button.innerHTML = '';\n button.addEventListener('click', function (_evt) {\n fig.handle_close(fig, {});\n });\n button.addEventListener(\n 'mouseover',\n on_mouseover_closure('Stop Interaction')\n );\n buttongrp.appendChild(button);\n var titlebar = this.root.querySelector('.ui-dialog-titlebar');\n titlebar.insertBefore(buttongrp, 
titlebar.firstChild);\n};\n\nmpl.figure.prototype._remove_fig_handler = function (event) {\n var fig = event.data.fig;\n if (event.target !== this) {\n // Ignore bubbled events from children.\n return;\n }\n fig.close_ws(fig, {});\n};\n\nmpl.figure.prototype._root_extra_style = function (el) {\n el.style.boxSizing = 'content-box'; // override notebook setting of border-box.\n};\n\nmpl.figure.prototype._canvas_extra_style = function (el) {\n // this is important to make the div 'focusable\n el.setAttribute('tabindex', 0);\n // reach out to IPython and tell the keyboard manager to turn it's self\n // off when our div gets focus\n\n // location in version 3\n if (IPython.notebook.keyboard_manager) {\n IPython.notebook.keyboard_manager.register_events(el);\n } else {\n // location in version 2\n IPython.keyboard_manager.register_events(el);\n }\n};\n\nmpl.figure.prototype._key_event_extra = function (event, _name) {\n var manager = IPython.notebook.keyboard_manager;\n if (!manager) {\n manager = IPython.keyboard_manager;\n }\n\n // Check for shift+enter\n if (event.shiftKey && event.which === 13) {\n this.canvas_div.blur();\n // select the cell after this one\n var index = IPython.notebook.find_cell_index(this.cell_info[0]);\n IPython.notebook.select(index + 1);\n }\n};\n\nmpl.figure.prototype.handle_save = function (fig, _msg) {\n fig.ondownload(fig, null);\n};\n\nmpl.find_output_cell = function (html_output) {\n // Return the cell and output element which can be found *uniquely* in the notebook.\n // Note - this is a bit hacky, but it is done because the \"notebook_saving.Notebook\"\n // IPython event is triggered only after the cells have been serialised, which for\n // our purposes (turning an active figure into a static one), is too late.\n var cells = IPython.notebook.get_cells();\n var ncells = cells.length;\n for (var i = 0; i < ncells; i++) {\n var cell = cells[i];\n if (cell.cell_type === 'code') {\n for (var j = 0; j < cell.output_area.outputs.length; j++) 
{\n var data = cell.output_area.outputs[j];\n if (data.data) {\n // IPython >= 3 moved mimebundle to data attribute of output\n data = data.data;\n }\n if (data['text/html'] === html_output) {\n return [cell, data, j];\n }\n }\n }\n }\n};\n\n// Register the function which deals with the matplotlib target/channel.\n// The kernel may be null if the page has been refreshed.\nif (IPython.notebook.kernel !== null) {\n IPython.notebook.kernel.comm_manager.register_target(\n 'matplotlib',\n mpl.mpl_figure_comm\n );\n}\n", "text/plain": [ "" ] @@ -1780,7 +834,7 @@ ], "source": [ "fig, ax = plt.subplots()\n", - "sns.heatmap(auc_df, cbar=False, annot=True, fmt=\".4g\")" + "sns.heatmap(auc_df, cbar=False, annot=True, fmt=\".4g\")\n" ] }, { @@ -1806,7 +860,7 @@ "metadata": {}, "outputs": [], "source": [ - "tmpdir.cleanup()" + "tmpdir.cleanup()\n" ] }, { @@ -1855,4 +909,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/02_model_hybrid/lightfm_deep_dive.ipynb b/examples/02_model_hybrid/lightfm_deep_dive.ipynb index 5555ac19e..5ce4b7915 100755 --- a/examples/02_model_hybrid/lightfm_deep_dive.ipynb +++ b/examples/02_model_hybrid/lightfm_deep_dive.ipynb @@ -131,36 +131,36 @@ } ], "source": [ - "import sys\n", "import os\n", - "\n", + "import sys\n", "import itertools\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", - "import scrapbook as sb\n", "\n", "import lightfm\n", "from lightfm import LightFM\n", "from lightfm.data import Dataset\n", "from lightfm import cross_validation\n", - "\n", - "# Import LightFM's evaluation metrics\n", "from lightfm.evaluation import precision_at_k as lightfm_prec_at_k\n", "from lightfm.evaluation import recall_at_k as lightfm_recall_at_k\n", "\n", - "# Import repo's evaluation metrics\n", "from recommenders.evaluation.python_evaluation import precision_at_k, recall_at_k\n", - "\n", "from recommenders.utils.timer import Timer\n", "from 
recommenders.datasets import movielens\n", "from recommenders.models.lightfm.lightfm_utils import (\n", - " track_model_metrics, prepare_test_df, prepare_all_predictions,\n", - " compare_metric, similar_users, similar_items)\n", + " track_model_metrics,\n", + " prepare_test_df,\n", + " prepare_all_predictions,\n", + " compare_metric,\n", + " similar_users,\n", + " similar_items,\n", + ")\n", + "from recommenders.utils.notebook_utils import store_metadata\n", "\n", "print(\"System version: {}\".format(sys.version))\n", - "print(\"LightFM version: {}\".format(lightfm.__version__))" + "print(\"LightFM version: {}\".format(lightfm.__version__))\n" ] }, { @@ -1412,7 +1412,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeIAAADQCAYAAADbLGKxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAqg0lEQVR4nO3deZRcZZ3/8fcnBAhmYQ2rhoDEYdUgzTIiCjPAhIwQ54ACjoq/QZFBZRyOHuMRIQI6IDMyII4SgRFQBhEHjRpBBNEMsjUQSJAtCRHDlkAQEiSQpL+/P+6tTqXSfe/trrpdS39e59RJVd3tqU499dxn+z6KCMzMzKw5RjQ7AWZmZsOZC2IzM7MmckFsZmbWRC6IzczMmsgFsZmZWRO5IDYzM2siF8QtSNJkSVMztndJuqSkax8q6WVJcyU9IunsBp13tqQtMrZfLmnPRlzLrFYL5alHJf17g88/UdL8qmv9vJHnt/KNbHYCrE+TgS5gdu0GSSMjohvoLvH6cyLifZJGA3Ml/Swi7q9Jw5qBnDAi+v0RTLd/fJBpNStiMq2RpzYDHpB0Y0TcUeL1rI24RlyC9A71UUnfk/S4pB9IOlzSHZKekHRAut9oSVdKukfSA5KmSdoEOAc4Pr2DPl7SDEnXSLoDuKb6rlfSGEn/LWmepIckHduozxERrwL3Abv1kYbxkn4s6d70cXBWeiQtlrRN+pl/IelBSfMlHZ9uv11SV/r8xPT4+ZIuqPq7rpT01fTYuyRt16jPaq2tg/LUa8BcYKf0WkdKulPS/ZJ+JGlM+v7+kn6fftfvkTQ2/RvMSfe9X9K7GpUua7KI8KPBD2AisAbYh+Rm5z7gSkDANOAn6X5fAz6cPt8CeBwYDXwMuLTqfDPSc2yWvj4U+Hn6/ALgP6v23bKP9FxEkvlrH9P72Lf63FsDi4G9+kjDtcC70+cTgEey0pOeZxvgWOC7Vds3T/+9naTGsiPwFDCepMXmNuD96T4BHJ0+/zpwZrP/r/0YmkcH5akt0+tun+aH3wGj021fAM4CNgEWAfun749L88KbgFHpe5OA7qq/zfzaa/nRPg83TZfnyYiYByDpYeDWiAhJ80gyDsCRwDGSPpe+HkVSqPVlViR307UOB06ovIiIl2p3iIh/HWDaD5H0ANADnB8RD0v6QE0aDgf2lFQ5Zlx6N5+XnnnAf6Q13Z9HxJya7fsDt0fEMgBJPwDeA/wEeAOo9H/dBxwxwM9l7a3d89SDJAXof0bEc
5LeB+wJ3JHmo02AO4G/Ap6NiHvTa70CSW0fuFTSZGAt8LYBpsFalAvi8rxe9byn6nUP6/7uAo6NiMeqD5R0YB/ne3WwCZF0EXBYH5uui4jz+3h/TkS8LycNI4CDImJVzbUy0xIRj0t6JzAVOE/SrRFxTuZB66yOiEpw9LX4+zvctH2ekrQLcJek69O03hIRJ9ace59+LvuvwPPAO0jy36p+9rM24z7i5roZ+IzS0kvSvun7K4CxBc9xC/CpygtJW9buEBH/GhGT+3j09YNR1K+Az1Rdd3KR9EjaEfhLRHwfuBB4Z8157wHem/YnbwScCPy2jnTa8NLSeSoingTOJ2mGvgs4WNJu6XVGS3ob8Biwg6T90/fHShoJbE5SU+4BPgJsVPDzWItzQdxc5wIbAw+lTW3npu//hqTZd25lMFOG84At04FND9L3XXoZTge60sEsfwBOLZiefYB7JM0Fzk737xURzwLTSf4GDwL3RcRPy/sY1mHaIU99h6S7pdJ3/T+SHiJplt49It4Ajge+mV7/FpIm9v8CTkrf2506avTWWrSupc/MzMyGmmvEZmZmTeSC2MzMrIlcEJuZmTWRC2IzM7Mm6piCeMqUKUESeckPP5rxaEvON340+WF0UEH8wgsvNDsJZm3H+cas+TqmIDYzM2tHLojNzMyayLF6zXL09ASLX3yV519ZxXbjRjFx69GMGJEdU9tsuHO+Kc4FsVmGnp7gpoef44zr57JqdQ+jNh7BNz44mSl7be8fFbN+ON8MjJumbdjr6QkWLVvJnQtfYNGylfT0rBvMufjFV3t/TABWre7hjOvnsvhFh/k164/zzcC4RmzDWt6d+/OvrOr9MalYtbqHpStWsev4MU1KtVlrc74ZGNeIbVjLu3PfbtwoRm28fjYZtfEIth07asjTatYunG8GxgWxDQv9NT9n3bkDTNx6NN/44OTeH5VKjXni1qOH9gOYtZisLh3nm4Fx07R1hKwRmlnNz5U79+rCuPrOfcQIMWWv7dn99ENYumIV24716E+zvC4d55uBcY3Y2kLW3XflR2HqJXM48bt3M/WSOdz08HO9+2Q1Pxe5cx8xQuw6fgwH7boNu44f4x8TG/aKDMZyvinONWJreXl33/39KOx++iHsOn5M7sAR37mbDYwHYzWWa8TWMvqr9ebdfef18+YNHPGdu1nf+suTHozVWC6IbcgMtnm53oK2VQeOSJoi6TFJCyRN72P7ppJ+mG6/W9LE9P2Jkl6TNDd9fKfqmP0kzUuPuUSS7ypsULLyZKvmqXblpmkbEvU0L+cNqKr8KNSeu/Kj0IoDRyRtBHwLOAJYAtwraVZE/KFqt5OBlyJiN0knABcAx6fbFkbE5D5O/W3gE8DdwGxgCvDLcj6FdbK8Lp9Wy1PtzAWxNUzWyOV6+nEPmLh13QVtpfm5hfqvDgAWRMQiAEnXAdOA6oJ4GjAjfX4DcGlWDVfSDsC4iLgrfX018H5cEFs/svJsXj9wC+aptuWC2Aakv4xbb4SqrFpvmxa0eXYC/lT1eglwYH/7RMQaSS8DW6fbdpH0APAKcGZEzEn3X1Jzzp1qLyzpFOAUgAkTJtT/Sawt5eXZvJYoaxz3EVthWX1G9Uaoyutz8oCq9TwLTIiIfYEzgGsljSt6cETMjIiuiOgaP358aYm01paXZ90PPHRcI7b1DLZ5Oa/G2479uCV7GnhL1es3p+/1tc8SSSOBzYEXIyKA1wEi4j5JC4G3pfu/OeecZkCxpudhliebxgWx9aqnebkREarasHm5HvcCkyTtQlJYngB8qGafWcBJwJ3AccBtERGSxgPLI2KtpF2BScCiiFgu6RVJB5EM1voo8M0h+jzWgrJurIs0PQ+zPNk0bpq2XvU0LztC1cBExBrg08DNwCPA9RHxsKRzJB2T7nYFsLWkBSRN0JUpTu8BHpI0l2QQ16kRsTzddhpwObAAWIgHag1beRHn3PTcOpS0cpV0cmkKcDGwEXB5RJxfs
/1U4FPAWmAlcEpl+oakL5JM31gLnB4RN2ddq6urK7q7uxv/IdpQ1l1w1vY7F77Aid+9e4PzXXfKgRy06za5NebKeYdpM1ZbflDnm861aNlKpl4yZ4Ma7+x0pgLQCnm2LfNNo5XWNF1wnuS1EfGddP9jgG8AUyTtSdJUtxewI/BrSW+LiLVlpbdTFCksy1oAwc1YZkOvvxvrImEonWdbQ5lN073zJCPiDaAyT7JXRLxS9XI0UKmeTwOui4jXI+JJkma2A0pMa8fIa172AghmnSOr+dlhKNtHmYO1isyTRNKnSPq/NgH+purYu2qO9XzIVD2T8L0AglnnyJrJkDdTwVpH00dNR8S3gG9J+hBwJsko0aLHzgRmQtLXVU4Km2OwgTPympeLND+7qcqsddRz4+0b6/ZQZtN0kXmS1a4jCcc3mGM7Sj2BM/Kalz1S0qx95I189spinaHMGnHuPElJkyLiifTl3wOV57NIogV9g2Sw1iTgnhLT2lLqCZxRZECV75LN2kNejHY3P3eG0griNDZuZZ7kRsCVlXmSQHdEzAI+LelwYDXwEmmzdLrf9SQB8NcAn+q0EdODbW5qxCR8Nz+btY56F17wjXX7K7WPOCJmkyzFVv3eWVXP/yXj2K8CXy0vdc1TTz+v74DNOkcjFl7wjXX7c2StJqinn7dyBzz79EO47pQDmX36Ib2Z1szaixdeMGiBUdOdqszmJt8Bm7WXwQbdcNPz8OCCuARubjKzinqi2YF/C4YDN03XoacnWLRsJXcufIFFy1b2Tilwc5OZVdQbzc46n2vEg5R1l+vmJjOrcNANy+MacYb+aryQfZdbJMarJ9qbpCmSHpO0QNL0PrZvKumH6fa7JU2s2T5B0kpJn6t6b7GkeZLmSvKySi3AQTcsjwvifuRFtMm6y3Vzk+WpWp3sKGBP4MR01bFqJwMvRcRuwEXABTXbv0Hf6w0fFhGTI6Krwcm2fmTdtPv3wPK4abofeRFtsgZZuOnZCuhdnQxAUmV1suplQqcBM9LnNwCXSlJEhKT3A08Crw5Ziq1PeYMz/XtgeVwj7kdWjRfy73Ld3GQ5+lqdrHaFsd59ImIN8DKwtaQxwBeAr/Rx3gB+Jem+dHWyDUg6RVK3pO5ly5bV+TEsb3Am+PfAsg3rGnHWXN8iqxT5LteaZAZwUUSslDb4vr07Ip6WtC1wi6RHI+J31Tt08qplzZA3GMssz7AtiPOak4qEkvT8PqtDkRXGKvsskTQS2Bx4kWRd7+MkfR3YAuiRtCoiLo2IpwEiYqmkG0mawH+HlabIXGCzLB1fEPdX683rA3aN10qWuzoZySpkJwF3AscBt0VEAIdUdpA0A1gZEZdKGg2MiIgV6fMjgXNK/yTDQFbrmeO/W706uiCuZ64vuMZr5Sm4OtkVwDWSFgDLSQrrLNsBN6bN1SOBayPiptI+xDDhwVhWNiU32O2vq6srurvXnza5aNlKpl4yZ4Mmo9mnJxWK/ra54LVBaMtf3b7yzXDVX60363fEvxV1a8t802gdPWrac33NrIisuAF5MyjM6lVq07SkKcDFJE1vl0fE+TXbzwA+DqwBlgH/FBF/TLetBealuz4VEccM9Pqe62tmRWSNGfFgLCtbaTXigpGDHgC6IuLtJAELvl617bU0OtDkwRTC4Lm+ZlaMW8+smcqsEedGDoqI31Ttfxfw4UYmwLVeMyvCrWfWTGX2EReJHFTtZNaPmzsqjf5zVxrOb1Bc6zUzqC8etH9HrEwtMX1J0oeBLuC9VW/vnEYI2hW4TdK8iFhYc9wpwCkAEyZMGLL0mll78RQka2Vl1oiLRA5C0uHAl4BjIuL1yvtVEYIWAbcD+9YeGxEzI6IrIrrGjx/f2NSbWcdwPGhrZWUWxL2RgyRtQhKMYFb1DpL2BS4jKYSXVr2/paRN0+fbAAez/qo0ZmaFeQqStbLSmqYLRg66EBgD/CiNBlSZprQHcJmkHpKbhfMjwgWxmfWrnkVczJqp1
D7iiJgNzK5576yq54f3c9zvgX3KTJuZdY5GLOJi1iwtMVjLzKweXsTF2pkLYjNre17ExdpZR8eaNrPhodIHXM19wNYuXBCbNYmkKZIek7RA0vQ+tm8q6Yfp9rslTazZPkHSSkmfK3rOTuUwlNbO3DRt1gRVsdiPIIk6d6+kWTWzA04GXoqI3SSdAFwAHF+1/RtURaMreM621t/IaPcBWztzQWzWHLmx2NPXM9LnNwCXSlJERBr29Ung1ar9i5yzbRWJjuU+YGtHbpo2a44isdh794mINcDLwNaSxgBfAL4yiHMi6ZQ0jnv3smXL6voQQ6lIdCyzduSC2Kz9zAAuioiVgzm4XUPDOjqWdSo3TZs1R5FY7JV9lkgaCWwOvAgcCBwn6evAFkCPpFXAfQXO2bYcHcs6lWvEZs2RG4s9fX1S+vw44LZIHBIREyNiIvCfwNci4tKC52xp9SxVaFZL0mclvanZ6ciTWyOWtB3wNWDHiDhK0p7AX0fEFaWnzqxDFYzFfgVwjaQFwHKSgnXA5yz1gzSQlyq0EnwW+D7wlyanI5MiInsH6ZfAfwNfioh3pE1kD0RES8WC7urqiu7u7mYnw4avtiwNWinfLFq2kqmXzNmg6Xl2GqbSOlLD8o2k0cD1JF0yGwE/Illi9zHghYg4TNK3gf2BzYAbIuLs9NipJNMBXwXuAHaNiPel5/wmsDewMTAjIn7aqDRXFGma3iYirgd6oHf05tpGJ8TMhjcPxrI6TQGeiYh3RMTeJN02zwCHRcRh6T5fiogu4O3AeyW9XdIokuV4j4qI/YDqEYxfIukSOgA4DLgwLZwbqkhB/KqkrYEAkHQQyTQKM7OGcZhKq9M84AhJF0g6JCL6Kqc+KOl+4AFgL2BPYHdgUUQ8me7zP1X7HwlMlzQXuB0YBUxodMKLjJo+g2TAx1sl3UFyt3BcoxNiZsOblyq0ekTE45LeCUwFzpN0a/V2SbsAnwP2j4iXJH2PpGDNIuDYiHisjDRX5BbEEXG/pPcCf5Um6rGIWF1mosysM/UXohLwYCyri6QdgeUR8X1JfwY+DqwAxgIvAONI+oBfTgchH0VSy30M2FXSxIhYzPphZG8GPiPpM2lEu30j4oFGp73IqOmP1rz1TklExNUFjp0CXEzScX55RJxfs/0Mkj/WGmAZ8E8R8cd020nAmemu50XEVXnXM7PWlTcqGrxUodVlH5I+3B5gNfDPwF8DN0l6Jh2s9QDwKEkEujsAIuI1Sael+71KMg2w4lySvuaHJI0gCSv7vkYnvMio6W9WvRwF/C1wf0RkNk+nAegfpyoAPXBidQB6SYcBd0fEXyT9M3BoRBwvaSugG+gi6Zu+D9gvIl7q73qtNPrThqW2rLYNZb7xqGjrQ0vkG0ljImKlJJEsnPJERFw0VNcv0jT9merXkrYAritw7twA9BHxm6r97wI+nD7/O+CWiFieHnsLyYi46k50M2sjWaOiXRBbk30ibYXdhGQg12VDefHBhLh8FdilwH59BaA/MGP/k1m3pFvh4PXAKQATJjR8IJuZDUJ//cAOUWmtKq39DlkNuFaRPuKfkU5dIpnutCfJpOmGkfRhkmbo9w7kuIiYCcyEpImtkWkys4HL6gf2qGizvhWpEf971fM1wB8jYkmB44oEtUfS4SSTpt8bEa9XHXtozbG3F7immTVRf0sV7p72A3tUtNmGivQR/3aQ5+4NQE9SsJ4AfKh6B0n7krTFT4mIpVWbbga+JmnL9PWRwBcHmQ4zGyJ5/cAeFW22oX4LYkkrWNckvd4mICJiXNaJCwa1vxAYA/woGazGUxFxTEQsl3Qu64aRn1MZuGVmrcv9wGYDlzt9qV14+pI1WVu2rzY63xSZK2xWpSW+FOlsoA9FxH8N8LjZ6XF/ruf6hUdNS9qWqnBgEfFUPRc2G+4KBLzZFLga2A94ETg+IhZLOoB0kCLJD9mMiLgxPWYxSTShtcCaNMD9kHF0LGtTWwCnAesVxJJGp
gsd9Skipjbi4kVGTR8D/AewI7AU2Bl4hCRgtpkNQhrw5ltUBbyRNKs64A3JlL6XImI3SScAF5CE35sPdKXdPzsAD0r6WdUPxmER8UJZac8KUwmOjmXlW7Fq9YRHnl1x7vOvrNpx+3Gjnt19h7Fnjh21cT2Vw/NJ1lOYSxKVaxXwEsmCEG+T9BOSwcejgIvTGTuVG98uki7WXwL/B7yLZFzUtIh4rcjFi9SIzwUOAn4dEfum0bA+nHOMmWXLDXiTvp6RPr8BuFSSIqJ6kfNR9D2WoxRuerZmW7Fq9YRfznvu12fNmj+p8h0855i9Dzpqn+0Pr6Mwng7sHRGTJR0K/CJ9XVmR6Z/SsUubkdw0/zgiXqw5xySS6JGfkHQ9cCzw/SIXL7IM4ur0giMkjUijYQ1pc5dZByoStKZ3n7S2+zKwNYCkAyU9TLL026lVteEAfiXpvjTgzQYknSKpW1L3smXLBpTo/qYnLX7x1QGdx2ywHnl2xbmVQhiS7+BZs+ZPeuTZFec28DL3VBXCAKdLepAkAuRbSArdWk9GxNz0+X3AxKIXK1IQ/1nSGGAO8ANJF5NE1zKzJomIuyNiL2B/4Ivp4uYA746Id5KsLPMpSe/p49iZEdEVEV3jx4+v3Zwpa3qS2VB4/pVVO/b5HXxl1Y4NvExvGZfWkA8H/joi3kESArOvaQCvVz1fywDGYBUpiH8DbA78C3ATsBA4uugFzKxPRQLe9O4jaSRJPlyvOSwiHgFWAnunr59O/10K3EjSBD5gPT3BomUruXPhCyxatpKenqT1uzI9qZqnJ9lQ2n7cqGf7/A6OG/VMHaetLJfYl81Jxmr8RdLuJF21DVWkIB4J/IokstVY4Id9tI2b2cD0BryRtAlJwJtZNfvMAk5Knx8H3JauibpLWjAjaWeSASWLJY2WNDZ9fzRJIJz5A01YpR946iVzOPG7dzP1kjnc9PBz9PREb5jKyg+hw1TaUNt9h7FnnnPM3k9UfwfPOWbvJ/bYYeyXB3vOtEy7Q9J8kvgW1W4CRkp6hGRQ112DvU5/Cs8jlvR2khGbxwJLIuLwRiemHp5HbE024JFKkqaSrHVaCXjz1eqAN2lz8zXAvsBy4ISIWCTpIySDS1YDPSQBb34iaVeSWjAkN9DXRsRXs9LQV77JW66wMmra05OsAQb1xamMml76yqodtx036pk9dhj75TpHTTfVQFZfWgo8R9I0tm05yTEbPiJiNjC75r2zqp6vAj7Qx3HXkBTQte8vAt5Rb7ocptJa3dhRGz91wC5bnZS/Z3vIbZqWdJqk24FbSUZsfiIi3l52wsysOdwPbDa0ivQRvwX4bETsFREzagIOmFmHcT+w2dAqsvqSVz0yG0YcptJsaA2kj9jMhgn3A5sNnSJN02ZmZlYSF8RmZtZ4PT3wwhPw5Jzk356e/GOaRNIWkk4b5LGflfSmeq7vgtjMzBqrpwce/Rlcdghc9b7k30d/1sqF8RYkyyAOxmeB1i2IJU2R9JikBZKm97H9PZLul7RG0nE129ZKmps+aiMOmZlZq1q+EG78JKxOVwFc/VryevnCxpx/1SsT+OPvr2L+/97CH++8mlWvTKjzjL3LIEq6UNLnJd0r6SFJX4EkWp2kX0h6UNJ8ScdLOp1kieDfSPrNYC9e2mCtguutPgV8DPhcH6d4LSIml5U+MzMryYrn1hXCFatfg5XPwTZ9LVw0AKtemcAjP/01sz8/idWvwcabwdQLD2KPaYczalwjlkE8kiSk7AEkkb9mpYunjAeeiYi/B5C0eUS8LOkM6lwDvMwace96qxHxBlBZb7VXRCyOiIdIwvSZmVm7yOoDHrt9UkBW23gzGLN9/dd9fv65vYUwJAX87M9P4vn5jVoG8cj08QBwP0ks90kkS44eIekCSYdExMsNul6pBXGR9VazjErXTL1L0vv72qGedVXNzGyQ8vqAt3or/MNl6wrjjTdLXm/11vqvveK5Hfusba94rlHLIAr4t4iYn
D52i4grIuJx4J0kBfJ5ks7KPk1xrTyPeOeIeDoNZH+bpHkRsV4HQ0TMBGZCEry+GYk0M+tYPT1Jv+6K55Ja7lZvhREj+u8D/uSeSdPziBGw+9HJ65XPJTXhyrH1GrvDs2y82fpN3xtvBmO3b9QyiDcD50r6QUSslLQTyQIrI4HlEfF9SX8GPl5zbEs2TRdZb7VfVeuqLiJZgnHfRibOzMwyZNV6s/qAK0aMSArliYesK5wbYbu9zmTqhU+sV9ueeuETbLd3o5ZBPAK4FrhT0jzgBpKCdh/gHklzgbOB89LDZwI3teRgLarWWyUpgE8APlTkQElbAn+JiNclbQMcDHy9tJSaNYGkKcDFJMsgXh4R59ds3xS4GtiPZNWz4yNisaQDSFuCSJrRZkTEjUXOacNMfzXaItuzar2VPuDaWmkj+oDzjBr3FHtMO5yt3nouK57bkbHbP8N2e3+5joFaAEREbfl0cc3rhSS15drjvgl8s55rl1YQR8QaSZ8mSXhlvdWHa9Zb3Z9k/dQtgaMlfSUi9gL2AC6T1ENSaz/fi01YJyk4q+Bk4KWI2E3SCcAFJGuCzwe60jy2A/CgpJ8BUeCcNlxUarSVwrTST7v70Ulhm7c9q9Y74eBk39pjG9EHXMSocU+x87s6ZhnEUvuIC6y3ei9Jk3Xtcb8naQYw61S9swoAJFVmFVQXmtOAGenzG4BLJSki/lK1zyiSArjoOa3TDLYfN297Vq23zD7gYch/NbPmKDKroHefiFgDvEyyJjiSDpT0MMkIzlPT7YVmKni2QQeppx83b3veyOey+oCHoVYeNW1m/YiIu4G9JO0BXCXplwM41rMN2klZ/bh5213rHTL+i5o1R5FZBb37SBoJbE4yaKtXRDwCrAT2LnhOayd583WzarV5Ndoic31d6x0SrhGbNUeRWQWzgJOAO0lC7t0WEZEe86d0sNbOJJF/FgN/LnBOazWDrfHW24/rGm/LcEFs1gRFZhUAVwDXSFoALCcpWAHeDUyXtJokPOxplTi3fZ1zSD+YDUw9I5e3mbSuVtvf6OVKjba/+M55221IKKIzuoi6urqiu7u72cmw4UvNTsBgON802QtPJM3NtTXaT85JCse87bCuRt2etdq2zDeN1jb/W2ZmLStrAYSs7fWOXAb343YAN02bmdWjnsAZHrlsuEZsZlaf/gZULV+Yv901XsM1YjOzfFkjm/MGVOVtd4132HNBbGaWJa/puRGBMzxyeVjzbZeZWZa8pudGBM6wYc01YjOzLHlNyw6cYXVyQWxmBv33AxdZe9eBM6wOviUzM8uK6eymZSuZa8RmNjzUE9PZTctWolK/SZKmSHpM0gJJ0/vY/h5J90taI+m4mm0nSXoifZxUZjrNrMPVs4oReC6vlaq0b5OkjYBvAUcBewInStqzZrengI8B19YcuxVwNnAgcABwtqQty0qrmXWArDCTeSOfK/3A1Wr7gc1KUuZt3QHAgohYFBFvANcB06p3iIjFEfEQyQoy1f4OuCUilkfES8AtwJQS02pm7azeGq/7ga2JyiyIdwL+VPV6Sfpew46VdIqkbkndy5YtG3RCzYZagW6bTSX9MN1+t6SJ6ftHSLpP0rz037+pOub29Jxz08e2Q/iRmqveGm/vFKM58LGfJ/9WAnaYlaytv2URMTMiuiKia/z48c1OjlkhBbttTgZeiojdgIuAC9L3XwCOjoh9gJOAa2qO+8eImJw+lpb2IZrFqxhZBypz1PTTwFuqXr85fa/osYfWHHt7Q1Jl1ny93TYAkirdNn+o2mcaMCN9fgNwqSRFxANV+zwMbCZp04h4vfxkN5lXMbIOVea38F5gkqRdJG0CnADMKnjszcCRkrZMB2kdmb5n1gmKdL307hMRa4CXga1r9jkWuL+mEP7vtFn6y5L6XHS9bbt0vIqRdajSasQRsUbSp0kK0I2AKyPiYUnnAN0RMUvS/sCNwJbA0ZK+EhF7RcRySeeSFOYA50TE8rLSa
tZuJO1F0lx9ZNXb/xgRT0saC/wY+Ahwde2xETETmAnQ1dUVQ5Dc4upZ5cg1XmtTpQb0iIjZwOya986qen4vSbNzX8deCVxZZvrMmqRIt01lnyWSRgKbAy8CSHozyQ3sRyNiYeWAiHg6/XeFpGtJmsA3KIhbVr2rHDmMpLUp3y6aDb0i3TazSAZjARwH3BYRIWkL4BfA9Ii4o7KzpJGStkmfbwy8D5hf7scYhHrm+nqKkXUoh7g0G2JFum2AK4BrJC0AlpMU1gCfBnYDzpJUaV06EngVuDkthDcCfg18d8g+VBF5Nd56Vzkya1MuiM2aoEC3zSrgA30cdx5wXj+n3a+RaWy4vHjOjVjlyKwN+VbSzIaGo1uZ9ck1YrOskbrWOJ7ra9YnF8TWOrIKxLzCcrDb8/oti5zb1tff36tS4639W/c119dNzzaMuCC29dVb4A323FkFImQXlnmFadb2vH7LIgW1rZP393KN12wDzgGdKGuKSNb2vBVsimzv77p5x2ZNXcmb1lLP9rx+y7xz2/ry/l6ObmW2AeeCeuQVePUcm1eoDbbAy9peT4FWT0EL2QViXmFZz/a8VXnyzj0cZX3//PcyGzAXxINVZu0wa3u9BV49tcOs7fUUtJBdIOYVlvVszxup6wXj15f3/fPfy2zAOr8gHmzNMm97mbXDepppy6wdZm2vp6CF7AIxr7CsZ3veOrTDdUpNf999R78ya7jOHqxV5gCgvIInawBQXgShrO1B9rF5U0SytueNas3bnnXdvGPzBvJkbcs7tsj2/kbqDscBRlnffUe/Mmu4zi6Is0bEQnZhWU8UoLwfq3oKy8rzwRZ4WdvrKdDqLWgr+2QViFnTWurdnmW4TanJ+u47+pVZw3V2QVxPzTKvMC2zdpi3vZ4Cr57aYdb2egtaax1Z3/0JB+fPBTazAensgriemmU9UYDqrR3W00xbOb6s2mEWF7SdIeu776Zns4ZTRHnrgkuaAlxMshrM5RFxfs32TUnWS92PZK3V4yNisaSJwCPAY+mud0XEqVnX6urqiu7u7vXfLLOPOE9lOpB/rIYLNTsBgzHgfOPvsDVWW+abRiutIJa0EfA4cASwhGQN1hMj4g9V+5wGvD0iTpV0AvAPEXF8WhD/PCL2Lnq9Pn9QILtAzCssXZhacQP+QanjRvUI4HxgE+AN4PMRcVt6zH7A94DNSFZ3+pfIyOSDyjdmjeOCmHKbpg8AFkTEIgBJ1wHTgD9U7TMNmJE+vwG4VFJj/2PKHABkNkjpjeq3qLpRlTSr+kYVOBl4KSJ2S29ULwCOB14Ajo6IZyTtTbKu8U7pMd8GPgHcTVIQTwF+OeAE+rtvNmTKvMXdCfhT1eslrPux2GCfiFgDvAxsnW7bRdIDkn4r6ZAS02nWDL03qhHxBlC5Ua02DbgqfX4D8LeSFBEPRMQz6fsPA5tJ2lTSDsC4iLgrrQVfDby/9E9iZnVp1bamZ4EJEbEvcAZwraRxtTtJOkVSt6TuZcuWDXkizepQ741qxbHA/RHxerr/kpxzOt+YtZgym6afBt5S9frN6Xt97bNE0khgc+DF9G7+dYCIuE/SQuBtwHqdWRExE5gJIGmZpD9mpGcbkia9VuN0DUyrpuumiJgylBeUtBdJc/WRAznO+aZUTtfADHm+aUVlFsT3ApMk7UJS4J4AfKhmn1nAScCdwHHAbRERksYDyyNiraRdgUnAoqyLRcT4rO2SuiOia3AfpTxO18C0aroGYdA3qgCS3gzcCHw0IhZW7f/mnHOux/mmsZwuG4zSmqbTprRPkwwkeQS4PiIelnSOpGPS3a4Atpa0gKQJenr6/nuAhyTNJekbOzUilpeVVrMm6L1RlbQJyY3qrJp9KjeqsP6N6hbAL4DpEXFHZeeIeBZ4RdJB6aDHjwI/LflzmFmdSg3oERGzSUZuVr93VtXzVcAH+jjux8CPy0ybWTNFxBpJlRvVjYArKzeqQHdEzCK5Ub0mvVFdTlJYQ3KDu
xtwlqRKfjoyIpYCp7Fu+tIvGcyIaTMbUp0dWWt9M5udgH44XQPTqukasDpuVM8DzuvnnN1A4fn3BbTq39vpGphWTZdRcmQtMzMzy9aq05fMzMyGBRfEZmZmTdTxBbGkKZIek7RA0vT8I4aOpMWS5kmaK6mPgL9Dlo4rJS2VNL/qva0k3SLpifTfLVskXTMkPZ3+zeZKmjrU6RoOWjXftEqeSdPifGMN0dEFcVU836OAPYETJe3Z3FRt4LCImNzkOX7fI4lJXG06cGtETAJuZd3UsqH0PTZMF8BF6d9scjrgyRqoDfJNK+QZcL6xBunogphi8XyHvYj4Hcn0mGrVcY6vogkxi/tJl5XP+aYA5xtrlE4viIvE822mAH4l6T5JpzQ7MTW2SwNEADwHbNfMxNT4tKSH0ia4IW/6GwZaOd+0cp4B5xsbhE4viFvduyPinSRNgJ+S9J5mJ6gvaezvVpnn9m3grcBkksVB/qOpqbGh1hZ5BpxvrLhOL4iLxPNtmoh4Ov13KUnc4AOam6L1PJ8uq0f679ImpweAiHg+ItZGRA/wXVrrb9YpWjbftHieAecbG4ROL4iLxPNtCkmjJY2tPCdZQWd+9lFDqjrO8Um0SMziyo9c6h9orb9Zp2jJfNMGeQacb2wQOjrEZX/xfJucrIrtgBuT2PyMBK6NiJuakRBJ/wMcCmwjaQlwNnA+cL2kk4E/Ah9skXQdKmkySZPfYuCTQ52uTtfC+aZl8gw431jjOMSlmZlZE3V607SZmVlLc0FsZmbWRC6IzczMmsgFsZmZWRO5IDYzM2siF8RWmKRDJf282ekwayfON5bHBbGZmVkTuSDuQJI+LOmedN3RyyRtJGmlpIskPSzpVknj030nS7orDQZ/YyUYvKTdJP1a0oOS7pf01vT0YyTdIOlRST9QGl3BrN0531izuCDuMJL2AI4HDo6IycBa4B+B0UB3ROwF/JYk2g7A1cAXIuLtwLyq938AfCsi3gG8iyRQPMC+wGdJ1qndFTi45I9kVjrnG2umjg5xOUz9LbAfcG96070ZSeD5HuCH6T7fB/5X0ubAFhHx2/T9q4AfpfF8d4qIGwEiYhVAer57ImJJ+nouMBH4v9I/lVm5nG+saVwQdx4BV0XEF9d7U/pyzX6DjW36etXztfg7ZJ3B+caaxk3TnedW4DhJ2wJI2krSziT/18el+3wI+L+IeBl4SdIh6fsfAX4bESuAJZLen55jU0lvGsoPYTbEnG+saXxX1mEi4g+SzgR+JWkEsBr4FPAqcEC6bSlJfxgkS7V9J/3BWAT8v/T9jwCXSTonPccHhvBjmA0p5xtrJq++NExIWhkRY5qdDrN24nxjQ8FN02ZmZk3kGrGZmVkTuUZsZmbWRC6IzczMmsgFsZmZWRO5IDYzM2siF8RmZmZN9P8B/2ALHFLwKzIAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -1446,7 +1446,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeIAAADQCAYAAADbLGKxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAuQUlEQVR4nO3de5hcVZnv8e+vIdiYC5AQEkBjuERjUCdicxmZoB7BiYiAR+R2dFAZg6OImmGOOKMSQT1cnIAKMwaFEVFBxMGJTASRi0aHWwMRCBdJQoQguSO5QGNCveePvSupVLqrqrtq966q/n2ep5+u2te1O9n11lp7rXcpIjAzM7N8dORdADMzs6HMgdjMzCxHDsRmZmY5ciA2MzPLkQOxmZlZjhyIzczMcuRA3IQkTZV0VIX1XZK+mdG53y7peUkLJD0q6ZwGHXeepF0rrP+upCmNOJdZuSa6px6T9PUGH3+ipIdLznVjI49v2dsx7wJYr6YCXcC88hWSdoyIbqA7w/PPj4ijJQ0HFkj6eUTcX1aGzf05YET0+SGYrv/7AZbVrBZTaY57amfgAUk3RMTvMjyftRDXiDOQfkN9TNL3JP1B0g8lHSHpd5KekHRwut1wSVdKukfSA5KOlbQTcC5wYvoN+kRJsyRdLel3wNWl33oljZD0H5IekvSgpPc36joiYiNwH7B/L2UYK+mnku5Nfw6rVB5JSyXtnl7zf0v6vaSHJZ2Yrr9DUlf6+uR0/4clXVDyd90g6avpvndJGteoa7Xm1kb31IvAAmDv9FzvknSnpPsl/UTSiHT5QZL+J/2/fo+kkenfYH667f2S3tqoclnOIsI/Df4BJgKbgTeSfNm5D7gSEHAs8LN0u68BH0xf7wr8ARgOfBi4tOR4s9Jj7Jy+fztwY/r6AuCSkm1366U8F5Pc/OU/Z/eybemxxwBLgQN6KcOPgL9JX08AHq1UnvQ4uwPvB75Tsn6X9PcdJDWWvYCngLEkLTa3Acel2wTw3vT1hcAX8v639s/g/LTRPbVbet7x6f3wG2B4uu5zwJeAnYAlwEHp8lHpvfBKoDNdNgnoLvnbPFx+Lv+0zo+bprPzZEQ8BCBpIXBrRISkh0huHIB3AcdIOit930kS1HozN5Jv0+WOAE4qvomI58o3iIjP9rPs0yQ9ABSA8yNioaQPlJXhCGCKpOI+o9Jv89XK8xDwr2lN98aImF+2/iDgjohYBSDph8DhwM+AvwDF51/3AUf287qstbX6PfV7kgB6SUQsl3Q0MAX4XXof7QTcCbwOeDYi7k3PtQ6S2j5wqaSpwMvAa/tZBmtSDsTZeankdaHkfYGtf3cB74+Ix0t3lHRIL8fbONCCSLoYeEcvq66NiPN7WT4/Io6uUoYO4NCI6Ck7V8WyRMQfJB0IHAV8RdKtEXFuxZ222hQRxeToL+P/v0NNy99TkvYB7pJ0XVrWWyLi5LJjv7GP034WWAH8Fcn919PHdtZi/Iw4XzcDn1IavSS9OV2+HhhZ4zFuAT5ZfCNpt/INIuKzETG1l5/ePjBq9UvgUyXnnVpLeSTtBbwQET8ALgIOLDvuPcDb0ufJOwAnA7+uo5w2tDT1PRURTwLnkzRD3wUcJmn/9DzDJb0WeBzYU9JB6fKRknYEdiGpKReADwE71Hg91uQciPN1HjAMeDBtajsvXX47SbPvgmJnpgq+AuyWdmz6Pb1/S8/CmUBX2pnlEeDjNZbnjcA9khYA56TbbxERzwJnk/wNfg/cFxH/ld1lWJtphXvq2ySPW4rPrq+R9CBJs/TkiPgLcCLwrfT8t5A0sf8bcGq6bDJ11OituWhrS5+ZmZkNNteIzczMcuRAbGZmliMHYjMzsxw5EJuZmeWobQLx9OnTgyTzkn/8k8dPS/J945+cf4w2CsSrV6/OuwhmNZM0XdLjkhZJOruX9a+Q9
ON0/d2SJqbLJ0p6MR2Gs0DSt0v2eUuaH3mRpG8Wx9JW4vvGLH9tE4jNWkWaqOQy4N0kKQ5P1vZTQJ4GPBcR+5PkNb6gZN3ikgQSHy9Z/u/Ax0jSKE4Cpmd1DWbWOA7EZoPvYGBRRCxJkzdcSzJxQaljgavS19cD76xUw5W0JzAqIu5K04B+Hziu4SU3s4ZzILaWUCgES1Zt4M7Fq1myagOFQks/XtobeLrk/bJ0Wa/bRDL38/Mks2EB7JNO8fdrSdNKtl9W5ZgASJohqVtS96pVq+q7EmtqbXbftC0nzbemVygENy1czszrFtCzqUDnsA5mnzCV6QeMp6Oj6mPQdvMsMCEi1kh6C/AzSQf05wARcTlwOUBXV5c/mdtU7vdNoQBrF8P65TByPIzeDzpc9+uN/yrW9Jau2bjlwwSgZ1OBmdctYOmalk21+wzw6pL3r0qX9bpNScL/NRHxUkSsAYiI+4DFJNPhPZMep9IxbQjJ9b4pFOCxn8OcaXDV0cnvx36eLLftOBBb01uxrmfLh0lRz6YCK9e37Cxw9wKTJO0jaSeSuW/nlm0zFzg1fX08cFs69+7YtLMXkvYl6ZS1JJ0sY52kQ9NnyX8HeLKMISzX+2btYrjhdNiUTve86cXk/drF2Z+7Bblp2preuFGddA7r2OZDpXNYB3uM7Kz5GIVCsHTNRlas62HcqE4mjhmeW7N2RGyWdAbJlH07AFdGxEJJ5wLdETEXuAK4WtIiYC1bJ6o/HDhX0iaSeXg/HhFr03WfAL4H7Az8Iv2xIaoR982ArV++NQgXbXoRNiyH3Sdlf/4Wk2kgljQd+AbJh813y+fqlPRxknk/XwY2ADMi4pF03edJhnC8DJwZETdnWVZrXhPHDGf2CVO3e9Y1cczwmvbP/VlZLyJiHjCvbNmXSl73AB/oZb+fAj/t45jdwBsaW1JrVfXeN3UZOR6G7bxtMB62M4wYn/25W1Bm0yCmzWd/AI4k6cF5L3ByMdCm24yKiHXp62OAT0TE9HRM5TUkwzz2An4FvDYiXu7rfF1dXdHd3Z3JtVj+ijXalet72GPk9jXaSjXeJas2cNQ3529XM5h35jT2HTuiUUVsyV5jvm/aW7X7JsMTJ8+Ei83Tw3aG982Bye8t77DVkvdNo2VZI94yVhJAUnGs5JZAXAzCqeFsTXl2LHBtRLwEPJk2zx1MMnG2DUEdHWLfsSN6DZzVaryVnpXtO3ZE1WbrZmrWNuuPSvdNxidOgu7pU5Lm6BHuNV1JloG4t7GSh5RvJOmTwExgJ+B/lex7V9m+242JlDQDmAEwYcKEhhTaWk9fvUMnpzXeSs/KqgXxZmzWNhs09QxB6uhIngf7mXBVuX89iYjLImI/4HPAF/q57+UR0RURXWPHjs2mgNb0qvUOLT4r6xyW/HcvfVZWbYhHGw6dMquNhyANmixrxLWMlSx1LUmu3IHsaw2SZzPtQI9drXdoR4eYfsB4Jp85bbtnZdWarautN2tbfQ1BOn2Ka7kNlmUg3jJWkiSIngScUrqBpEkR8UT69j1A8fVc4EeSZpN01poE3JNhWY3qz1qzbKat59i19A7t61lZtSCe6xAQszx5CNKgyaxpOs2PWxwr+ShwXXGsZNpDGuAMSQslLSB5Tnxquu9C4DqSjl03AZ+s1GPaGiPrZtpKeW/rOXaxxjvvzGlcO+MQ5p05reYvB5WarWtZb9a2ikOQSnkIUiYyHUdcw1jJT1fY96vAV7MrXb6asSduls209fZsrmagvUMrNVvXst6sqdXT2Wr0fsmQo/IhSKP3q//Ytg1n1spB3j1x+/oSkGUzbT09m7NWLYjnNgTErB61j+XtXaUhSPUe27bhv1gOsmzirWXfmxYu56hvzufk79zNUd+cz00Ll1MoRN3NtJXKVU/PZjMbgEbkey4OQZo4LfldDLLOJd1QrhFXkFXzcZZNvNVUq5kOtJm2Wrnq6dlsZgOQZWcrd+RqKNeI+1Cp5li6zUBqpsWgV
KreJt5aa9PVaqbFZthD992dfceO2C4Q9rW+WrlqqfFWO7eZ9UOWna3ckauhHIj7UC2w1BKo+1JLUOoryNc7tVk9XwIqqSXAD7Rns5kNQLGzVTFglne2atZjD0Fumu5Dtebjak28lVRrhq3UzFtvp6asZmSppVzu9GQ2iLLM9+xc0g3lQNyHaoEly+E2lYJ8vYE0q2exuU65Zma9qzPfc8V+Ms4l3TAOxH2oFliyHG5TLcjXG0izqJm6s5VZTjIaz5v3MMuhZEgH4krf9qoFlmqBup4e17X0MG7GJt5mLVezkjQd+AawA/DdiDi/bP0rgO8DbwHWACdGxNKS9RNIss/Nioivp8s+C/w9yZSiDwEfiYjaOhBYc6oUaDMcz1vt8VszJiVqVW0fiPv6z1LLt71KgaWeoTzVuJm3/UnaAbgMOJJkms97Jc2NiEdKNjsNeC4i9pd0EnABcGLJ+tnAL0qOuTdwJjAlIl6UdB1JjvfvZXoxlp1qgTbDiRkqtcxNHDPcteUGausn65V6NjdieruBDuWp5bj19DCuJ+GHDZqDgUURsSQi/kIy+9ixZdscC1yVvr4eeKckAUg6DngSWFi2z47AzpJ2BF4J/Cmb4tugqJY4o9J43jpVGmHh6UEbq60DcaX/LPUOA6qkEcce6JjaeoZV2aDaG3i65P2ydFmv26STqDwPjJE0gmT+7i+XbhwRzwBfB54CngWej4hflp9Y0gxJ3ZK6V61a1aDLaQGFAqx+Ap6cn/xuhXl1qwXaDMfzVhpmmeXn51DU1oG40n+WrMbTQnZjdWvhb6pDwizg4ojYULpQ0m4kteh9SKYPHS7pg+U7R8TlEdEVEV1jx44djPLmr1Unua8WaDMcz1upZS7Pz7h21NaBuNJ/lixzG+eZN9nfVFvGM8CrS96/Kl3W6zZpU/MuJJ22DgEulLQU+Azwz5LOAI4AnoyIVRGxCfhP4K0ZXkPraNXcyNUCbUcHhdcdzfpTb+fPJ9zA+lNvp/C6oxs2nrevljnnhm+stu6sVanTU5bDbfIcyuOJ7FvGvcAkSfuQBNyTgFPKtplLMkf3ncDxwG0REcC04gaSZgEbIuJSSYcAh0p6JfAi8E6gO+sLaQmtmhu5SuKMQiG46ZGVzLxuafoZt5TZJ+yaeacpD1dsrLYOxLXMNZvVcJu8hvK4x3VriIjNaS32ZpLhS1dGxEJJ5wLdETEXuAK4WtIiYC1JsK50zLslXQ/cD2wGHgAuz/I6Wkaxibc0GLdKbuQKiTPqyfBXf7E8XLFRlHzBbn1dXV3R3e0v/7B1yJa/qQ6qlvwDD5n7pk3nz71z8WpO/s7d2y2/dsYhHLrv7jmUqN9a8r5ptLauEQ9V/qZqVqZNcyP7UVR7aO3/hWZmteprkvsW5k5T7cE1YjOzFuVOU+3BgdjMrIU166Mo56KuXaaBuIak9jNJEtRvBlYBH42IP6brXiZJWg/wVEQck2VZzcysMTxzU/9k9pCkJKn9u4EpwMmSppRt9gDQFRFvIsmle2HJuhcjYmr64yBsZtYinOGvf7LsrVA1qX1E3B4RL6Rv7yLJLmRmZi3MGf76J8tAXEtS+1KnUTKlG9CZJqa/K51pxszMWoBzUfdPU/TfTxPTdwEXlSx+TUR0kaT9u0TSdlnMh+wsMmZmTczDqvony85atSS1R9IRwL8Ab4uIl4rL0yndiIglku4A3gxsk6E9Ii4nTeHX1dXVHinCzMxanIdV9U+WgbhqUntJbwbmANMjYmXJ8t2AFyLiJUm7A4exbUcuMzNrYs06rKoZZRaIa0xqfxEwAviJJNg6TOn1wBxJBZLm8/Mj4pGsympmNhR5rG9zyHQccUTMA+aVLftSyesj+tjvf4A3Zlk2M2tBhUIyh/D65cmMSs2SL7pZy1WBx/o2D2fWMrPWkOcMSpUCbYvO7LR0zUYuuukRvjqtk3H6MyvYjYtueoTJ40e6OXmQORCbWWtYu3hrsIPk9w2nJzMq9TJXb8NUC
7T1liun2vRzG3u48pAV7DN/5pbrOnDabJ7b2AMOxIOqeb+umbUxSdMlPS5pkaSze1n/Ckk/TtffLWli2foJkjZIOqtk2a6Srpf0mKRHJf31IFzK4Fm/fGuwK9r0YjKtYZb6CrRrF9dfrmKQnzMNrjo6+f3Yz5PlGXvtjqu2BmGATS+yz/yZTNrRQ0EHmwOx2SCrMf3racBzEbE/cDFwQdn62WybAAeSvO43RcRk4K+ARxtd9lyNHJ/URksN2zmZWzhLVQJtYUTv5SoMr6Fc1YJ8hoZvWt3rdQ3ftDrzc9u2HIjNBl/V9K/p+6vS19cD71Q6tCDNNPcksLC4saRdgMOBKwAi4i8R8ecMr2Hwjd4vaRIuBr1iE/Ho7XL9NFaVLwBPazxLD5+9TbmWHj6bp1VDIM6rlg909HFdHSMz/mJj23EgNht8taR/3bJNRGwGngfGSBoBfA74ctn2+5DMYPYfkh6Q9F1J7ZXGqKMjeS57+nz48I3J78HoEFXlC8Cfnn+JU347lp8edA3zD7uKnx50Daf8dizPrnupwkFTedXyIb8vNrYdd9Yyay2zgIsjYkNaQS7aETgQ+FRE3C3pG8DZwBfLDyBpBjADYMKECZkXuKE6OpIOUFl2zurtnJPfm3S+2rA8CZIlHarGjepk7Qub+cfb/gIMA16oPa9yMRiWdwQbjGBY5bps8DgQmw2+WtK/FrdZJmlHYBdgDXAIcLykC4FdgYKkHpLm62URcXe6//UkgXg7Tg07ABW+ABTzKpePx60pr3LewTCPLza2HQdis8FXNf0rMBc4FbgTOB64LSICmFbcQNIsYENEXJq+f1rS6yLiceCdgLPRDYK68yo7GA55DsRmg6zG9K9XAFdLWgSsJQnW1XwK+KGknYAlwEeyuQIr57zKVg8HYrMc1JD+tQf4QJVjzCp7v4BkOlEzayF+Km9mZpYjB2IzM7McORCbmZnlyIHYzMwsR+6sZWbWxAqFYOmajaxY18O4Uf0cGmUtwYHYzKxJFQrBTQuXb5csZPoB4x2MayDpM8DlEfFC3mWppGrTtKRxkq6Q9Iv0/RRJp2VfNDOzNlEowOon4Mn5ye8apzlcumbjliAM0LOpwMzrFrB0zcYsS9tOPgO8Mu9CVFPLM+LvkSQe2Ct9/weSizMzG1SFQrBk1QbuXLyaJas2UCi0QIbOOuYcXrGuZ0sQLurZVGDl+p6sStuyJA2X9N+Sfi/pYUnnkMSt2yXdnm7z75K6JS2U9OWSfY9K5/G+T9I3Jd1YcswrJd2TTqZSPktaQ9TSNL17RFwn6fOwJSvQy1kUxsysLy3bTNvXnMOnT6ma1nLcqE46h3VsE4xrnlBi6JkO/Cki3gNbpgb9CPCOiChOsvwvEbE2nRP8VklvIqlczgEOj4gnJV1Tcsx/IUkv+1FJuwL3SPpVRDS0SaKWGvFGSWOAAJB0KMmUbGZmg6Zlm2nrmHO4OKFE57Dko7pfE0oMPQ8BR0q6QNK0iOgtTp0g6X7gAeAAYAowGVgSEU+m25QG4ncBZ0taANwBdAINn7KslhrxTJIE9PtJ+h0wliQJvZnZoKnUTNvUOZ6Lcw6XBuMa5xyue0KJISQi/iDpQOAo4CuSbi1dn06ychZwUEQ8J+l7JIG1EgHvTydSyUzVGnFE3A+8DXgrcDpwQEQ8WMvBJU2X9LikRZK2m5JN0kxJj0h6UNKtkl5Tsu5USU+kP6fWfklm1o6KzbSlWqKZdvR+xHHfToIvwLCdk/c1zjlcnFDi0H13Z9+xIxyE+yBpL+CFiPgBcBHJ/NzrgZHpJqOAjcDzksYB706XPw7sK2li+v7EksPeDHxK6eTfkt6cRdmr1ogl/V3ZogMlERHfr7LfDsBlwJHAMuBeSXMjonRqtgeAroh4QdI/ABcCJ0oaDZxDksA+gPvSfZ+r+crMrK3UNe9vjgqIOzoOZd0h1zIm/swa7cqojsm8HTmjUmO9EbhIUgHYBPwD8NfATZL+FBHvkPQA8
BjwNPA7gIh4UdIn0u02kkxTWnQecAnwoKQO4Eng6EYXvJam6YNKXneSzHN6P1AxEAMHA4siYgmApGuBYymZIzUibi/Z/i7gg+nrvwVuiYi16b63kDyIL227N7MhpFWbaZeu2cgnfvRA2qw+DNhI57AHmHfmtOZuUm8xEXEzSQ22VDfwrZJtPtzH7rdHxOS05ntZuh8R8SJJS3CmqgbiiPhU6fu059i1NRx7b5JvHUXLgEMqbH8a8IsK++5dvoOkGcAMgAkTGv783MyaTD3z/lbNUFUoJD2c1y9PnuuO3g866q+zVnu27cxZTeFj6SPQnUhaaucM5skHkllrI7BPIwsh6YMkzdBv689+EXE5cDlAV1dXCwwoNLM8VB36VBzrWxxmNGxneN8cmPzeuoNxpSFILTskq81ExMXAxXmdv5bMWj+XNDf9uZHkwfYNNRz7GeDVJe9flS4rP/4RJGO1jomIl/qzr1krq6Ez4ysk/Thdf3dJZ5Li+gmSNkg6q2z5DmnygRszvoSWUXXoU19jfdcurvvclYYgteyQLGuoWmrEXy95vRn4Y0Qsq2G/e4FJaZfxZ4CTgFNKN0h7oM0BpkfEypJVNwNfk7Rb+v5dwOdrOKdZS6ixM+NpwHMRsb+kk4AL2LZH52y2Ps4p9WngUZJeokYNQ58qjfWtknSjmkrPtlt2SJY1VC3PiH89kAOnGbjOIAmqOwBXRsRCSecC3RExl6SL+QjgJ2nv8Kci4pg088l5bO29dm6x45ZZm6jamTF9Pyt9fT1wqSRFREg6jqQH5zZVJ0mvAt4DfJUkB4BRQ4aqOsb61qKvZ9vOnGVQoWla0npJ63r5WS9pXS0Hj4h5EfHaiNgvIr6aLvtSGoSJiCMiYlxETE1/jinZ98qI2D/9+Y96L9SsydTSIXHLNhGxmSSj3RhJI4DPAV9me5cA/xfoM5GxpBlpvt3uVatWDfgCWknVDFWj90ueCZeM9eV9c2oe65tZuWxI6LNGHBEj+1pnZrmaBVwcERvSliQAJB0NrIyI+yS9va+dh2Inx6pDnzo6ko5Zp09JmqNHNK7XdF3lskGRjgY6JSL+rZ/7zUv3+3M956+517SkPShJBxYRT9VzYrMhrpYOicVtlknaEdgFWEMyDPB4SRcCuwIFST0kNehjJB1Fcq+OkvSDiPggVn3oU0dH8jy4l2fCWQ4xqmdIljXMrsAngG0CsaQd09aoXkXEUY04eS2ZtY4B/pVkOqmVwGtIOoIc0IgCmA1RVTszkuR4PxW4kyS/+20REcC04gaSZgEbIuLSdNHn0+VvB85quiCc0VjdLHmIUfNZ37NpwqPPrj9vxbqevcaP6nx28p4jvzCyc1g9lcPzSeZTWECSlasHeI5kQojXSvoZyZfiTuAbaasSkpaSDL0dQdJx8rck6aCfAY5NE4JUVUuN+DzgUOBXEfFmSe9gawYsMxuAGjszXgFcLWkRsJYkWLeuDMfqZqmvIUaTnRkrF+t7Nk34xUPLf/WluQ9PKn4xOveYNxz67jeOP6KOYHw28IaImJp+if3v9H1xRqaPpp2IdyYZ4fDTiFhTdoxJwMkR8TFJ1wHvB35Qy8lrCcSbImKNpA5JHRFxu6RLajm4mfUtIuYB88qWfankdQ/wgSrHmNXH8jtIpm1rHnXMy5snDzFqLo8+u/68YhCG5N/iS3MfnjRx9+HnHbzP6EZNEHRPSRAGOFPS+9LXryYJuuWB+MmIWJC+vg+YWOvJagnEf057ac4HfihpJWVDJszMqspwrG6WPMSouaxY17NXr1+M1vXs1cDTbIlxaQ35COCv0wmK7qD36RNfKnn9MrBzrSerpT3odpJOIp8GbgIWA++t9QRmZsDWsbqlGjhWNyseYtRcxo/qfLbX6TBHdf6pjsOWTpdYbheSxDovSJpM8qi2oWqpEe8I/JLkGdWPgR/30jZuZlZZcaxu+TPijMfq1stDjJrL5D1HfuHcY95waNkz4idev+fILw70mOnj1
99Jehh4EVhRsvom4OOSHiVJ8XxXfVewPSWdMGvYUHoTSXq99wPLIuKIRhemHl1dXdHd3Z13MWzoaslP5UG/b4q9pgdxrK41tQHdN8Ve0yvX9ey1x6jOP71+z5FfrLPXdK76M/vSSmA5yQPqPbIpjpm1tQpjdc1qNbJz2FMN7JiVu1pmX/pE+nD6VmAM8LGIeFPWBTMzMxsKaqkRvxr4TEm3bDOzbLRgwg+zetUy+5KnHzSz7LVowg+zevl/t5k1h74SfqxdnG+5zDLmQGxmzaFSwg+zNuZAbGbNoUUTfljrk7SrpE8McN/PSHplPed3IDaz5lBM+FEMxi2S8MPawq4k0yAOxGeAugJxf8YRm5llp6Mj6Zh1+hQn/LDKetZNYMXD57F++V6M3PNZxh3wBTpHNWoaxFtI8macALwCuCEizpE0HLiOZO7wHUhmJhxHMkXw7ZJWR8Q7BnJyB2Izax51JPwoFIKlazayYl0P40Y5DWXb6lk3gUf/61fM+6dJW3rXH3XRobz+2CPqCMal0yC+i2T+74NJMn/NlXQ4MBb4U0S8B0DSLhHxvKSZwDsiYvVAL8lfNc2s5RUKwR2Pr+DB33fz8pL5PPj7+7jj8RUUCrWl8LUWsuLh87YEYUg69M37p0msePi8Bp3hXenPA8D9wGSSaQ8fAo6UdIGkaRHxfIPO5xqxmbW+p9duYN/VtzHx7plbxiAvPXw2T499L6/Zva9JdawlrV++V6+969cvb9Q0iAL+X0TM2W6FdCBwFPAVSbdGxLmNOKFrxGY5kDRd0uOSFkk6u5f1r5D043T93ZImlq2fIGmDpLPS96+WdLukRyQtlPTpQbqUprDz+qVM/M3MbcYgT/zNTF65fmmu5bIMjNzz2V57148c36hpEG8GPippBICkvSXtIWkv4IWI+AFwEXBgL/sOSKaBuIYPm8Ml3S9ps6Tjy9a9LGlB+jM3y3KaDSZJOwCXAe8GpgAnS5pSttlpJHOg7g9cDFxQtn428IuS95uBf4yIKSTzpX6yl2PmrlAIlqzawJ2LV7Nk1YaGNR2P2LSm1zHIwzd5xta2M+6AL3DURU9s07v+qIueYNwb6poGEShOg3gk8CPgTkkPAdeTBNo3AvekHbrOAb6S7n45cJOk2wd6/syapks+bI4ElgH3SpobEY+UbPYU8GHgrF4O8WJETM2qfGY5OhhYFBFLACRdCxwLlN4bxwKz0tfXA5dKUkSEpOOAJ4GNxY0j4lng2fT1+nTu1L3LjpmrQiG4aeFyZl63gOI8srNPmMr0A8bX3amqc7e9kw/k0mA8bOdkubWXzlFP8fpjj2D0fmmv6fF/Ytwbvlhnr2ki4pSyRd8oe7+YpLZcvt+3gG/Vc+4sa8RbPmwi4i9A8cNmi4hYGhEPAoUMy2HWbPYGni55vyxd1us2EbEZeB4YkzaXfQ74cl8HT5ux3wzc3cf6GZK6JXWvWrVqoNfQp75qvUvXbNwShAF6NhWYed0Clq7ZWOlwNekYsx9x3Le3GYMcx32bjjEeg9yWOkc9xWveeipv+N9H8pq3nlpvEM5blp21evuwOaQf+3dK6iZpcjs/In5WvoGkGcAMgAkTJgy8pGatYxZwcURskLavRaaB+qckM6at6+0AEXE5SXMaXV1dDe1WXKnWu2Jdz5YgXNSzqcDK9T3sO3ZEfSfu6ECvPwbGHbBlDLI8BtlaRDP3mn5NRDwjaV/gNkkPRcQ22d+z/EAxy9AzJNOLFr0qXdbbNssk7QjsAqwh+TJ7vKQLSbIBFST1RMSlkoaRBOEfRsR/ZnwNveqr1jv5zGmMG9VJ57CObYJx57AO9hjZ2ZiT1zEG2SxPWX5drOXDpk8R8Uz6ewlwB0lTm1k7uBeYJGkfSTsBJwHlHRLnAqemr48HbovEtIiYGBETgUuAr6VBWMAVwKMRMXtQrqIXlWq9E8cMZ/YJU+kclnzsFGvLE8cMz6OoZk0jyxrxlg8bkgB8ElD+MLxXknYj6Sb+k
qTdgcOACzMrqdkgiojNks4g6fixA3BlRCyUdC7QHRFzSYLq1ZIWAWtJ7p9KDgM+BDyU9uoE+OeImJfJRfShUq23o0NMP2A8k8+cxsr1Pewx0tmvzAAUkV2LrqSjSL61Fz9svlr6YSPpIOAGYDegB1geEQdIeiswh6QTVwdwSURcUelcXV1d0d3dndm1mFXRktGk0fdNlj2jrS35PwUZB+LB5EBsOWvJD5Q+75tCAdYuTuYIHtm/yReKOZ9d67Ua+D8Gzd1Zy8zyUCjAYz+HG07fki6S981JZkaqIRh3dIh9x46ovye02RDhvv1mtq21i7cGYUh+33B6stzMGs6B2My2tX55r+ki2bA8n/KYtTkHYjPb1sjx9JpUf8T4fMpj1uYciM1sW6P3S54JlybVf9+cZHnGspoUwqyZubOWmW2royPpmHX6lC3pIvvTa3qg8h76VOztvWJdD+NGube3DR4HYjPbXg7pIiulx8y6B3beXwJsaHPTtJk1hUrpMbOW5cxQZtU4EJtZUyimxyzV0EkhKsjzS4CZA7GZNVahAKufgCfnJ78LtU03nuekEHl+CTDzM2Iza5w6snLlOSlE8UtA+TNizwxlg8GB2Mwap6+sXKdPqanjV17pMT0zlOXJgdjMGqdSVq5B7IE9EM6RbXnxM2Izaxxn5TLrNwdiM2ucHLNymbUqB2KznEiaLulxSYsknd3L+ldI+nG6/m5JE8vWT5C0QdJZtR6zVgNONbklK9d8+PCNye8ap080G6r8jNgsB5J2AC4DjgSWAfdKmhsRj5RsdhrwXETsL+kk4ALgxJL1s4Ff9POYVdWdZSqHrFxmrcxfU83ycTCwKCKWRMRfgGuBY8u2ORa4Kn19PfBOSQKQdBzwJLCwn8esylmmzAaXA7FZPvYGni55vyxd1us2EbEZeB4YI2kE8DngywM4JpJmSOqW1L1q1artCuYsU2aDy4HYrPXMAi6OiA0D2TkiLo+IrojoGjt27HbrnWXKbHA5EJvl4xng1SXvX5Uu63UbSTsCuwBrgEOACyUtBT4D/LOkM2o8ZlV5ppo0G4oy7awlaTrwDWAH4LsRcX7Z+sOBS4A3ASdFxPUl604FvpC+/UpEXIXZQBUKSdan9cuTsa79mV+3nn37di8wSdI+JMHyJOCUsm3mAqcCdwLHA7dFRADTihtImgVsiIhL02Bd7ZhVOcuU2eDKLBDX2IPzKeDDwFll+44GzgG6gADuS/d9LqvyWpOrN5AOMP9xXftWEBGb01rszSRfVK+MiIWSzgW6I2IucAVwtaRFwFqSwNrvYw6kfM4yZTZ4sqwRb+nBCSCp2INzSyCOiKXpuvLpWf4WuCUi1qbrbwGmA9dkWF6rV7VgOdBgWm8wrCf/cZ25kyuJiHnAvLJlXyp53QN8oMoxZlU7ppk1tyyfEdfUg7Oefav1/mxqA5wqrmnPXQyWc6bBVUcnvx/7+dZjV1tfSV/BcO3i2spWKf9xsWx9/T2q7WtmVqeW7qxVrfdn06onKBX3H2ggrffcfakWLGsJpn1dV73BsFL+42p/D+dONrOMZRmI6+nB2ZDen02rnhpevYG02rmrBfmBBstaaqV9XVctwbBSuSvlP67293DuZDPLWJbPiGvpFdqXm4GvSdotff8u4PONL2KdBvrMs5ap4vo6di3PLCuVq9K5R+9X+VlspWe1xWBZeuzSYFltfaXrKgbD8vMWg2G1Z8hb8h9PSa5zRMnfpNq/RaV9zcwaILNPkzQTULEH56PAdcVeoZKOAZB0kKRlJB1S5khamO67FjiPJJjfC5xb7LjVb/U24/a1bz0102o1vErHrqdmWe3c9TQvV6s5Vltf6bqqTSRQSwtDMf/xxGlbA2wt/xaV9h2q8uzfYNaGMh1HXEOv0HtJmp172/dK4Mq6ClCtplSp5lht33p601ar4VU6dj01y90nVT73H39XuXZYrfZYqeZYrWZZ7boqTSRQz2T01f4tbFsZDecyG8rae/ala
s2d9QTaah/+lYJ8taBU6dgTDqscOOppaq23ebnarDuV1tcTEKuVqxI3PfdPhsO5zIaq9g7E1Zpx6wm0lT78a6k1VApKlY5db82y0rmrBcMsa4/1BMR6y+Vp+2pWWL+cjl7ui8L65XT472c2IO0diCsFpXoCLVT+8K+31lAtsGRVs6wWDLOuPQ40ILpWO2g2Dtudkb3cFxuH7c7I/Ipl1tLaOxBXC0oDDbRQX0/cauoJLPUGpXqal/PUrOVqM3/YPJbR02azz/yZW+6LJ6fN5rnNYzkw78KZtaj2DsSVglI9gbb0+L19+NfzzLLasbPe16yC3YZ38tG7x3HGQdewh55nZezCpXcXuOL1nXkXzaxltXcghr6DUj2Bthr3xLU2NXHMcP5p+hRmXreAnk3D6BzW4ykSzerU/oG4kqxqjn5maW3KUySaNd7QDsRZcvOwtSlPkWjWWK6imZmZ5ciB2MzMLEeKiLzL0BCSVgF/rLDJ7sDqQSpOf7hc/dOs5VodEdPzLkR/+b5pOJerf1ryvmm0tgnE1UjqjoiuvMtRzuXqn2YtV7tq1r+3y9U/zVouS7hp2szMLEcOxGZmZjkaSoH48rwL0AeXq3+atVztqln/3i5X/zRruYwh9IzYzMysGQ2lGrGZmVnTcSA2MzPLUdsHYknTJT0uaZGks/MuTylJSyU9JGmBpO4cy3GlpJWSHi5ZNlrSLZKeSH/v1iTlmiXpmfRvtkDSUYNdrqGgWe+bZrln0rL4vrGGaOtALGkH4DLg3cAU4GRJU/It1XbeERFTcx7j9z2gfFD92cCtETEJuDV9P9i+x/blArg4/ZtNjYh5g1ymttcC900z3DPg+8YapK0DMXAwsCgilkTEX4BrgWNzLlPTiYjfAGvLFh8LXJW+vgo4bjDLBH2Wy7Ln+6YGvm+sUdo9EO8NPF3yflm6rFkE8EtJ90makXdhyoyLiGfT18uBcXkWpswZkh5Mm+AGvelvCGjm+6aZ7xnwfWMD0O6BuNn9TUQcSNIE+ElJh+ddoN5EMsatWca5/TuwHzAVeBb411xLY4OtJe4Z8H1jtWv3QPwM8OqS969KlzWFiHgm/b0SuIGkSbBZrJC0J0D6e2XO5QEgIlZExMsRUQC+Q3P9zdpF0943TX7PgO8bG4B2D8T3ApMk7SNpJ+AkYG7OZQJA0nBJI4uvgXcBD1fea1DNBU5NX58K/FeOZdmi+CGXeh/N9TdrF01537TAPQO+b2wAdsy7AFmKiM2SzgBuBnYAroyIhTkXq2gccIMkSP4dfhQRN+VREEnXAG8Hdpe0DDgHOB+4TtJpJNPkndAk5Xq7pKkkTX5LgdMHu1ztronvm6a5Z8D3jTWOU1yamZnlqN2bps3MzJqaA7GZmVmOHIjNzMxy5EBsZmaWIwdiMzOzHDkQW80kvV3SjXmXw6yV+L6xahyIzczMcuRA3IYkfVDSPem8o3Mk7SBpg6SLJS2UdKuksem2UyXdlSaDv6GYDF7S/pJ+Jen3ku6XtF96+BGSrpf0mKQfKs2uYNbqfN9YXhyI24yk1wMnAodFxFTgZeD/AMOB7og4APg1SbYdgO8Dn4uINwEPlSz/IXBZRPwV8FaSRPEAbwY+QzJP7b7AYRlfklnmfN9Ynto6xeUQ9U7gLcC96ZfunUkSzxeAH6fb/AD4T0m7ALtGxK/T5VcBP0nz+e4dETcAREQPQHq8eyJiWfp+ATAR+G3mV2WWLd83lhsH4vYj4KqI+Pw2C6Uvlm030NymL5W8fhn/H7L24PvGcuOm6fZzK3C8pD0AJI2W9BqSf+vj021OAX4bEc8Dz0mali7/EPDriFgPLJN0XHqMV0h65WBehNkg831jufG3sjYTEY9I+gLwS0kdwCbgk8BG4OB03UqS52GQTNX27fQDYwnwkXT5h4A5ks5Nj/GBQbwMs0Hl+8by5NmXhghJGyJiRN7lMGslvm9sMLhp2szMLEeuEZuZmeXINWIzM7McORCbmZnlyIHYz
MwsRw7EZmZmOXIgNjMzy9H/B1Ck7YhL5z97AAAAAElFTkSuQmCC\n", + "image/png": "", "text/plain": [ "
" ] @@ -1496,7 +1496,7 @@ "outputs": [ { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -1508,7 +1508,7 @@ }, { "data": { - "image/png": "\n", + "image/png": "", "text/plain": [ "
" ] @@ -1889,11 +1889,11 @@ "metadata": {}, "outputs": [], "source": [ - "# Record results for tests\n", - "store_metadata('eval_precision', eval_precision)\n", - "store_metadata('eval_recall', eval_recall)\n", - "store_metadata('eval_precision2', eval_precision2)\n", - "store_metadata('eval_recall2', eval_recall2)" + "# Record results for tests - ignore this cell\n", + "store_metadata(\"eval_precision\", eval_precision)\n", + "store_metadata(\"eval_recall\", eval_recall)\n", + "store_metadata(\"eval_precision2\", eval_precision2)\n", + "store_metadata(\"eval_recall2\", eval_recall2)\n" ] }, { @@ -1953,4 +1953,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index de82ddd68..517f97afd 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -156,20 +156,20 @@ } ], "source": [ - "# set the environment path to find Recommenders\n", - "%load_ext autoreload\n", - "%autoreload 2\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", "\n", "import sys\n", + "import numpy as np\n", + "import pandas as pd\n", "\n", "import pyspark\n", - "from pyspark.ml.recommendation import ALS\n", "import pyspark.sql.functions as F\n", + "from pyspark.sql.window import Window\n", "from pyspark.sql.types import FloatType, IntegerType, LongType, StructType, StructField\n", "from pyspark.ml.feature import Tokenizer, StopWordsRemover\n", "from pyspark.ml.feature import HashingTF, CountVectorizer, VectorAssembler\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)\n", + "from pyspark.ml.recommendation import ALS\n", "\n", "from recommenders.utils.timer import Timer\n", "from recommenders.datasets import movielens\n", @@ -177,11 +177,8 @@ "from 
recommenders.evaluation.spark_evaluation import SparkRankingEvaluation, SparkDiversityEvaluation\n", "from recommenders.utils.spark_utils import start_or_get_spark\n", "\n", - "from pyspark.sql.window import Window\n", - "import pyspark.sql.functions as F\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", + "%load_ext autoreload\n", + "%autoreload 2\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" @@ -354,7 +351,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -498,7 +495,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -954,7 +951,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "\r", + "\r\n", " \r" ] } @@ -1116,4 +1113,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/examples/04_model_select_and_optimize/azureml_hyperdrive_surprise_svd.ipynb b/examples/04_model_select_and_optimize/azureml_hyperdrive_surprise_svd.ipynb index 259ea4be4..03d600dc6 100644 --- a/examples/04_model_select_and_optimize/azureml_hyperdrive_surprise_svd.ipynb +++ b/examples/04_model_select_and_optimize/azureml_hyperdrive_surprise_svd.ipynb @@ -54,7 +54,6 @@ "import sys\n", "import os\n", "import surprise\n", - "import papermill as pm\n", "import pandas as pd\n", "import shutil\n", "from tempfile import TemporaryDirectory\n", @@ -783,4 +782,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb b/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb index 3dde0284e..e608257a4 100644 --- a/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb +++ b/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb @@ -75,7 +75,6 @@ "\n", "from IPython.display import clear_output\n", "import numpy as np\n", - 
"import papermill as pm\n", "import pandas as pd\n", "import sklearn.preprocessing\n", "import tensorflow as tf\n", @@ -1198,4 +1197,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/04_model_select_and_optimize/nni_ncf.ipynb b/examples/04_model_select_and_optimize/nni_ncf.ipynb index 500f6656c..17f6a3f7a 100644 --- a/examples/04_model_select_and_optimize/nni_ncf.ipynb +++ b/examples/04_model_select_and_optimize/nni_ncf.ipynb @@ -55,7 +55,6 @@ "import json\n", "import os\n", "import surprise\n", - "import papermill as pm\n", "import pandas as pd\n", "import shutil\n", "import subprocess\n", @@ -700,4 +699,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb b/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb index aae442dcd..3f24ba895 100644 --- a/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb +++ b/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb @@ -51,7 +51,6 @@ "import json\n", "import os\n", "import surprise\n", - "import papermill as pm\n", "import pandas as pd\n", "import shutil\n", "import subprocess\n", @@ -1081,4 +1080,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb index 5f1c19286..2aba7b791 100644 --- a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb +++ b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb @@ -2,22 +2,23 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "Copyright (c) Recommenders contributors.\n", "\n", "Licensed under the MIT License." 
- ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Hyperparameter tuning (Spark based recommender)" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Hyperparameter tuning for Spark based recommender algorithm is important to select a model with the optimal performance. This notebook introduces good practices in performing hyperparameter tuning for building recommender models with the utility functions provided in the [Microsoft/Recommenders](https://github.com/recommenders-team/recommenders.git) repository.\n", "\n", @@ -25,27 +26,40 @@ "* Spark native/custom constructs (`ParamGridBuilder`, `TrainValidationSplit`).\n", "* `hyperopt` package with Tree of Parzen Estimator algorithm. \n", "* Brute-force random search of parameter values sampled with pre-defined space. " - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 0 Global settings and import" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", + "[GCC 7.2.0]\n", + "Pandas version: 0.23.0\n", + "PySpark version: 2.3.1\n" + ] + } + ], "source": [ - "# set the environment path to find Recommenders\n", - "%matplotlib notebook\n", "\n", - "import matplotlib.pyplot as plt\n", "import sys\n", - "import pandas as pd\n", "import numpy as np\n", + "import pandas as pd\n", + "from hyperopt import fmin, tpe, hp, STATUS_OK, Trials\n", + "from hyperopt.pyll.stochastic import sample\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib notebook\n", "\n", "import pyspark\n", "import pyspark.sql.functions as F\n", @@ -59,9 +73,6 @@ "from pyspark.mllib.evaluation import RankingMetrics\n", "from pyspark.sql.types import ArrayType, IntegerType\n", "\n", - "from hyperopt import fmin, tpe, hp, STATUS_OK, 
Trials\n", - "from hyperopt.pyll.stochastic import sample\n", - "\n", "from recommenders.utils.timer import Timer\n", "from recommenders.utils.spark_utils import start_or_get_spark\n", "from recommenders.evaluation.spark_evaluation import SparkRankingEvaluation, SparkRatingEvaluation\n", @@ -71,24 +82,17 @@ "print(\"System version: {}\".format(sys.version))\n", "print(\"Pandas version: {}\".format(pd.__version__))\n", "print(\"PySpark version: {}\".format(pyspark.__version__))" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "System version: 3.5.5 |Anaconda custom (64-bit)| (default, May 13 2018, 21:12:35) \n", - "[GCC 7.2.0]\n", - "Pandas version: 0.23.0\n", - "PySpark version: 2.3.1\n" - ] - } - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], "source": [ "MOVIELENS_DATA_SIZE = \"100k\"\n", "\n", @@ -118,142 +122,138 @@ "\n", "RANK = [10, 15, 20, 30, 40]\n", "REG = [ 0.1, 0.01, 0.001, 0.0001, 0.00001]" - ], - "outputs": [], - "metadata": { - "tags": [ - "parameters" - ] - } + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 1 Data preparation" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "A Spark session is created. Note in this case, to study the running time for different approaches, the Spark session in local mode uses only one core for running. This eliminates the impact of parallelization of parameter tuning. " - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 3, + "metadata": {}, + "outputs": [], "source": [ "spark = start_or_get_spark(url=\"local[{}]\".format(NUMBER_CORES))" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "MovieLens 100k dataset is used for running the demonstration." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 4, - "source": [ - "data = load_spark_df(spark, size=MOVIELENS_DATA_SIZE, header=(COL_USER, COL_ITEM, COL_RATING))" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "100%|██████████| 4.81k/4.81k [00:01<00:00, 2.47kKB/s]\n" ] } ], - "metadata": {} + "source": [ + "data = load_spark_df(spark, size=MOVIELENS_DATA_SIZE, header=(COL_USER, COL_ITEM, COL_RATING))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "To reduce time spent on the comparitive study, 50% of the data is used for the experimentation below." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": {}, + "outputs": [], "source": [ "data, _ = spark_random_split(data, ratio=SUBSET_RATIO)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "The dataset is split into 3 subsets randomly with a given split ratio. The hyperparameter tuning is performed on the training and the validating data, and then the optimal recommender selected is evaluated on the testing dataset." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ "train, valid, test = spark_random_split(data, ratio=[3, 1, 1])" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 2 Hyper parameter tuning with Azure Machine Learning Services" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "The `hyperdrive` module in the [Azure Machine Learning Services](https://azure.microsoft.com/en-us/services/machine-learning-service/) runs [hyperparameter tuning and optimizing for machine learning model selection](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters). 
At the moment, the service supports running hyperparameter tuning on heterogenous computing targets such as cluster of commodity compute nodes with or without GPU devices (see detailed documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets)). It is feasible to run parameter tuning on a cluster of VM nodes. In this case, the service containerizes individual and independent Spark session on each node of the cluster to run the parameter tuning job in parallel, instead of inside a single Spark session where the training is executed in a distributed manner. \n", "\n", "Detailed instructions of tuning hyperparameter of non-Spark workloads by using Azure Machine Learning Services can be found in [this](./hypertune_aml_wide_and_deep_quickstart.ipynb) notebook. " - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 3 Hyper parameter tuning with Spark ML constructs" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.1 Spark native construct" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Spark ML lib implements modules such as `CrossValidator` and `TrainValidationSplit` for tuning hyperparameters (see [here](https://spark.apache.org/docs/2.2.0/ml-tuning.html)). However, by default, it does not support custom machine learning algorithms, data splitting methods, and evaluation metrics, like what are offered as utility functions in the Recommenders repository. \n", "\n", "For example, the Spark native constuct can be used for tuning a recommender against the `rmse` metric which is one of the available regression metrics in Spark." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Firstly, a Spark ALS object needs to be created. In this case, for illustration purpose, it is an ALS model object." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": {}, + "outputs": [], "source": [ "# NOTE the parameters of interest, rank and regParam, are left unset, \n", "# because their values will be assigned in the parameter grid builder.\n", @@ -265,41 +265,41 @@ " nonnegative=False,\n", " **HEADER_ALS\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Then, a parameter grid can be defined as follows. Without loss of generity, only `rank` and `regParam` are considered." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, + "metadata": {}, + "outputs": [], "source": [ "paramGrid = ParamGridBuilder() \\\n", " .addGrid(als.rank, RANK) \\\n", " .addGrid(als.regParam, REG) \\\n", " .build()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Given the settings above, a `TrainValidationSplit` constructor can be created for fitting the best model in the given parameter range. In this case, the `RegressionEvaluator` is using `RMSE`, by default, as an evaluation metric. \n", "\n", "Since the data splitter is embedded in the `TrainValidationSplit` object, to make sure the splitting ratio is consistent across different approaches, the split ratio is set to be 0.75 and in the model training the training dataset and validating dataset are combined. " - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 9, + "metadata": {}, + "outputs": [], "source": [ "tvs = TrainValidationSplit(\n", " estimator=als,\n", @@ -311,44 +311,36 @@ " # are therefore not available here. 
\n", " trainRatio=0.75\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 10, + "metadata": {}, + "outputs": [], "source": [ "with Timer() as time_spark:\n", " # Run TrainValidationSplit, and choose the best set of parameters.\n", " # NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", " model = tvs.fit(train.union(valid))\n", "\n" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "The model parameters in the grid and the best metrics can be then returned. " - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 11, - "source": [ - "for idx, item in enumerate(model.getEstimatorParamMaps()):\n", - " print('Run {}:'.format(idx))\n", - " print('\\tValidation Metric: {}'.format(model.validationMetrics[idx]))\n", - " for key, value in item.items():\n", - " print('\\t{0}: {1}'.format(repr(key), value))" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "Run 0:\n", "\tValidation Metric: 1.0505385750367227\n", @@ -453,17 +445,20 @@ ] } ], - "metadata": {} + "source": [ + "for idx, item in enumerate(model.getEstimatorParamMaps()):\n", + " print('Run {}:'.format(idx))\n", + " print('\\tValidation Metric: {}'.format(model.validationMetrics[idx]))\n", + " for key, value in item.items():\n", + " print('\\t{0}: {1}'.format(repr(key), value))" + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "model.validationMetrics" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[1.0505385750367227,\n", @@ -493,49 +488,54 @@ " 4.426604995574413]" ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "model.validationMetrics" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "To get the best model, 
just do" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 13, + "metadata": {}, + "outputs": [], "source": [ "model_best_spark = model.bestModel" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 3.2 Custom `Estimator`, `Transformer`, and `Evaluator` for Spark ALS\n", "\n", "One can also customize Spark modules to allow tuning hyperparameters for a desired model and evaluation metric, given that the native Spark ALS does not allow tuning hyperparameters for ranking metrics such as precision@k, recall@k, etc. This can be done by creating custom `Estimator`, `Transformer` and `Evaluator`. The benefit is that, after the customization, the tuning process can make use of `trainValidSplit` directly, which distributes the tuning in a Spark session." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### Customized `Estimator` and `Transformer` for top k recommender based on Spark ALS\n", "\n", "The following shows how to implement a PySpark `Estimator` and `Transfomer` for recommending top k items from ALS model. The latter generates top k recommendations from the model object. Both of the two are designed by following the protocol of Spark APIs, to make sure that they can be run with the hyperparameter tuning constructs in Spark." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 14, + "metadata": {}, + "outputs": [], "source": [ "class ALSTopK(\n", " ALS,\n", @@ -690,22 +690,22 @@ " )\n", " \n", " return topk_recommendation_all.select(self.userCol, labelCol, predictionCol)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### Customized precision@k evaluation metric\n", "\n", "In addition to the custom `Estimator` and `Transformer`, it may also be desired to customize an `Evaluator` to allow \"beyond-rating\" metrics. The codes as following illustrates a precision@k evaluator. 
Other types of evaluators can be developed in a similar way." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 15, + "metadata": {}, + "outputs": [], "source": [ "# Define a custom Evaulator. Here precision@k is used.\n", "class PrecisionAtKEvaluator(Evaluator):\n", @@ -727,20 +727,20 @@ "\n", " def isLargerBetter(self):\n", " return True" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "Then a new ALS top-k recommender can be created, and the Spark native construct, `TrainValidationSplit` module, can be used to find the optimal model w.r.t the precision@k metric." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 16, + "metadata": {}, + "outputs": [], "source": [ "alstopk = ALSTopK(\n", " userCol=COL_USER,\n", @@ -765,23 +765,14 @@ " # are therefore not available here. \n", " trainRatio=0.75\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 17, - "source": [ - "# Run TrainValidationSplit, and choose the best set of parameters.\n", - "# NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", - "model_precision = tvs.fit(train.union(valid))\n", - "\n", - "model_precision.getEstimatorParamMaps()" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "[{Param(parent='ALSTopK_4f48b7cc6cf2badfcea7', name='rank', doc='rank of the factorization'): 10,\n", @@ -794,15 +785,24 @@ " Param(parent='ALSTopK_4f48b7cc6cf2badfcea7', name='regParam', doc='regularization parameter (>= 0).'): 0.01}]" ] }, + "execution_count": 17, "metadata": {}, - "execution_count": 17 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "# Run TrainValidationSplit, and choose the best set of parameters.\n", + "# NOTE train and valid is union because in Spark TrainValidationSplit does splitting by itself.\n", + "model_precision = 
tvs.fit(train.union(valid))\n", + "\n", + "model_precision.getEstimatorParamMaps()" + ] }, { "cell_type": "code", "execution_count": 18, + "metadata": {}, + "outputs": [], "source": [ "def best_param(model, is_larger_better=True):\n", " if is_larger_better:\n", @@ -813,35 +813,25 @@ " parameters = model.getEstimatorParamMaps()[model.validationMetrics.index(best_metric)]\n", " \n", " return list(parameters.values())" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 19, + "metadata": {}, + "outputs": [], "source": [ "params = best_param(model_precision)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 20, - "source": [ - "model_precision.bestModel.transform(valid).limit(5).show()\n", - "\n", - "for idx, item in enumerate(model_precision.getEstimatorParamMaps()):\n", - " print('Run {}:'.format(idx))\n", - " print('\\tValidation Metric: {}'.format(model_precision.validationMetrics[idx]))\n", - " for key, value in item.items():\n", - " print('\\t{0}: {1}'.format(repr(key), value))" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "+------+--------------------+--------------------+\n", "|userID| label| prediction|\n", @@ -872,29 +862,39 @@ ] } ], - "metadata": {} + "source": [ + "model_precision.bestModel.transform(valid).limit(5).show()\n", + "\n", + "for idx, item in enumerate(model_precision.getEstimatorParamMaps()):\n", + " print('Run {}:'.format(idx))\n", + " print('\\tValidation Metric: {}'.format(model_precision.validationMetrics[idx]))\n", + " for key, value in item.items():\n", + " print('\\t{0}: {1}'.format(repr(key), value))" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "## 4 Hyperparameter tuning with `hyperopt`" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "`hyperopt` is an open source Python package that is designed for tuning parameters for generic 
function with any pre-defined loss. More information about `hyperopt` can be found [here](https://github.com/hyperopt/hyperopt). `hyperopt` supports parallelization on MongoDB but not Spark. In our case, the tuning is performed in a sequential mode on a local computer.\n", "\n", "In `hyperopt`, an *objective* function is defined for optimizing the hyper parameters. In this case, the objective is similar to that in the Spark native construct situation, which is *to the RMSE metric for an ALS recommender*. Parameters of `rank` and `regParam` are used as hyperparameters. \n", "\n", "The objective function shown below demonstrates a RMSE loss for an ALS recommender. " - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 21, + "metadata": {}, + "outputs": [], "source": [ "# Customize an objective function\n", "def objective(params):\n", @@ -940,12 +940,11 @@ " 'status': STATUS_OK,\n", " 'eval_time': time_run_start.interval\n", " }" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "A search space is usually defined for hyperparameter exploration. Design of search space is empirical, and depends on the understanding of how distribution of parameter of interest affects the model performance measured by the loss function. \n", "\n", @@ -954,12 +953,13 @@ "* The reg parameter prevents overfitting in certain way. \n", "\n", "Therefore, in this case, a uniform distribution and a lognormal distribution sampling spaces are used for rank and reg, respectively. A narrow search space is used for illustration purpose, that is, the range of rank is from 10 to 20, while that of reg is from $e^{-5}$ to $e^{-1}$. Together with the randomly sampled hyper parameters, other parameters use for building / evaluating the recommender, like `k`, column names, data, etc., are kept as constants." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 22, + "metadata": {}, + "outputs": [], "source": [ "# define a search space\n", "space = {\n", @@ -974,31 +974,31 @@ " 'k': 10,\n", " 'relevancy_method': \"top_k\"\n", "}" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### 4.1 Hyperparameter tuning with TPE" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "`fmin` of `hyperopt` is used for running the trials for searching optimal hyper parameters. In `hyperopt`, there are different strategies for intelligently optimize hyper parameters. For example, `hyperopt` avails [Tree of Parzen Estimators (TPE) method](https://papers.nips.cc/paper/4443-algorithms-for-hyper-parameter-optimization.pdf) for searching optimal parameters. \n", "\n", "The TPE method models a surface response of $p(x|y)$ by transforming a generative process, replacing the distributions of the configuration prior with non-parametric densities, where $p$ is the probability of configuration space $x$ given the loss $y$. For different configuration space, the TPE method does different replacements. That is, uniform $\\to$ truncated Gaussian mixture, log-uniform $\\to$ exponentiated truncated Gaussian mixture, categorical $\\to$ re-weighted categorical, etc. Using different observations ${x(1), ..., x(k)}$ in the non-parametric densities, these substitutions represent a learning algorithm that can produce a variety of densities over the configuration space $X$. By maintaining sorted lists of observed variables in $H$, the runtime of each iteration of the TPE algorithm can scale linearly in $|H|$ and linearly in the number of variables (dimensions) being optimized. In a nutshell, the algorithm recognizes the irrelevant variables in the configuration space, and thus reduces iterations in searching for the optimal ones. 
Details of the TPE algorithm can be found in the reference paper.\n", "\n", "The following runs the trials with the pre-defined objective function and search space. TPE is used as the optimization method. Totally there will be 10 evaluations run for searching the best parameters." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 23, + "metadata": {}, + "outputs": [], "source": [ "with Timer() as time_hyperopt:\n", " # Trials for recording each iteration of the hyperparameter searching.\n", @@ -1012,19 +1012,14 @@ " max_evals=NUMBER_ITERATIONS\n", " )\n", " \n" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 24, - "source": [ - "trials.best_trial" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'book_time': datetime.datetime(2019, 7, 17, 12, 28, 19, 108000),\n", @@ -1045,41 +1040,31 @@ " 'version': 0}" ] }, + "execution_count": 24, "metadata": {}, - "execution_count": 24 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "trials.best_trial" + ] }, { "cell_type": "code", "execution_count": 25, - "source": [ - "parameters = ['rank', 'reg']\n", - "cols = len(parameters)\n", - "f, axes = plt.subplots(nrows=1, ncols=cols, figsize=(15,5))\n", - "cmap = plt.cm.jet\n", - "for i, val in enumerate(parameters):\n", - " xs = np.array([t['misc']['vals'][val] for t in trials.trials]).ravel()\n", - " ys = [t['result']['loss'] for t in trials.trials]\n", - " xs, ys = zip(*sorted(zip(xs, ys)))\n", - " ys = np.array(ys)\n", - " axes[i].scatter(xs, ys, s=20, linewidth=0.01, alpha=0.75, c=cmap(float(i)/len(parameters)))\n", - " axes[i].set_title(val)" - ], + "metadata": {}, "outputs": [ { - "output_type": "display_data", "data": { "application/javascript": "/* Put everything inside the global mpl namespace */\nwindow.mpl = {};\n\n\nmpl.get_websocket_type = function() {\n if (typeof(WebSocket) !== 'undefined') {\n return WebSocket;\n } else if 
(typeof(MozWebSocket) !== 'undefined') {\n return MozWebSocket;\n } else {\n alert('Your browser does not have WebSocket support.' +\n 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n 'Firefox 4 and 5 are also supported but you ' +\n 'have to enable WebSockets in about:config.');\n };\n}\n\nmpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n this.id = figure_id;\n\n this.ws = websocket;\n\n this.supports_binary = (this.ws.binaryType != undefined);\n\n if (!this.supports_binary) {\n var warnings = document.getElementById(\"mpl-warnings\");\n if (warnings) {\n warnings.style.display = 'block';\n warnings.textContent = (\n \"This browser does not support binary websocket messages. \" +\n \"Performance may be slow.\");\n }\n }\n\n this.imageObj = new Image();\n\n this.context = undefined;\n this.message = undefined;\n this.canvas = undefined;\n this.rubberband_canvas = undefined;\n this.rubberband_context = undefined;\n this.format_dropdown = undefined;\n\n this.image_mode = 'full';\n\n this.root = $('
');\n this._root_extra_style(this.root)\n this.root.attr('style', 'display: inline-block');\n\n $(parent_element).append(this.root);\n\n this._init_header(this);\n this._init_canvas(this);\n this._init_toolbar(this);\n\n var fig = this;\n\n this.waiting = false;\n\n this.ws.onopen = function () {\n fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n fig.send_message(\"send_image_mode\", {});\n if (mpl.ratio != 1) {\n fig.send_message(\"set_dpi_ratio\", {'dpi_ratio': mpl.ratio});\n }\n fig.send_message(\"refresh\", {});\n }\n\n this.imageObj.onload = function() {\n if (fig.image_mode == 'full') {\n // Full images could contain transparency (where diff images\n // almost always do), so we need to clear the canvas so that\n // there is no ghosting.\n fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n }\n fig.context.drawImage(fig.imageObj, 0, 0);\n };\n\n this.imageObj.onunload = function() {\n fig.ws.close();\n }\n\n this.ws.onmessage = this._make_on_message_function(this);\n\n this.ondownload = ondownload;\n}\n\nmpl.figure.prototype._init_header = function() {\n var titlebar = $(\n '
');\n var titletext = $(\n '
');\n titlebar.append(titletext)\n this.root.append(titlebar);\n this.header = titletext[0];\n}\n\n\n\nmpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n\n}\n\n\nmpl.figure.prototype._root_extra_style = function(canvas_div) {\n\n}\n\nmpl.figure.prototype._init_canvas = function() {\n var fig = this;\n\n var canvas_div = $('
');\n\n canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n\n function canvas_keyboard_event(event) {\n return fig.key_event(event, event['data']);\n }\n\n canvas_div.keydown('key_press', canvas_keyboard_event);\n canvas_div.keyup('key_release', canvas_keyboard_event);\n this.canvas_div = canvas_div\n this._canvas_extra_style(canvas_div)\n this.root.append(canvas_div);\n\n var canvas = $('');\n canvas.addClass('mpl-canvas');\n canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n\n this.canvas = canvas[0];\n this.context = canvas[0].getContext(\"2d\");\n\n var backingStore = this.context.backingStorePixelRatio ||\n\tthis.context.webkitBackingStorePixelRatio ||\n\tthis.context.mozBackingStorePixelRatio ||\n\tthis.context.msBackingStorePixelRatio ||\n\tthis.context.oBackingStorePixelRatio ||\n\tthis.context.backingStorePixelRatio || 1;\n\n mpl.ratio = (window.devicePixelRatio || 1) / backingStore;\n\n var rubberband = $('');\n rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n\n var pass_mouse_events = true;\n\n canvas_div.resizable({\n start: function(event, ui) {\n pass_mouse_events = false;\n },\n resize: function(event, ui) {\n fig.request_resize(ui.size.width, ui.size.height);\n },\n stop: function(event, ui) {\n pass_mouse_events = true;\n fig.request_resize(ui.size.width, ui.size.height);\n },\n });\n\n function mouse_event_fn(event) {\n if (pass_mouse_events)\n return fig.mouse_event(event, event['data']);\n }\n\n rubberband.mousedown('button_press', mouse_event_fn);\n rubberband.mouseup('button_release', mouse_event_fn);\n // Throttle sequential mouse events to 1 every 20ms.\n rubberband.mousemove('motion_notify', mouse_event_fn);\n\n rubberband.mouseenter('figure_enter', mouse_event_fn);\n rubberband.mouseleave('figure_leave', mouse_event_fn);\n\n canvas_div.on(\"wheel\", function (event) {\n event = event.originalEvent;\n event['data'] = 'scroll'\n if (event.deltaY < 0) {\n event.step = 
1;\n } else {\n event.step = -1;\n }\n mouse_event_fn(event);\n });\n\n canvas_div.append(canvas);\n canvas_div.append(rubberband);\n\n this.rubberband = rubberband;\n this.rubberband_canvas = rubberband[0];\n this.rubberband_context = rubberband[0].getContext(\"2d\");\n this.rubberband_context.strokeStyle = \"#000000\";\n\n this._resize_canvas = function(width, height) {\n // Keep the size of the canvas, canvas container, and rubber band\n // canvas in synch.\n canvas_div.css('width', width)\n canvas_div.css('height', height)\n\n canvas.attr('width', width * mpl.ratio);\n canvas.attr('height', height * mpl.ratio);\n canvas.attr('style', 'width: ' + width + 'px; height: ' + height + 'px;');\n\n rubberband.attr('width', width);\n rubberband.attr('height', height);\n }\n\n // Set the figure to an initial 600x600px, this will subsequently be updated\n // upon first draw.\n this._resize_canvas(600, 600);\n\n // Disable right mouse context menu.\n $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n return false;\n });\n\n function set_focus () {\n canvas.focus();\n canvas_div.focus();\n }\n\n window.setTimeout(set_focus, 100);\n}\n\nmpl.figure.prototype._init_toolbar = function() {\n var fig = this;\n\n var nav_element = $('
')\n nav_element.attr('style', 'width: 100%');\n this.root.append(nav_element);\n\n // Define a callback function for later on.\n function toolbar_event(event) {\n return fig.toolbar_button_onclick(event['data']);\n }\n function toolbar_mouse_event(event) {\n return fig.toolbar_button_onmouseover(event['data']);\n }\n\n for(var toolbar_ind in mpl.toolbar_items) {\n var name = mpl.toolbar_items[toolbar_ind][0];\n var tooltip = mpl.toolbar_items[toolbar_ind][1];\n var image = mpl.toolbar_items[toolbar_ind][2];\n var method_name = mpl.toolbar_items[toolbar_ind][3];\n\n if (!name) {\n // put a spacer in here.\n continue;\n }\n var button = $('