From 5f829f7e18129b76f4ddfc89e1a6bea2f1a31306 Mon Sep 17 00:00:00 2001 From: juanbc Date: Thu, 25 Jan 2024 18:02:40 -0300 Subject: [PATCH] extend tutorial --- .gitignore | 1 + docs/source/tutorial/extend.ipynb | 892 ++++++++++++++++++++++++++++-- docs/source/tutorial/index.rst | 3 +- skcriteria/agg/_agg_base.py | 1 - skcriteria/utils/bunch.py | 2 +- tests/utils/test_bunch.py | 5 + 6 files changed, 846 insertions(+), 58 deletions(-) diff --git a/.gitignore b/.gitignore index 2cc0161..c21bb40 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,4 @@ setuptools-*.zip .vscode/ result_images/ docs/source/_dynamic/*.rst +**/.ipynb_checkpoints \ No newline at end of file diff --git a/docs/source/tutorial/extend.ipynb b/docs/source/tutorial/extend.ipynb index 4a3042a..ef57727 100644 --- a/docs/source/tutorial/extend.ipynb +++ b/docs/source/tutorial/extend.ipynb @@ -9,7 +9,7 @@ "\n", "This tutorial serves as a guide for utilizing the extension tools for aggregation and transformer functions in Scikit-Criteria. After going through this tutorial, you will be able to implement your own multi-criteria decision models compatible with the data types and tools provided by the library.\n", "\n", - "## Introduction\n", + "## 1. Introduction\n", "\n", "In Scikit-Criteria, leveraging the provided decorators (`@extend.mkagg` and `@extend.mktransformer`) for extending aggregation and transformation functions provides a powerful means to customize decision-making models allowing the creation of custom functions, enabling domain-specific logic implementation for diverse use cases. \n", "\n", @@ -55,7 +55,9 @@ "\n", "To create a custom aggregation model, follow these steps:\n", "\n", - "1. Declare a function with the name of your model using the [`CapWords`/`UpperCamelCase`/`PascalCase`](https://en.wikipedia.org/wiki/Camel_case) convention. 
While this is not mandatory, not adhering to this convention will trigger a warning message from scikit-criteria, notifying that the model name does not follow the Scikit-Criteria standard.\n" + "1. Declare a function with the name of your model using the [CapWords/UpperCamelCase/PascalCase](https://en.wikipedia.org/wiki/Camel_case) convention. While this is not mandatory, not adhering to this convention will trigger a warning message from scikit-criteria, notifying that the model name does not follow the Scikit-Criteria standard.\n", + "\n", + "\n" ] }, { @@ -97,18 +99,16 @@ "\n", "\n", "- `hparams`: Model Hyperparameters.\n", - "- `matrix`: Alternatives matrix as pandas DataFrame.\n", - "- `objectives`: Objectives for criteria as integers: $maximize = 1$ and $minimize = -1$\n", - "- `weights`: Weights of the criteria.\n", - "- `dtypes`: Dtypes of the criteria.\n", - "- `alternatives`: Names of the alternatives.\n", - "- `criteria`: Names of the criteria.\n", - "\n", - "Additionally, if you do not want to use any of those parts of the matrix, you can declare the function with [Variable Keyword Arguments (`**kwargs`)](https://www.w3schools.com/python/gloss_python_function_arbitrary_keyword_arguments.asp).\n", - "\n", + "- `matrix`: Alternatives matrix as numpy array.\n", + "- `objectives`: numpy array of objectives for criteria as integers: $maximize = 1$ and $minimize = -1$.\n", + "- `weights`: Weights of the criteria as numpy array.\n", + "- `dtypes`: Data types of the criteria as numpy array.\n", + "- `alternatives`: Names of the alternatives as numpy array.\n", + "- `criteria`: Names of the criteria as numpy array.\n", "\n", - "**If any parameter is forgotten and `**kwargs` is not present, a [`TypeError`](https://docs.python.org/3/library/exceptions.html#TypeError) is raised.**\n", + "Additionally, if you do not want to use any of those parameters of the matrix, you can declare the function with [Variable Keyword Arguments 
(**kwargs)](https://www.w3schools.com/python/gloss_python_function_arbitrary_keyword_arguments.asp).\n", "\n", + "If any parameter is forgotten and `**kwargs` is not present, a [TypeError](https://docs.python.org/3/library/exceptions.html#TypeError) is raised.\n", "\n", "So this next two functions are a valid Aggregation functions" ] @@ -330,27 +330,27 @@ "
\n", "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
AlternativesPEJNAAFXMMGNPEJNAAFXMMGN
Rank111111Rank111111
\n", @@ -615,27 +615,27 @@ "
\n", "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
AlternativesPEJNAAFXMMGNPEJNAAFXMMGN
Rank352641Rank352641
\n", @@ -697,27 +697,27 @@ "
\n", "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
AlternativesPEJNAAFXMMGNPEJNAAFXMMGN
Rank243651Rank243651
\n", @@ -776,7 +776,789 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. A New Transformer" + "## 3. A New Transformer\n", + "\n", + "The only difference between creating a new aggregator and a transformer lies in the type of data returned by the decorated function. Everything else is exactly the same (received parameters, function names, and functionality of hyperparameters).\n", + "\n", + "The decorated function must return a dictionary that can have the same keys as the parameters received by the function except for `hparams`: `matrix`, `objectives`, `weights`, `dtypes`, `alternatives`, or `criteria`; and whose values must be the new values with which to replace the original ones in the transformation matrix.\n", + "\n", + "It is not necessary to return all values; only the ones that you want to change.\n", + "\n", + "\n", + "For example, let's create a transformer `StrFormat` that converts the text of the names of each criterion and alternative using the methods of `str`; by default, it converts the text to lowercase.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "]>" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@mktransformer(operation=str.lower)\n", + "def StrFormat(alternatives, criteria, hparams, **kwargs):\n", + " \"\"\"Applies a string formatting operation (lowercasing by default) to alternatives and criteria.\"\"\"\n", + " # Apply the string formatting operation to each alternative\n", + " new_alternatives = [hparams.operation(a) for a in alternatives]\n", + "\n", + " # Apply the string formatting operation to each criterion\n", + " new_criteria = [hparams.operation(c) for c in criteria]\n", + "\n", + " # Return the transformed alternatives and criteria in a dictionary\n", + " return {\"alternatives\": new_alternatives, \"criteria\": new_criteria}\n", + " \n", + "trans = 
StrFormat()\n", + "trans" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
roe[▲ 2.0]cap[▲ 4.0]ri[▲ 1.0]
pe750.028571
jn540.038462
aa560.035714
fx340.027778
mm170.033333
gn580.033333
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " roe[▲ 2.0] cap[▲ 4.0] ri[▲ 1.0]\n", + "pe 7 5 0.028571\n", + "jn 5 4 0.038462\n", + "aa 5 6 0.035714\n", + "fx 3 4 0.027778\n", + "mm 1 7 0.033333\n", + "gn 5 8 0.033333\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trans.transform(dm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use any function provided by `str`." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "]>" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trans = StrFormat(operation=str.capitalize)\n", + "trans" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Roe[▲ 2.0]Cap[▲ 4.0]Ri[▲ 1.0]
Pe750.028571
Jn540.038462
Aa560.035714
Fx340.027778
Mm170.033333
Gn580.033333
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " Roe[▲ 2.0] Cap[▲ 4.0] Ri[▲ 1.0]\n", + "Pe 7 5 0.028571\n", + "Jn 5 4 0.038462\n", + "Aa 5 6 0.035714\n", + "Fx 3 4 0.027778\n", + "Mm 1 7 0.033333\n", + "Gn 5 8 0.033333\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trans.transform(dm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In fact, given our implementation, any arbitrary function that converts text can be used. For example, if we want to create our own function that adds exclamation marks to the end of each criterion and alternative." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "]>" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def add_exclamation(text):\n", + " return text + \" !! \"\n", + "\n", + "trans = StrFormat(operation=add_exclamation)\n", + "trans" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ROE !! [▲ 2.0]CAP !! [▲ 4.0]RI !! [▲ 1.0]
PE !!750.028571
JN !!540.038462
AA !!560.035714
FX !!340.027778
MM !!170.033333
GN !!580.033333
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " ROE !! [▲ 2.0] CAP !! [▲ 4.0] RI !! [▲ 1.0]\n", + "PE !! 7 5 0.028571\n", + "JN !! 5 4 0.038462\n", + "AA !! 5 6 0.035714\n", + "FX !! 3 4 0.027778\n", + "MM !! 1 7 0.033333\n", + "GN !! 5 8 0.033333\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trans.transform(dm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1 Special considerations regarding `dtypes`\n", + "\n", + "\n", + "By design, Scikit-Criteria **always** attempts to preserve the original data types, unless it needs to infer them again.\n", + "\n", + "This may not seem important to a user at first glance, so let's use an example of a transformer affected by this characteristic.\n", + "\n", + "First, let's reload the original decision matrix, where the values of all criteria are `int`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ROE[▲ 2.0]CAP[▲ 4.0]RI[▼ 1.0]
PE7535
JN5426
AA5628
FX3436
MM1730
GN5830
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " ROE[▲ 2.0] CAP[▲ 4.0] RI[▼ 1.0]\n", + "PE 7 5 35\n", + "JN 5 4 26\n", + "AA 5 6 28\n", + "FX 3 4 36\n", + "MM 1 7 30\n", + "GN 5 8 30\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dm = skc.datasets.load_simple_stock_selection()\n", + "dm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's create a transformer that converts all criteria to the `float` type." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@mktransformer\n", + "def AsFloat(matrix, **kwargs):\n", + " \"\"\"Converts the elements of a decision-matrix to floating-point numbers.\"\"\"\n", + " # Convert the elements of the matrix to floating-point numbers\n", + " new_matrix = matrix.astype(float)\n", + " \n", + " # Return the transformed matrix in a dictionary\n", + " return {\"matrix\": new_matrix}\n", + "\n", + "trans = AsFloat()\n", + "trans\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let's test its functionality.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ROE[▲ 2.0]CAP[▲ 4.0]RI[▼ 1.0]
PE7535
JN5426
AA5628
FX3436
MM1730
GN5830
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " ROE[▲ 2.0] CAP[▲ 4.0] RI[▼ 1.0]\n", + "PE 7 5 35\n", + "JN 5 4 26\n", + "AA 5 6 28\n", + "FX 3 4 36\n", + "MM 1 7 30\n", + "GN 5 8 30\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trans.transform(dm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As can be seen, the numbers are still integers. This is because the `dtypes` parameter of the matrix indicates that those columns are indeed integers.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ROE int64\n", + "CAP int64\n", + "RI int64\n", + "dtype: object" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dm.dtypes # check the dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The simplest solution would be to ensure that the dtypes are inferred again based on the values of the new matrix. This is achieved by assigning the dtype values to None.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ROE[▲ 2.0]CAP[▲ 4.0]RI[▼ 1.0]
PE7.05.035.0
JN5.04.026.0
AA5.06.028.0
FX3.04.036.0
MM1.07.030.0
GN5.08.030.0
\n", + "
6 Alternatives x 3 Criteria\n", + "
" + ], + "text/plain": [ + " ROE[▲ 2.0] CAP[▲ 4.0] RI[▼ 1.0]\n", + "PE 7.0 5.0 35.0\n", + "JN 5.0 4.0 26.0\n", + "AA 5.0 6.0 28.0\n", + "FX 3.0 4.0 36.0\n", + "MM 1.0 7.0 30.0\n", + "GN 5.0 8.0 30.0\n", + "[6 Alternatives x 3 Criteria]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "@mktransformer\n", + "def AsFloat(matrix, **kwargs):\n", + " \"\"\"Converts the elements of a decision-matrix to floating-point numbers.\"\"\"\n", + " # Convert the elements of the matrix to floating-point numbers\n", + " new_matrix = matrix.astype(float)\n", + " \n", + " # Return the transformed matrix in a dictionary\n", + " # and assign the dtypes as None\n", + " return {\"matrix\": new_matrix, \"dtypes\": None}\n", + "\n", + "trans = AsFloat()\n", + "trans.transform(dm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While this may seem somewhat inconvenient, it gives the user complete control over the data types of the matrix without assuming default behaviors that may be undesirable.\n", + "\n", + "It's essential to consider that the original `dtypes` are also received by the transformer (in our case, they are inside `**kwargs`) and can be used to determine the new types." ] } ], diff --git a/docs/source/tutorial/index.rst b/docs/source/tutorial/index.rst index a279303..6ff3b13 100644 --- a/docs/source/tutorial/index.rst +++ b/docs/source/tutorial/index.rst @@ -67,4 +67,5 @@ to different scientific areas. Python quickly, we recommend `Dive Into Python `_. If that's not quite your style, there are many other `books about Python `_. 
- At last if you already know Python but check the `Scipy Lecture Notes `_ + At last, if you're already familiar with Python and eager to explore the scientific stack further, be sure to check out + the `Scipy Lecture Notes `_ diff --git a/skcriteria/agg/_agg_base.py b/skcriteria/agg/_agg_base.py index efa70e2..7eeaf65 100644 --- a/skcriteria/agg/_agg_base.py +++ b/skcriteria/agg/_agg_base.py @@ -276,7 +276,6 @@ class RankResult(ResultABC): @doc_inherit(ResultABC._validate_result) def _validate_result(self, values): - # the sorted unique values of the rank! # [1, 1, 1, 2, 3] >>> [1, 2, 3] <<< OK! this is consecutive # [1, 1, 4, 4, 3] >>> [1, 3, 4] <<< BAD this is not consecutive diff --git a/skcriteria/utils/bunch.py b/skcriteria/utils/bunch.py index bf4e3be..3e1da1a 100644 --- a/skcriteria/utils/bunch.py +++ b/skcriteria/utils/bunch.py @@ -54,7 +54,7 @@ class Bunch(Mapping): def __init__(self, name, data): if not isinstance(data, Mapping): - raise TypeError(f"Data must be some kind of mapping") + raise TypeError("Data must be some kind of mapping") self._name = str(name) self._data = data diff --git a/tests/utils/test_bunch.py b/tests/utils/test_bunch.py index e9c570d..9e97286 100644 --- a/tests/utils/test_bunch.py +++ b/tests/utils/test_bunch.py @@ -86,3 +86,8 @@ def test_Bunch_copy(): assert md is not md_c assert md._name == md_c._name assert md._data == md_c._data and md._data is md_c._data + + +def test_Bunch_data_is_not_a_mapping(): + with pytest.raises(TypeError, match="Data must be some kind of mapping"): + bunch.Bunch("foo", None)