From f16ba084a0456e7108ab9c459eac595ad7187aaf Mon Sep 17 00:00:00 2001
From: Matthias Feurer
Date: Mon, 31 May 2021 11:30:52 +0200
Subject: [PATCH 01/53] minor fixes to usage.rst (#1090)

---
 doc/usage.rst | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/doc/usage.rst b/doc/usage.rst
index dd85d989c..8c713b586 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -29,15 +29,18 @@ machine learning algorithms on them and then share the results online.
 The following tutorial gives a short introduction on how to install and
 set up the OpenML Python connector, followed up by a simple example.
-* `:ref:`sphx_glr_examples_20_basic_introduction_tutorial.py`
+* :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py`
 ~~~~~~~~~~~~~
 Configuration
 ~~~~~~~~~~~~~
-The configuration file resides in a directory ``.openml`` in the home
-directory of the user and is called config. It consists of ``key = value`` pairs
-which are separated by newlines. The following keys are defined:
+The configuration file resides in a directory ``.config/openml`` in the home
+directory of the user and is called config (more specifically, it resides in the
+`configuration directory specified by the XDG Base Directory Specification
+`_).
+It consists of ``key = value`` pairs which are separated by newlines.
+The following keys are defined:
 * apikey:
     * required to access the server. The :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py`

From 6717e66a1e967a131a6ca7feb96f6d166017bed7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 16 Jun 2021 08:54:44 +0200
Subject: [PATCH 02/53] Add Windows to Github Actions CI matrix (#1095)

* Add Windows to Github Actions CI matrix

* Fix syntax, disable Ubuntu tests

Ubuntu tests are only temporarily disabled for this PR, to avoid
unnecessary computational costs/time.

* Fix syntax for skip on install Python step

* Explicitly add the OS to includes

* Disable check for files left behind for Windows

The check is a bash script, which means it fails on a Windows machine.
* Re-enable Ubuntu tests * Replace Appveyor with Github Actions for WindowsCI --- .../workflows/{ubuntu-test.yml => test.yml} | 25 ++++-- appveyor.yml | 48 ---------- appveyor/run_with_env.cmd | 88 ------------------- doc/progress.rst | 6 ++ tests/test_runs/test_run_functions.py | 5 +- 5 files changed, 26 insertions(+), 146 deletions(-) rename .github/workflows/{ubuntu-test.yml => test.yml} (72%) delete mode 100644 appveyor.yml delete mode 100644 appveyor/run_with_env.cmd diff --git a/.github/workflows/ubuntu-test.yml b/.github/workflows/test.yml similarity index 72% rename from .github/workflows/ubuntu-test.yml rename to .github/workflows/test.yml index 41cc155ac..059aec58d 100644 --- a/.github/workflows/ubuntu-test.yml +++ b/.github/workflows/test.yml @@ -3,13 +3,14 @@ name: Tests on: [push, pull_request] jobs: - ubuntu: - - runs-on: ubuntu-latest + test: + name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}) + runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8] scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] + os: [ubuntu-latest] exclude: # no scikit-learn 0.21.2 release for Python 3.8 - python-version: 3.8 scikit-learn: 0.21.2 @@ -17,13 +18,19 @@ jobs: - python-version: 3.6 scikit-learn: 0.18.2 scipy: 1.2.0 + os: ubuntu-latest - python-version: 3.6 scikit-learn: 0.19.2 + os: ubuntu-latest - python-version: 3.6 scikit-learn: 0.20.2 + os: ubuntu-latest - python-version: 3.8 scikit-learn: 0.23.1 code-cov: true + os: ubuntu-latest + - os: windows-latest + scikit-learn: 0.24.* fail-fast: false max-parallel: 4 @@ -32,6 +39,7 @@ jobs: with: fetch-depth: 2 - name: Setup Python ${{ matrix.python-version }} + if: matrix.os != 'windows-latest' # windows-latest only uses preinstalled Python (3.7.9) uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} @@ -50,12 +58,17 @@ jobs: id: status-before run: | echo "::set-output name=BEFORE::$(git status --porcelain -b)" - - name: Run tests + - name: Run tests on Ubuntu + if: matrix.os == 'ubuntu-latest' run: | if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov --reruns 5 --reruns-delay 1 + - name: Run tests on Windows + if: matrix.os == 'windows-latest' + run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
+ pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv --reruns 5 --reruns-delay 1 - name: Check for files left behind by test - if: ${{ always() }} + if: matrix.os != 'windows-latest' && always() run: | before="${{ steps.status-before.outputs.BEFORE }}" after="$(git status --porcelain -b)" @@ -71,4 +84,4 @@ jobs: with: files: coverage.xml fail_ci_if_error: true - verbose: true \ No newline at end of file + verbose: true diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index e3fa74aaf..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,48 +0,0 @@ -clone_folder: C:\\projects\\openml-python - -environment: -# global: -# CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\scikit-learn-contrib\\run_with_env.cmd" - - matrix: - - PYTHON: "C:\\Python3-x64" - PYTHON_VERSION: "3.6" - PYTHON_ARCH: "64" - MINICONDA: "C:\\Miniconda36-x64" - -matrix: - fast_finish: true - - -install: - # Miniconda is pre-installed in the worker build - - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" - - "python -m pip install -U pip" - - # Check that we have the expected version and architecture for Python - - "python --version" - - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" - - "pip --version" - - # Remove cygwin because it clashes with conda - # see http://help.appveyor.com/discussions/problems/3712-git-remote-https-seems-to-be-broken - - rmdir C:\\cygwin /s /q - - # Update previous packages and install the build and runtime dependencies of the project. - - conda update conda --yes - - conda update --all --yes - - # Install the build and runtime dependencies of the project. - - "cd C:\\projects\\openml-python" - - "pip install .[examples,test]" - - "pip install scikit-learn==0.21" - # Uninstall coverage, as it leads to an error on appveyor - - "pip uninstall -y pytest-cov" - - -# Not a .NET project, we build scikit-learn in the install step instead -build: false - -test_script: - - "cd C:\\projects\\openml-python" - - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread --dist load -sv" diff --git a/appveyor/run_with_env.cmd b/appveyor/run_with_env.cmd deleted file mode 100644 index 5da547c49..000000000 --- a/appveyor/run_with_env.cmd +++ /dev/null @@ -1,88 +0,0 @@ -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) -:: -:: To build extensions for 64 bit Python 2, we need to configure environment -:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) -:: -:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific -:: environment configurations. -:: -:: Note: this script needs to be run with the /E:ON and /V:ON flags for the -:: cmd interpreter, at least for (SDK v7.0) -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows -:: http://stackoverflow.com/a/13751649/163740 -:: -:: Author: Olivier Grisel -:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ -:: -:: Notes about batch files for Python people: -:: -:: Quotes in values are literally part of the values: -:: SET FOO="bar" -:: FOO is now five characters long: " b a r " -:: If you don't want quotes, don't include them on the right-hand side. 
-::
-:: The CALL lines at the end of this file look redundant, but if you move them
-:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y
-:: case, I don't know why.
-@ECHO OFF
-
-SET COMMAND_TO_RUN=%*
-SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows
-SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf
-
-:: Extract the major and minor versions, and allow for the minor version to be
-:: more than 9. This requires the version number to have two dots in it.
-SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1%
-IF "%PYTHON_VERSION:~3,1%" == "." (
-    SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1%
-) ELSE (
-    SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2%
-)
-
-:: Based on the Python version, determine what SDK version to use, and whether
-:: to set the SDK for 64-bit.
-IF %MAJOR_PYTHON_VERSION% == 2 (
-    SET WINDOWS_SDK_VERSION="v7.0"
-    SET SET_SDK_64=Y
-) ELSE (
-    IF %MAJOR_PYTHON_VERSION% == 3 (
-        SET WINDOWS_SDK_VERSION="v7.1"
-        IF %MINOR_PYTHON_VERSION% LEQ 4 (
-            SET SET_SDK_64=Y
-        ) ELSE (
-            SET SET_SDK_64=N
-            IF EXIST "%WIN_WDK%" (
-                :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/
-                REN "%WIN_WDK%" 0wdf
-            )
-        )
-    ) ELSE (
-        ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%"
-        EXIT 1
-    )
-)
-
-IF %PYTHON_ARCH% == 64 (
-    IF %SET_SDK_64% == Y (
-        ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture
-        SET DISTUTILS_USE_SDK=1
-        SET MSSdk=1
-        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION%
-        "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release
-        ECHO Executing: %COMMAND_TO_RUN%
-        call %COMMAND_TO_RUN% || EXIT 1
-    ) ELSE (
-        ECHO Using default MSVC build environment for 64 bit architecture
-        ECHO Executing: %COMMAND_TO_RUN%
-        call %COMMAND_TO_RUN% || EXIT 1
-    )
-) ELSE (
-    ECHO Using default MSVC build environment for 32 bit architecture
-    ECHO Executing: %COMMAND_TO_RUN%
-    call %COMMAND_TO_RUN% || EXIT 1
-)
diff --git a/doc/progress.rst b/doc/progress.rst
index b0c182e05..937c60eb2 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -6,6 +6,12 @@ Changelog
 =========
 
+0.13.0
+~~~~~~
+
+  * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
+
+
 0.12.2
 ~~~~~~
 
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index c8f1729b7..b02b18880 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -366,10 +366,7 @@ def _check_sample_evaluations(
                         evaluation = sample_evaluations[measure][rep][fold][sample]
                         self.assertIsInstance(evaluation, float)
                         if not (os.environ.get("CI_WINDOWS") or os.name == "nt"):
-                            # Either Appveyor is much faster than Travis
-                            # and/or measurements are not as accurate.
-                            # Either way, windows seems to get an eval-time
-                            # of 0 sometimes.
+                            # Windows seems to get an eval-time of 0 sometimes.
                             self.assertGreater(evaluation, 0)
                             self.assertLess(evaluation, max_time_allowed)

From 29844033ec46a6bb578e9c2c5786da12131b4caa Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Wed, 27 Oct 2021 14:38:55 +0200
Subject: [PATCH 03/53] Add ChunkedEncodingError to the list of retried exceptions (#1118)

Since it can stem from connectivity issues, the error might not occur
again on a retry.
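For illustration, here is a minimal sketch of the retry pattern this patch
extends (`get_with_retry` and its parameters are made-up names for this
example; the actual logic lives in `_send_request` in the diff below):

    import time
    import requests

    RETRIABLE = (
        requests.exceptions.ChunkedEncodingError,  # stream cut off mid-transfer
        requests.exceptions.ConnectionError,
        requests.exceptions.SSLError,
    )

    def get_with_retry(url, n_retries=3, delay=1.0):
        # Retry transient network failures; re-raise on the final attempt.
        for attempt in range(n_retries):
            try:
                return requests.get(url)
            except RETRIABLE:
                if attempt == n_retries - 1:
                    raise
                time.sleep(delay)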
---
 openml/_api_calls.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index b5ed976bc..12b283738 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -242,6 +242,7 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None):
                 )
                 break
             except (
+                requests.exceptions.ChunkedEncodingError,
                 requests.exceptions.ConnectionError,
                 requests.exceptions.SSLError,
                 OpenMLServerException,

From a6c057630658c04e18c4d48670f9a89dd304b5b5 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Wed, 27 Oct 2021 15:11:35 +0200
Subject: [PATCH 04/53] Always ignore MaxRetryError but log a warning (#1119)

Currently parquet files are completely optional, so under no circumstance
should the inability to download them raise an error to the user. Instead,
we log a warning and proceed without the parquet file.
---
 openml/datasets/functions.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 34156eff7..d92d7d515 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -428,10 +428,7 @@ def get_dataset(
         arff_file = _get_dataset_arff(description) if download_data else None
         if "oml:minio_url" in description and download_data:
-            try:
-                parquet_file = _get_dataset_parquet(description)
-            except urllib3.exceptions.MaxRetryError:
-                parquet_file = None
+            parquet_file = _get_dataset_parquet(description)
         else:
             parquet_file = None
         remove_dataset_cache = False
@@ -1003,7 +1000,8 @@ def _get_dataset_parquet(
         openml._api_calls._download_minio_file(
             source=cast(str, url), destination=output_file_path
         )
-    except FileNotFoundError:
+    except (FileNotFoundError, urllib3.exceptions.MaxRetryError) as e:
+        logger.warning("Could not download file from %s: %s" % (cast(str, url), e))
         return None
     return output_file_path

From b4c868a791f3fd08c5dc28c2f22d5ac9afd9e643 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Thu, 28 Oct 2021 09:49:44 +0200
Subject: [PATCH 05/53] Fix/1110 (#1117)

Update function signatures for create_study|suite and allow for empty
studies (i.e. with no runs).
---
 doc/progress.rst                         |  2 +-
 openml/study/functions.py                | 65 +++++++---------
 tests/test_study/test_study_functions.py | 28 +++++++-
 3 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/doc/progress.rst b/doc/progress.rst
index 937c60eb2..401550a4d 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,7 +8,7 @@ Changelog
 
 0.13.0
 ~~~~~~
-
+  * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
   * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
 
diff --git a/openml/study/functions.py b/openml/study/functions.py
index ee877ddf2..144c089b3 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -3,7 +3,6 @@
 from typing import cast, Dict, List, Optional, Union
 import warnings
 
-import dateutil.parser
 import xmltodict
 import pandas as pd
 
@@ -94,7 +93,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy:
     description = result_dict["oml:description"]
     status = result_dict["oml:status"]
     creation_date = result_dict["oml:creation_date"]
-    creation_date_as_date = dateutil.parser.parse(creation_date)
     creator = result_dict["oml:creator"]
 
     # tags is legacy. remove once no longer needed.
@@ -106,35 +104,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: current_tag["window_start"] = tag["oml:window_start"] tags.append(current_tag) - if "oml:data" in result_dict: - datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]] - else: - raise ValueError("No datasets attached to study {}!".format(id_)) - if "oml:tasks" in result_dict: - tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]] - else: - raise ValueError("No tasks attached to study {}!".format(id_)) + def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]: + if result_dict.get(key) is not None: + return [int(oml_id) for oml_id in result_dict[key][subkey]] + return None - if main_entity_type in ["runs", "run"]: + datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id") + tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id") - if "oml:flows" in result_dict: - flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]] - else: - raise ValueError("No flows attached to study {}!".format(id_)) - if "oml:setups" in result_dict: - setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]] - else: - raise ValueError("No setups attached to study {}!".format(id_)) - if "oml:runs" in result_dict: - runs = [ - int(x) for x in result_dict["oml:runs"]["oml:run_id"] - ] # type: Optional[List[int]] - else: - if creation_date_as_date < dateutil.parser.parse("2019-01-01"): - # Legacy studies did not require runs - runs = None - else: - raise ValueError("No runs attached to study {}!".format(id_)) + if main_entity_type in ["runs", "run"]: + flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id") + setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id") + runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id") study = OpenMLStudy( study_id=study_id, @@ -177,9 +158,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: def create_study( name: str, description: str, - run_ids: List[int], - alias: Optional[str], - benchmark_suite: Optional[int], + run_ids: Optional[List[int]] = None, + alias: Optional[str] = None, + benchmark_suite: Optional[int] = None, ) -> OpenMLStudy: """ Creates an OpenML study (collection of data, tasks, flows, setups and run), @@ -188,16 +169,19 @@ def create_study( Parameters ---------- - alias : str (optional) - a string ID, unique on server (url-friendly) benchmark_suite : int (optional) the benchmark suite (another study) upon which this study is ran. name : str the name of the study (meta-info) description : str brief description (meta-info) - run_ids : list - a list of run ids associated with this study + run_ids : list, optional + a list of run ids associated with this study, + these can also be added later with ``attach_to_study``. 
+ alias : str (optional) + a string ID, unique on server (url-friendly) + benchmark_suite: int (optional) + the ID of the suite for which this study contains run results Returns ------- @@ -217,13 +201,13 @@ def create_study( data=None, tasks=None, flows=None, - runs=run_ids, + runs=run_ids if run_ids != [] else None, setups=None, ) def create_benchmark_suite( - name: str, description: str, task_ids: List[int], alias: Optional[str], + name: str, description: str, task_ids: List[int], alias: Optional[str] = None, ) -> OpenMLBenchmarkSuite: """ Creates an OpenML benchmark suite (collection of entity types, where @@ -231,14 +215,15 @@ def create_benchmark_suite( Parameters ---------- - alias : str (optional) - a string ID, unique on server (url-friendly) name : str the name of the study (meta-info) description : str brief description (meta-info) task_ids : list a list of task ids associated with this study + more can be added later with ``attach_to_suite``. + alias : str (optional) + a string ID, unique on server (url-friendly) Returns ------- diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index e028ba2bd..904df4d3a 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -1,4 +1,5 @@ # License: BSD 3-Clause +from typing import Optional, List import openml import openml.study @@ -114,6 +115,31 @@ def test_publish_benchmark_suite(self): self.assertEqual(study_downloaded.status, "deactivated") # can't delete study, now it's not longer in preparation + def _test_publish_empty_study_is_allowed(self, explicit: bool): + runs: Optional[List[int]] = [] if explicit else None + kind = "explicit" if explicit else "implicit" + + study = openml.study.create_study( + name=f"empty-study-{kind}", + description=f"a study with no runs attached {kind}ly", + run_ids=runs, + ) + + study.publish() + TestBase._mark_entity_for_removal("study", study.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + + self.assertGreater(study.id, 0) + study_downloaded = openml.study.get_study(study.id) + self.assertEqual(study_downloaded.main_entity_type, "run") + self.assertIsNone(study_downloaded.runs) + + def test_publish_empty_study_explicit(self): + self._test_publish_empty_study_is_allowed(explicit=True) + + def test_publish_empty_study_implicit(self): + self._test_publish_empty_study_is_allowed(explicit=False) + @pytest.mark.flaky() def test_publish_study(self): # get some random runs to attach @@ -214,7 +240,7 @@ def test_study_attach_illegal(self): def test_study_list(self): study_list = openml.study.list_studies(status="in_preparation") - # might fail if server is recently resetted + # might fail if server is recently reset self.assertGreaterEqual(len(study_list), 2) def test_study_list_output_format(self): From aed5010c0ef636bd071ce42c09b03c69c080923f Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Wed, 3 Nov 2021 16:47:08 +0100 Subject: [PATCH 06/53] Add AttributeError as suspect for dependency issue (#1121) * Add AttributeError as suspect for dependency issue Happens for example when loading a 1.3 dataframe with a 1.0 pandas. 
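A hedged sketch of the failure mode being caught (the file name and the
exact handling are illustrative only; the actual code is in
`OpenMLDataset._load_data` in the diff below):

    import pickle

    # A cached dataset is stored as a pickled pandas DataFrame. Reading a
    # pickle written by a newer pandas (e.g. 1.3) with an older pandas
    # (e.g. 1.0) can raise AttributeError, because the pickle references
    # internal pandas attributes that the older version does not define.
    try:
        with open("dataset.pkl", "rb") as fh:  # illustrative cache path
            data, categorical, attribute_names = pickle.load(fh)
    except AttributeError as e:
        # Treated like ModuleNotFoundError: likely a dependency issue, so
        # the user is advised to clear the cache and re-download.
        raise ValueError(f"Detected likely dependency issues: {e}") from e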
--- openml/datasets/dataset.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 122e2e697..8f1ce612b 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -544,15 +544,23 @@ def _load_data(self): data, categorical, attribute_names = pickle.load(fh) except FileNotFoundError: raise ValueError(f"Cannot find file for dataset {self.name} at location '{fpath}'.") - except (EOFError, ModuleNotFoundError, ValueError) as e: + except (EOFError, ModuleNotFoundError, ValueError, AttributeError) as e: error_message = e.message if hasattr(e, "message") else e.args[0] hint = "" if isinstance(e, EOFError): readable_error = "Detected a corrupt cache file" - elif isinstance(e, ModuleNotFoundError): + elif isinstance(e, (ModuleNotFoundError, AttributeError)): readable_error = "Detected likely dependency issues" - hint = "This is most likely due to https://github.com/openml/openml-python/issues/918. " # noqa: 501 + hint = ( + "This can happen if the cache was constructed with a different pandas version " + "than the one that is used to load the data. See also " + ) + if isinstance(e, ModuleNotFoundError): + hint += "https://github.com/openml/openml-python/issues/918. " + elif isinstance(e, AttributeError): + hint += "https://github.com/openml/openml-python/pull/1121. " + elif isinstance(e, ValueError) and "unsupported pickle protocol" in e.args[0]: readable_error = "Encountered unsupported pickle protocol" else: From db7bb9ade05ea8877994bf9b516ec8738caa82bd Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 11 Jan 2022 11:30:43 +0100 Subject: [PATCH 07/53] Add CITATION.cff (#1120) Some ORCIDs are missing because I could not with certainty determine the ORCID of some co-authors. --- CITATION.cff | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..c5454ef6f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,40 @@ +cff-version: 1.2.0 +message: "If you use this software in a publication, please cite the metadata from preferred-citation." +preferred-citation: + type: article + authors: + - family-names: "Feurer" + given-names: "Matthias" + orcid: "https://orcid.org/0000-0001-9611-8588" + - family-names: "van Rijn" + given-names: "Jan N." + orcid: "https://orcid.org/0000-0003-2898-2168" + - family-names: "Kadra" + given-names: "Arlind" + - family-names: "Gijsbers" + given-names: "Pieter" + orcid: "https://orcid.org/0000-0001-7346-8075" + - family-names: "Mallik" + given-names: "Neeratyoy" + orcid: "https://orcid.org/0000-0002-0598-1608" + - family-names: "Ravi" + given-names: "Sahithya" + - family-names: "Müller" + given-names: "Andreas" + orcid: "https://orcid.org/0000-0002-2349-9428" + - family-names: "Vanschoren" + given-names: "Joaquin" + orcid: "https://orcid.org/0000-0001-7044-9805" + - family-names: "Hutter" + given-names: "Frank" + orcid: "https://orcid.org/0000-0002-2037-3694" + journal: "Journal of Machine Learning Research" + title: "OpenML-Python: an extensible Python API for OpenML" + abstract: "OpenML is an online platform for open science collaboration in machine learning, used to share datasets and results of machine learning experiments. In this paper, we introduce OpenML-Python, a client API for Python, which opens up the OpenML platform for a wide range of Python-based machine learning tools. 
It provides easy access to all datasets, tasks and experiments on OpenML from within Python.
    It also provides functionality to conduct machine learning experiments, upload the results
    to OpenML, and reproduce results which are stored on OpenML. Furthermore, it comes with a
    scikit-learn extension and an extension mechanism to easily integrate other machine learning
    libraries written in Python into the OpenML ecosystem. Source code and documentation are
    available at https://github.com/openml/openml-python/."
  volume: 22
  year: 2021
  start: 1
  end: 5
  pages: 5
  number: 100
  url: https://jmlr.org/papers/v22/19-920.html

From 493511a297a271e7a356a56d01f11c08a30ffd28 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Thu, 14 Apr 2022 19:17:27 +0200
Subject: [PATCH 08/53] Precommit update (#1129)

* Correctly use regex to specify files

* Add type hint

* Add note about fixing the pre-commit hooks (#1129)
---
 .pre-commit-config.yaml   | 8 ++++----
 doc/progress.rst          | 2 ++
 openml/study/functions.py | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b3a1d2aba..e13aa2fd0 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,20 +9,20 @@ repos:
     hooks:
       - id: mypy
         name: mypy openml
-        files: openml/*
+        files: openml/.*
       - id: mypy
         name: mypy tests
-        files: tests/*
+        files: tests/.*
 - repo: https://gitlab.com/pycqa/flake8
   rev: 3.8.3
   hooks:
     - id: flake8
       name: flake8 openml
-      files: openml/*
+      files: openml/.*
       additional_dependencies:
         - flake8-print==3.1.4
     - id: flake8
       name: flake8 tests
-      files: tests/*
+      files: tests/.*
       additional_dependencies:
         - flake8-print==3.1.4
diff --git a/doc/progress.rst b/doc/progress.rst
index 401550a4d..c31976301 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -8,10 +8,12 @@ Changelog
 
 0.13.0
 ~~~~~~
+  * FIX#1030: ``pre-commit`` hooks should no longer issue a warning.
   * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
   * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
 
+
 0.12.2
 ~~~~~~
 
diff --git a/openml/study/functions.py b/openml/study/functions.py
index 144c089b3..26cb9bd55 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -135,7 +135,7 @@ def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]:
     )  # type: BaseStudy
 
     elif main_entity_type in ["tasks", "task"]:
-
+        tasks = cast("List[int]", tasks)
     study = OpenMLBenchmarkSuite(
         suite_id=study_id,
         alias=alias,

From 99a62f609766db1d8a27ddc52cb619f920c052d0 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Tue, 19 Apr 2022 20:28:09 +0200
Subject: [PATCH 09/53] Predictions (#1128)

* Add easy way to retrieve run predictions

* Log addition of ``predictions`` (#1103)
---
 doc/progress.rst                      |  2 +-
 openml/runs/run.py                    | 18 ++++++++++++++++++
 tests/test_runs/test_run_functions.py |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doc/progress.rst b/doc/progress.rst
index c31976301..286666767 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -11,7 +11,7 @@ Changelog
   * FIX#1030: ``pre-commit`` hooks should no longer issue a warning.
   * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
   * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
-
+  * ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.
 0.12.2
 ~~~~~~
 
diff --git a/openml/runs/run.py b/openml/runs/run.py
index 4c1c9907d..5c93e9518 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -8,6 +8,7 @@
 
 import arff
 import numpy as np
+import pandas as pd
 
 import openml
 import openml._api_calls
@@ -116,6 +117,23 @@ def __init__(
         self.predictions_url = predictions_url
         self.description_text = description_text
         self.run_details = run_details
+        self._predictions = None
+
+    @property
+    def predictions(self) -> pd.DataFrame:
+        """ Return a DataFrame with predictions for this run """
+        if self._predictions is None:
+            if self.data_content:
+                arff_dict = self._generate_arff_dict()
+            elif self.predictions_url:
+                arff_text = openml._api_calls._download_text_file(self.predictions_url)
+                arff_dict = arff.loads(arff_text)
+            else:
+                raise RuntimeError("Run has no predictions.")
+            self._predictions = pd.DataFrame(
+                arff_dict["data"], columns=[name for name, _ in arff_dict["attributes"]]
+            )
+        return self._predictions
 
     @property
     def id(self) -> Optional[int]:
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index b02b18880..8eafb0a7b 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -175,6 +175,7 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create
 
         predictions_prime = run_prime._generate_arff_dict()
         self._compare_predictions(predictions, predictions_prime)
+        pd.testing.assert_frame_equal(run.predictions, run_prime.predictions)
 
     def _perform_run(
         self,

From c911d6d3043af7c01bc8f682f400526b422fe5bf Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Wed, 27 Oct 2021 15:11:35 +0200
Subject: [PATCH 10/53] Use GET instead of POST for flow/exists (#1147)

---
 doc/progress.rst          | 1 +
 openml/flows/functions.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/progress.rst b/doc/progress.rst
index 286666767..02dd78086 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -10,6 +10,7 @@ Changelog
 ~~~~~~
   * FIX#1030: ``pre-commit`` hooks should no longer issue a warning.
   * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
+  * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key.
   * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
   * ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.
 
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 048fa92a4..28d49b691 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -253,7 +253,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]:
         raise ValueError("Argument 'version' should be a non-empty string")
 
     xml_response = openml._api_calls._perform_api_call(
-        "flow/exists", "post", data={"name": name, "external_version": external_version},
+        "flow/exists", "get", data={"name": name, "external_version": external_version},
     )
 
     result_dict = xmltodict.parse(xml_response)

From c6fab8ea1e71b1cfa18d043b2af676317182a912 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Mon, 11 Jul 2022 10:06:33 +0200
Subject: [PATCH 11/53] pre-commit update (#1150)

* Update to latest versions

* Update Black formatting

Black was bumped from 19.10b0 to 22.6.0. Changes in the files are limited to:
- No whitespace at the start and end of a docstring.
- All comma-separated "lists" (for example in function calls) are now one
  item per line, regardless of whether they would fit on one line.
* Update error code for "print" Changed in flake8-print 5.0.0: https://pypi.org/project/flake8-print/ * Shorten comment to observe line length codestyle * Install stubs for requests for mypy * Add dependency for mypy dateutil type stubs * Resolve mypy warnings * Add update pre-commit dependencies notice --- .flake8 | 2 +- .pre-commit-config.yaml | 16 ++- doc/progress.rst | 1 + examples/30_extended/custom_flow_.py | 9 +- .../30_extended/fetch_runtimes_tutorial.py | 10 +- .../30_extended/flows_and_runs_tutorial.py | 6 +- examples/30_extended/run_setup_tutorial.py | 12 ++- examples/30_extended/study_tutorial.py | 4 +- .../task_manual_iteration_tutorial.py | 43 ++++++-- openml/_api_calls.py | 55 ++++++++--- openml/base.py | 34 +++---- openml/cli.py | 16 ++- openml/config.py | 31 +++--- openml/datasets/dataset.py | 22 ++--- openml/datasets/functions.py | 28 +++--- openml/evaluations/functions.py | 2 +- openml/exceptions.py | 16 +-- openml/extensions/extension_interface.py | 8 +- openml/extensions/functions.py | 8 +- openml/extensions/sklearn/extension.py | 64 ++++++++---- openml/flows/flow.py | 10 +- openml/flows/functions.py | 20 ++-- openml/runs/functions.py | 25 +++-- openml/runs/run.py | 22 +++-- openml/runs/trace.py | 19 +++- openml/setups/functions.py | 2 +- openml/study/functions.py | 11 ++- openml/study/study.py | 6 +- openml/tasks/functions.py | 14 ++- openml/tasks/split.py | 10 +- openml/tasks/task.py | 96 ++++++++++-------- openml/testing.py | 4 +- openml/utils.py | 2 +- setup.py | 10 +- tests/conftest.py | 2 +- tests/test_datasets/test_dataset_functions.py | 37 ++++--- tests/test_extensions/test_functions.py | 6 +- .../test_sklearn_extension.py | 42 +++++--- tests/test_flows/test_flow.py | 13 ++- tests/test_flows/test_flow_functions.py | 14 ++- tests/test_openml/test_api_calls.py | 3 +- tests/test_openml/test_config.py | 12 +-- tests/test_openml/test_openml.py | 11 ++- tests/test_runs/test_run.py | 21 +++- tests/test_runs/test_run_functions.py | 98 ++++++++++++++----- tests/test_runs/test_trace.py | 11 ++- tests/test_setups/test_setup_functions.py | 4 +- tests/test_study/test_study_functions.py | 6 +- tests/test_tasks/test_split.py | 12 ++- tests/test_tasks/test_task_functions.py | 25 ++++- tests/test_utils/test_utils.py | 3 +- 51 files changed, 659 insertions(+), 299 deletions(-) diff --git a/.flake8 b/.flake8 index 211234f22..2d17eec10 100644 --- a/.flake8 +++ b/.flake8 @@ -5,7 +5,7 @@ select = C,E,F,W,B,T ignore = E203, E402, W503 per-file-ignores = *__init__.py:F401 - *cli.py:T001 + *cli.py:T201 exclude = venv examples diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e13aa2fd0..ebea5251e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,28 +1,34 @@ repos: - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.6.0 hooks: - id: black args: [--line-length=100] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 + rev: v0.961 hooks: - id: mypy name: mypy openml files: openml/.* + additional_dependencies: + - types-requests + - types-python-dateutil - id: mypy name: mypy tests files: tests/.* + additional_dependencies: + - types-requests + - types-python-dateutil - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 + rev: 4.0.1 hooks: - id: flake8 name: flake8 openml files: openml/.* additional_dependencies: - - flake8-print==3.1.4 + - flake8-print==5.0.0 - id: flake8 name: flake8 tests files: tests/.* additional_dependencies: - - flake8-print==3.1.4 + - flake8-print==5.0.0 diff --git a/doc/progress.rst b/doc/progress.rst 
index 02dd78086..88b0dd29d 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -12,6 +12,7 @@ Changelog * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key. * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor. + * MAIN#1146: Update the pre-commit dependencies. * ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data. diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index ae5f37631..513d445ba 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -85,7 +85,9 @@ # but that does not matter for this demonstration. autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 -subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),) +subflow = dict( + components=OrderedDict(automl_tool=autosklearn_flow), +) #################################################################################################### # With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish. @@ -98,7 +100,10 @@ # the model of the flow to `None`. autosklearn_amlb_flow = openml.flows.OpenMLFlow( - **general, **flow_hyperparameters, **subflow, model=None, + **general, + **flow_hyperparameters, + **subflow, + model=None, ) autosklearn_amlb_flow.publish() print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}") diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py index 3d5183613..535f3607d 100644 --- a/examples/30_extended/fetch_runtimes_tutorial.py +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -72,7 +72,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -97,7 +100,10 @@ def print_compare_runtimes(measures): clf = RandomForestClassifier(n_estimators=10) run1 = openml.runs.run_model_on_task( - model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False, + model=clf, + task=task, + upload_flow=False, + avoid_duplicate_runs=False, ) measures = run1.fold_evaluations diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 714ce7b55..05b8c8cce 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -176,7 +176,11 @@ # The following lines can then be executed offline: run = openml.runs.run_model_on_task( - pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array", + pipe, + task, + avoid_duplicate_runs=False, + upload_flow=False, + dataset_format="array", ) # The run may be stored offline, and the flow will be stored along with it: diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index 1bb123aad..a2bc3a4df 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -57,10 +57,18 @@ # easy as you want it to be -cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),) +cat_imp = make_pipeline( + OneHotEncoder(handle_unknown="ignore", sparse=False), + TruncatedSVD(), +) cont_imp = SimpleImputer(strategy="median") ct = 
ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) -model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),]) +model_original = Pipeline( + steps=[ + ("transform", ct), + ("estimator", RandomForestClassifier()), + ] +) # Let's change some hyperparameters. Of course, in any good application we # would tune them using, e.g., Random Search or Bayesian Optimization, but for diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index b66c49096..d5bfcd88a 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -51,7 +51,9 @@ # And we can use the evaluation listing functionality to learn more about # the evaluations available for the conducted runs: evaluations = openml.evaluations.list_evaluations( - function="predictive_accuracy", output_format="dataframe", study=study.study_id, + function="predictive_accuracy", + output_format="dataframe", + study=study.study_id, ) print(evaluations.head()) diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index c30ff66a3..676a742a1 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -44,7 +44,10 @@ print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -53,7 +56,11 @@ # samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample # sizes, but we can neglect this here as there is only a single repetition. -train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,) +train_indices, test_indices = task.get_train_test_split_indices( + repeat=0, + fold=0, + sample=0, +) print(train_indices.shape, train_indices.dtype) print(test_indices.shape, test_indices.dtype) @@ -69,7 +76,10 @@ print( "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format( - X_train.shape, y_train.shape, X_test.shape, y_test.shape, + X_train.shape, + y_train.shape, + X_test.shape, + y_test.shape, ) ) @@ -82,7 +92,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -92,7 +105,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] @@ -121,7 +136,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -131,7 +149,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] @@ -160,7 +180,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of 
repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -170,7 +193,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 12b283738..959cad51a 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -69,15 +69,20 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): __check_response(response, url, file_elements) logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + request_method, + url, ) return response.text def _download_minio_file( - source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True, + source: str, + destination: Union[str, pathlib.Path], + exists_ok: bool = True, ) -> None: - """ Download file ``source`` from a MinIO Bucket and store it at ``destination``. + """Download file ``source`` from a MinIO Bucket and store it at ``destination``. Parameters ---------- @@ -103,7 +108,9 @@ def _download_minio_file( try: client.fget_object( - bucket_name=bucket, object_name=object_name, file_path=str(destination), + bucket_name=bucket, + object_name=object_name, + file_path=str(destination), ) except minio.error.S3Error as e: if e.message.startswith("Object does not exist"): @@ -120,7 +127,7 @@ def _download_text_file( exists_ok: bool = True, encoding: str = "utf8", ) -> Optional[str]: - """ Download the text file at `source` and store it in `output_path`. + """Download the text file at `source` and store it in `output_path`. By default, do nothing if a file already exists in `output_path`. The downloaded file can be checked against an expected md5 checksum. 
@@ -156,7 +163,10 @@ def _download_text_file( if output_path is None: logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + "get", + source, ) return downloaded_file @@ -165,7 +175,10 @@ def _download_text_file( fh.write(downloaded_file) logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + "get", + source, ) del downloaded_file @@ -174,8 +187,8 @@ def _download_text_file( def _file_id_to_url(file_id, filename=None): """ - Presents the URL how to download a given file id - filename is optional + Presents the URL how to download a given file id + filename is optional """ openml_url = config.server.split("/api/") url = openml_url[0] + "/data/download/%s" % file_id @@ -194,7 +207,12 @@ def _read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = _send_request(request_method="post", url=url, data=data, files=file_elements,) + response = _send_request( + request_method="post", + url=url, + data=data, + files=file_elements, + ) return response @@ -258,7 +276,9 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None): raise OpenMLServerError( "Unexpected server error when calling {}. Please contact the " "developers!\nStatus code: {}\n{}".format( - url, response.status_code, response.text, + url, + response.status_code, + response.text, ) ) if retry_counter >= n_retries: @@ -290,7 +310,9 @@ def __check_response(response, url, file_elements): def __parse_server_exception( - response: requests.Response, url: str, file_elements: Dict, + response: requests.Response, + url: str, + file_elements: Dict, ) -> OpenMLServerError: if response.status_code == 414: @@ -319,12 +341,17 @@ def __parse_server_exception( # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups - return OpenMLServerNoResult(code=code, message=full_message,) + return OpenMLServerNoResult( + code=code, + message=full_message, + ) # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) if code in [163] and file_elements is not None and "description" in file_elements: # file_elements['description'] is the XML file description of the flow full_message = "\n{}\n{} - {}".format( - file_elements["description"], message, additional_information, + file_elements["description"], + message, + additional_information, ) else: full_message = "{} - {}".format(message, additional_information) diff --git a/openml/base.py b/openml/base.py index 1b6e5ccc7..35a9ce58f 100644 --- a/openml/base.py +++ b/openml/base.py @@ -13,7 +13,7 @@ class OpenMLBase(ABC): - """ Base object for functionality that is shared across entities. """ + """Base object for functionality that is shared across entities.""" def __repr__(self): body_fields = self._get_repr_body_fields() @@ -22,32 +22,32 @@ def __repr__(self): @property @abstractmethod def id(self) -> Optional[int]: - """ The id of the entity, it is unique for its entity type. """ + """The id of the entity, it is unique for its entity type.""" pass @property def openml_url(self) -> Optional[str]: - """ The URL of the object on the server, if it was uploaded, else None. 
""" + """The URL of the object on the server, if it was uploaded, else None.""" if self.id is None: return None return self.__class__.url_for_id(self.id) @classmethod def url_for_id(cls, id_: int) -> str: - """ Return the OpenML URL for the object of the class entity with the given id. """ + """Return the OpenML URL for the object of the class entity with the given id.""" # Sample url for a flow: openml.org/f/123 return "{}/{}/{}".format(openml.config.get_server_base_url(), cls._entity_letter(), id_) @classmethod def _entity_letter(cls) -> str: - """ Return the letter which represents the entity type in urls, e.g. 'f' for flow.""" + """Return the letter which represents the entity type in urls, e.g. 'f' for flow.""" # We take advantage of the class naming convention (OpenMLX), # which holds for all entities except studies and tasks, which overwrite this method. return cls.__name__.lower()[len("OpenML") :][0] @abstractmethod def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. + """Collect all information to display in the __repr__ body. Returns ------ @@ -60,13 +60,13 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: pass def _apply_repr_template(self, body_fields: List[Tuple[str, str]]) -> str: - """ Generates the header and formats the body for string representation of the object. + """Generates the header and formats the body for string representation of the object. - Parameters - ---------- - body_fields: List[Tuple[str, str]] - A list of (name, value) pairs to display in the body of the __repr__. - """ + Parameters + ---------- + body_fields: List[Tuple[str, str]] + A list of (name, value) pairs to display in the body of the __repr__. + """ # We add spaces between capitals, e.g. ClassificationTask -> Classification Task name_with_spaces = re.sub( r"(\w)([A-Z])", r"\1 \2", self.__class__.__name__[len("OpenML") :] @@ -81,7 +81,7 @@ def _apply_repr_template(self, body_fields: List[Tuple[str, str]]) -> str: @abstractmethod def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. + """Creates a dictionary representation of self. Uses OrderedDict to ensure consistent ordering when converting to xml. The return value (OrderedDict) will be used to create the upload xml file. @@ -98,7 +98,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": pass def _to_xml(self) -> str: - """ Generate xml representation of self for upload to server. """ + """Generate xml representation of self for upload to server.""" dict_representation = self._to_dict() xml_representation = xmltodict.unparse(dict_representation, pretty=True) @@ -108,7 +108,7 @@ def _to_xml(self) -> str: return xml_body def _get_file_elements(self) -> Dict: - """ Get file_elements to upload to the server, called during Publish. + """Get file_elements to upload to the server, called during Publish. Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. @@ -117,7 +117,7 @@ def _get_file_elements(self) -> Dict: @abstractmethod def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. 
""" + """Parse the id from the xml_response and assign it to self.""" pass def publish(self) -> "OpenMLBase": @@ -136,7 +136,7 @@ def publish(self) -> "OpenMLBase": return self def open_in_browser(self): - """ Opens the OpenML web page corresponding to this object in your default browser. """ + """Opens the OpenML web page corresponding to this object in your default browser.""" webbrowser.open(self.openml_url) def push_tag(self, tag: str): diff --git a/openml/cli.py b/openml/cli.py index cfd453e9f..039ac227c 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -26,7 +26,7 @@ def looks_like_url(url: str) -> bool: def wait_until_valid_input( prompt: str, check: Callable[[str], str], sanitize: Union[Callable[[str], str], None] ) -> str: - """ Asks `prompt` until an input is received which returns True for `check`. + """Asks `prompt` until an input is received which returns True for `check`. Parameters ---------- @@ -252,7 +252,7 @@ def configure_field( input_message: str, sanitize: Union[Callable[[str], str], None] = None, ) -> None: - """ Configure `field` with `value`. If `value` is None ask the user for input. + """Configure `field` with `value`. If `value` is None ask the user for input. `value` and user input are first corrected/auto-completed with `convert_value` if provided, then validated with `check_with_message` function. @@ -288,13 +288,15 @@ def configure_field( else: print(intro_message) value = wait_until_valid_input( - prompt=input_message, check=check_with_message, sanitize=sanitize, + prompt=input_message, + check=check_with_message, + sanitize=sanitize, ) verbose_set(field, value) def configure(args: argparse.Namespace): - """ Calls the right submenu(s) to edit `args.field` in the configuration file. """ + """Calls the right submenu(s) to edit `args.field` in the configuration file.""" set_functions = { "apikey": configure_apikey, "server": configure_server, @@ -348,7 +350,11 @@ def main() -> None: ) parser_configure.add_argument( - "value", type=str, default=None, nargs="?", help="The value to set the FIELD to.", + "value", + type=str, + default=None, + nargs="?", + help="The value to set the FIELD to.", ) args = parser.parse_args() diff --git a/openml/config.py b/openml/config.py index 8593ad484..09359d33d 100644 --- a/openml/config.py +++ b/openml/config.py @@ -23,7 +23,7 @@ def _create_log_handlers(create_file_handler=True): - """ Creates but does not attach the log handlers. """ + """Creates but does not attach the log handlers.""" global console_handler, file_handler if console_handler is not None or file_handler is not None: logger.debug("Requested to create log handlers, but they are already created.") @@ -36,7 +36,7 @@ def _create_log_handlers(create_file_handler=True): console_handler.setFormatter(output_formatter) if create_file_handler: - one_mb = 2 ** 20 + one_mb = 2**20 log_path = os.path.join(cache_directory, "openml_python.log") file_handler = logging.handlers.RotatingFileHandler( log_path, maxBytes=one_mb, backupCount=1, delay=True @@ -45,7 +45,7 @@ def _create_log_handlers(create_file_handler=True): def _convert_log_levels(log_level: int) -> Tuple[int, int]: - """ Converts a log level that's either defined by OpenML/Python to both specifications. 
""" + """Converts a log level that's either defined by OpenML/Python to both specifications.""" # OpenML verbosity level don't match Python values directly: openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} python_to_openml = { @@ -62,7 +62,7 @@ def _convert_log_levels(log_level: int) -> Tuple[int, int]: def _set_level_register_and_store(handler: logging.Handler, log_level: int): - """ Set handler log level, register it if needed, save setting to config file if specified. """ + """Set handler log level, register it if needed, save setting to config file if specified.""" oml_level, py_level = _convert_log_levels(log_level) handler.setLevel(py_level) @@ -74,13 +74,13 @@ def _set_level_register_and_store(handler: logging.Handler, log_level: int): def set_console_log_level(console_output_level: int): - """ Set console output to the desired level and register it with openml logger if needed. """ + """Set console output to the desired level and register it with openml logger if needed.""" global console_handler _set_level_register_and_store(cast(logging.Handler, console_handler), console_output_level) def set_file_log_level(file_output_level: int): - """ Set file output to the desired level and register it with openml logger if needed. """ + """Set file output to the desired level and register it with openml logger if needed.""" global file_handler _set_level_register_and_store(cast(logging.Handler, file_handler), file_output_level) @@ -90,7 +90,14 @@ def set_file_log_level(file_output_level: int): "apikey": "", "server": "https://www.openml.org/api/v1/xml", "cachedir": ( - os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",)) + os.environ.get( + "XDG_CACHE_HOME", + os.path.join( + "~", + ".cache", + "openml", + ), + ) if platform.system() == "Linux" else os.path.join("~", ".openml") ), @@ -144,7 +151,7 @@ def set_retry_policy(value: str, n_retries: Optional[int] = None) -> None: class ConfigurationForExamples: - """ Allows easy switching to and from a test configuration, used for examples. """ + """Allows easy switching to and from a test configuration, used for examples.""" _last_used_server = None _last_used_key = None @@ -154,7 +161,7 @@ class ConfigurationForExamples: @classmethod def start_using_configuration_for_example(cls): - """ Sets the configuration to connect to the test server with valid apikey. + """Sets the configuration to connect to the test server with valid apikey. To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. @@ -181,7 +188,7 @@ def start_using_configuration_for_example(cls): @classmethod def stop_using_configuration_for_example(cls): - """ Return to configuration as it was before `start_use_example_configuration`. """ + """Return to configuration as it was before `start_use_example_configuration`.""" if not cls._start_last_called: # We don't want to allow this because it will (likely) result in the `server` and # `apikey` variables being set to None. @@ -281,7 +288,7 @@ def _get(config, key): def set_field_in_config_file(field: str, value: Any): - """ Overwrites the `field` in the configuration file with the new `value`. 
""" + """Overwrites the `field` in the configuration file with the new `value`.""" if field not in _defaults: return ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") @@ -302,7 +309,7 @@ def set_field_in_config_file(field: str, value: Any): def _parse_config(config_file: str): - """ Parse the config file, set up defaults. """ + """Parse the config file, set up defaults.""" config = configparser.RawConfigParser(defaults=_defaults) # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 8f1ce612b..6f3f66853 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -239,7 +239,7 @@ def id(self) -> Optional[int]: return self.dataset_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Name": self.name, "Version": self.version, @@ -297,7 +297,7 @@ def __eq__(self, other): return all(self.__dict__[key] == other.__dict__[key] for key in self_keys) def _download_data(self) -> None: - """ Download ARFF data file to standard cache directory. Set `self.data_file`. """ + """Download ARFF data file to standard cache directory. Set `self.data_file`.""" # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet @@ -354,8 +354,8 @@ def decode_arff(fh): return decoder.decode(fh, encode_nominal=True, return_type=return_type) if filename[-3:] == ".gz": - with gzip.open(filename) as fh: - return decode_arff(fh) + with gzip.open(filename) as zipfile: + return decode_arff(zipfile) else: with open(filename, encoding="utf8") as fh: return decode_arff(fh) @@ -363,7 +363,7 @@ def decode_arff(fh): def _parse_data_from_arff( self, arff_file_path: str ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: - """ Parse all required data from arff file. + """Parse all required data from arff file. Parameters ---------- @@ -473,7 +473,7 @@ def _compressed_cache_file_paths(self, data_file: str) -> Tuple[str, str, str]: def _cache_compressed_file_from_file( self, data_file: str ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: - """ Store data from the local file in compressed format. + """Store data from the local file in compressed format. If a local parquet file is present it will be used instead of the arff file. Sets cache_format to 'pickle' if data is sparse. @@ -519,7 +519,7 @@ def _cache_compressed_file_from_file( return data, categorical, attribute_names def _load_data(self): - """ Load data from compressed format or arff. Download data if not present on disk. """ + """Load data from compressed format or arff. Download data if not present on disk.""" need_to_create_pickle = self.cache_format == "pickle" and self.data_pickle_file is None need_to_create_feather = self.cache_format == "feather" and self.data_feather_file is None @@ -675,7 +675,7 @@ def get_data( List[bool], List[str], ]: - """ Returns dataset content as dataframes or sparse matrices. + """Returns dataset content as dataframes or sparse matrices. Parameters ---------- @@ -863,7 +863,7 @@ def get_features_by_type( return result def _get_file_elements(self) -> Dict: - """ Adds the 'dataset' to file elements. 
""" + """Adds the 'dataset' to file elements.""" file_elements = {} path = None if self.data_file is None else os.path.abspath(self.data_file) @@ -882,11 +882,11 @@ def _get_file_elements(self) -> Dict: return file_elements def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.dataset_id = int(xml_response["oml:upload_data_set"]["oml:id"]) def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" props = [ "id", "name", diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index d92d7d515..fb2e201f6 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -36,12 +36,12 @@ def _get_cache_directory(dataset: OpenMLDataset) -> str: - """ Return the cache directory of the OpenMLDataset """ + """Return the cache directory of the OpenMLDataset""" return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id) def list_qualities() -> List[str]: - """ Return list of data qualities available. + """Return list of data qualities available. The function performs an API call to retrieve the entire list of data qualities that are computed on the datasets uploaded. @@ -236,7 +236,8 @@ def _validated_data_attributes( def check_datasets_active( - dataset_ids: List[int], raise_error_if_not_exist: bool = True, + dataset_ids: List[int], + raise_error_if_not_exist: bool = True, ) -> Dict[int, bool]: """ Check if the dataset ids provided are active. @@ -274,7 +275,7 @@ def check_datasets_active( def _name_to_id( dataset_name: str, version: Optional[int] = None, error_if_multiple: bool = False ) -> int: - """ Attempt to find the dataset id of the dataset with the given name. + """Attempt to find the dataset id of the dataset with the given name. If multiple datasets with the name exist, and ``error_if_multiple`` is ``False``, then return the least recent still active dataset. @@ -354,7 +355,7 @@ def get_dataset( cache_format: str = "pickle", download_qualities: bool = True, ) -> OpenMLDataset: - """ Download the OpenML dataset representation, optionally also download actual data file. + """Download the OpenML dataset representation, optionally also download actual data file. This function is thread/multiprocessing safe. This function uses caching. A check will be performed to determine if the information has @@ -407,7 +408,10 @@ def get_dataset( "`dataset_id` must be one of `str` or `int`, not {}.".format(type(dataset_id)) ) - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) + did_cache_dir = _create_cache_directory_for_id( + DATASETS_CACHE_DIR_NAME, + dataset_id, + ) remove_dataset_cache = True try: @@ -450,7 +454,7 @@ def get_dataset( def attributes_arff_from_df(df): - """ Describe attributes of the dataframe according to ARFF specification. + """Describe attributes of the dataframe according to ARFF specification. Parameters ---------- @@ -746,7 +750,7 @@ def edit_dataset( original_data_url=None, paper_url=None, ) -> int: - """ Edits an OpenMLDataset. + """Edits an OpenMLDataset. 
     In addition to providing the dataset id of the dataset to edit (through data_id),
     you must specify a value for at least one of the optional function arguments,
@@ -886,7 +890,7 @@ def _topic_add_dataset(data_id: int, topic: str):
         id of the dataset for which the topic needs to be added
     topic : str
         Topic to be added for the dataset
-    """
+    """
     if not isinstance(data_id, int):
         raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id)))
     form_data = {"data_id": data_id, "topic": topic}
@@ -907,7 +911,7 @@ def _topic_delete_dataset(data_id: int, topic: str):
     topic : str
         Topic to be deleted

-    """
+    """
     if not isinstance(data_id, int):
         raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id)))
     form_data = {"data_id": data_id, "topic": topic}
@@ -959,7 +963,7 @@ def _get_dataset_description(did_cache_dir, dataset_id):
 def _get_dataset_parquet(
     description: Union[Dict, OpenMLDataset], cache_directory: str = None
 ) -> Optional[str]:
-    """ Return the path to the local parquet file of the dataset. If is not cached, it is downloaded.
+    """Return the path to the local parquet file of the dataset. If it is not cached, it is downloaded.

     Checks if the file is in the cache, if yes, return the path to the file.
     If not, downloads the file and caches it, then returns the file path.
@@ -1007,7 +1011,7 @@ def _get_dataset_parquet(


 def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str:
-    """ Return the path to the local arff file of the dataset. If is not cached, it is downloaded.
+    """Return the path to the local arff file of the dataset. If it is not cached, it is downloaded.

     Checks if the file is in the cache, if yes, return the path to the file.
     If not, downloads the file and caches it, then returns the file path.
diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index b3fdd0aa0..30d376c04 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -253,7 +253,7 @@ def __list_evaluations(api_call, output_format="object"):


 def list_evaluation_measures() -> List[str]:
-    """ Return list of evaluation measures available.
+    """Return list of evaluation measures available.

     The function performs an API call to retrieve the entire list of
     evaluation measures that are available.
diff --git a/openml/exceptions.py b/openml/exceptions.py
index 781784ee2..a5f132128 100644
--- a/openml/exceptions.py
+++ b/openml/exceptions.py
@@ -9,7 +9,7 @@ def __init__(self, message: str):

 class OpenMLServerError(PyOpenMLError):
     """class for when something is really wrong on the server
-    (result did not parse to dict), contains unparsed error."""
+    (result did not parse to dict), contains unparsed error."""

     def __init__(self, message: str):
         super().__init__(message)
@@ -17,7 +17,7 @@ def __init__(self, message: str):

 class OpenMLServerException(OpenMLServerError):
     """exception for when the result of the server was
-    not 200 (e.g., listing call w/o results). """
+    not 200 (e.g., listing call w/o results)."""

     # Code needs to be optional to allow the exception to be picklable:
     # https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable  # noqa: E501
     def __init__(self, message: str, code: int = None, url: str = None):
@@ -28,11 +28,15 @@ def __init__(self, message: str, code: int = None, url: str = None):
         super().__init__(message)

     def __str__(self):
-        return "%s returned code %s: %s" % (self.url, self.code, self.message,)
+        return "%s returned code %s: %s" % (
+            self.url,
+            self.code,
+            self.message,
+        )


 class OpenMLServerNoResult(OpenMLServerException):
-    """exception for when the result of the server is empty. """
+    """exception for when the result of the server is empty."""

     pass

@@ -51,14 +55,14 @@ class OpenMLHashException(PyOpenMLError):


 class OpenMLPrivateDatasetError(PyOpenMLError):
-    """ Exception thrown when the user has no rights to access the dataset. """
+    """Exception thrown when the user has no rights to access the dataset."""

     def __init__(self, message: str):
         super().__init__(message)


 class OpenMLRunsExistError(PyOpenMLError):
-    """ Indicates run(s) already exists on the server when they should not be duplicated. """
+    """Indicates run(s) already exists on the server when they should not be duplicated."""

     def __init__(self, run_ids: set, message: str):
         if len(run_ids) < 1:
diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py
index 4529ad163..f33ef7543 100644
--- a/openml/extensions/extension_interface.py
+++ b/openml/extensions/extension_interface.py
@@ -204,7 +204,9 @@ def _run_model_on_fold(

     @abstractmethod
     def obtain_parameter_values(
-        self, flow: "OpenMLFlow", model: Any = None,
+        self,
+        flow: "OpenMLFlow",
+        model: Any = None,
     ) -> List[Dict[str, Any]]:
         """Extracts all parameter settings required for the flow from the model.

@@ -247,7 +249,9 @@ def check_if_model_fitted(self, model: Any) -> bool:

     @abstractmethod
     def instantiate_model_from_hpo_class(
-        self, model: Any, trace_iteration: "OpenMLTraceIteration",
+        self,
+        model: Any,
+        trace_iteration: "OpenMLTraceIteration",
     ) -> Any:
         """Instantiate a base model which can be searched over by the hyperparameter
         optimization model.
diff --git a/openml/extensions/functions.py b/openml/extensions/functions.py
index 52bb03961..a080e1004 100644
--- a/openml/extensions/functions.py
+++ b/openml/extensions/functions.py
@@ -30,7 +30,8 @@ def register_extension(extension: Type[Extension]) -> None:


 def get_extension_by_flow(
-    flow: "OpenMLFlow", raise_if_no_extension: bool = False,
+    flow: "OpenMLFlow",
+    raise_if_no_extension: bool = False,
 ) -> Optional[Extension]:
     """Get an extension which can handle the given flow.

@@ -66,7 +67,10 @@ def get_extension_by_flow(
     )


-def get_extension_by_model(model: Any, raise_if_no_extension: bool = False,) -> Optional[Extension]:
+def get_extension_by_model(
+    model: Any,
+    raise_if_no_extension: bool = False,
+) -> Optional[Extension]:
     """Get an extension which can handle the given model.

     Iterates all registered extensions and checks whether they can handle the
     presented model.
diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py
index d49a9a9c5..f8936b0db 100644
--- a/openml/extensions/sklearn/extension.py
+++ b/openml/extensions/sklearn/extension.py
@@ -11,7 +11,7 @@
 from re import IGNORECASE
 import sys
 import time
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, cast, Sized
 import warnings

 import numpy as np
@@ -66,8 +66,8 @@
 class SklearnExtension(Extension):
     """Connect scikit-learn to OpenML-Python.

-    The estimators which use this extension must be scikit-learn compatible,
-    i.e needs to be a subclass of sklearn.base.BaseEstimator".
+    The estimators which use this extension must be scikit-learn compatible,
+    i.e., they need to be subclasses of sklearn.base.BaseEstimator.
     """

 ################################################################################################
@@ -107,7 +107,7 @@ def can_handle_model(cls, model: Any) -> bool:
     def trim_flow_name(
         cls, long_name: str, extra_trim_length: int = 100, _outer: bool = True
     ) -> str:
-        """ Shorten generated sklearn flow name to at most ``max_length`` characters.
+        """Shorten generated sklearn flow name to at most ``max_length`` characters.

         Flows are assumed to have the following naming structure:
         ``(model_selection)? (pipeline)? (steps)+``
@@ -223,7 +223,7 @@ def remove_all_in_parentheses(string: str) -> str:

     @classmethod
     def _min_dependency_str(cls, sklearn_version: str) -> str:
-        """ Returns a string containing the minimum dependencies for the sklearn version passed.
+        """Returns a string containing the minimum dependencies for the sklearn version passed.

         Parameters
         ----------
@@ -499,7 +499,7 @@ def _serialize_sklearn(self, o: Any, parent_model: Optional[Any] = None) -> Any:
                 rval = tuple(rval)
         elif isinstance(o, SIMPLE_TYPES) or o is None:
             if isinstance(o, tuple(SIMPLE_NUMPY_TYPES)):
-                o = o.item()
+                o = o.item()  # type: ignore
             # base parameter values
             rval = o
         elif isinstance(o, dict):
@@ -858,7 +858,9 @@ def _get_tags(self) -> List[str]:
         ]

     def _get_external_version_string(
-        self, model: Any, sub_components: Dict[str, OpenMLFlow],
+        self,
+        model: Any,
+        sub_components: Dict[str, OpenMLFlow],
     ) -> str:
         # Create external version string for a flow, given the model and the
         # already parsed dictionary of sub_components.
Retrieves the external @@ -874,7 +876,8 @@ def _get_external_version_string( module = importlib.import_module(model_package_name) model_package_version_number = module.__version__ # type: ignore external_version = self._format_external_version( - model_package_name, model_package_version_number, + model_package_name, + model_package_version_number, ) external_versions.add(external_version) @@ -890,7 +893,9 @@ def _get_external_version_string( return ",".join(list(sorted(external_versions))) def _check_multiple_occurence_of_component_in_flow( - self, model: Any, sub_components: Dict[str, OpenMLFlow], + self, + model: Any, + sub_components: Dict[str, OpenMLFlow], ) -> None: to_visit_stack = [] # type: List[OpenMLFlow] to_visit_stack.extend(sub_components.values()) @@ -910,7 +915,8 @@ def _check_multiple_occurence_of_component_in_flow( to_visit_stack.extend(visitee.components.values()) def _extract_information_from_model( - self, model: Any, + self, + model: Any, ) -> Tuple[ "OrderedDict[str, Optional[str]]", "OrderedDict[str, Optional[Dict]]", @@ -936,7 +942,7 @@ def _extract_information_from_model( rval = self._serialize_sklearn(v, model) def flatten_all(list_): - """ Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]). """ + """Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]).""" for el in list_: if isinstance(el, (list, tuple)) and len(el) > 0: yield from flatten_all(el) @@ -1351,7 +1357,7 @@ def _serialize_cross_validator(self, o: Any) -> "OrderedDict[str, Union[str, Dic # if the parameter is deprecated, don't show it continue - if not (hasattr(value, "__len__") and len(value) == 0): + if not (isinstance(value, Sized) and len(value) == 0): value = json.dumps(value) parameters[key] = value else: @@ -1381,7 +1387,9 @@ def _deserialize_cross_validator( return model_class(**parameters) def _format_external_version( - self, model_package_name: str, model_package_version_number: str, + self, + model_package_name: str, + model_package_version_number: str, ) -> str: return "%s==%s" % (model_package_name, model_package_version_number) @@ -1530,7 +1538,7 @@ def _seed_current_object(current_value): # statement) this way we guarantee that if a different set of # subflows is seeded, the same number of the random generator is # used - new_value = rs.randint(0, 2 ** 16) + new_value = rs.randint(0, 2**16) if _seed_current_object(current_value): random_states[param_name] = new_value @@ -1540,7 +1548,7 @@ def _seed_current_object(current_value): continue current_value = model_params[param_name].random_state - new_value = rs.randint(0, 2 ** 16) + new_value = rs.randint(0, 2**16) if _seed_current_object(current_value): model_params[param_name].random_state = new_value @@ -1777,7 +1785,8 @@ def _prediction_to_probabilities( # for class 3 because the rest of the library expects that the # probabilities are ordered the same way as the classes are ordered). message = "Estimator only predicted for {}/{} classes!".format( - proba_y.shape[1], len(task.class_labels), + proba_y.shape[1], + len(task.class_labels), ) warnings.warn(message) openml.config.logger.warning(message) @@ -1815,7 +1824,9 @@ def _prediction_to_probabilities( return pred_y, proba_y, user_defined_measures, trace def obtain_parameter_values( - self, flow: "OpenMLFlow", model: Any = None, + self, + flow: "OpenMLFlow", + model: Any = None, ) -> List[Dict[str, Any]]: """Extracts all parameter settings required for the flow from the model. 
@@ -2019,7 +2030,9 @@ def is_subcomponent_specification(values):
         return parameters

     def _openml_param_name_to_sklearn(
-        self, openml_parameter: openml.setups.OpenMLParameter, flow: OpenMLFlow,
+        self,
+        openml_parameter: openml.setups.OpenMLParameter,
+        flow: OpenMLFlow,
     ) -> str:
         """
         Converts the name of an OpenMLParameter into the sklearn name, given a flow.
@@ -2068,7 +2081,9 @@ def _is_hpo_class(self, model: Any) -> bool:
         return isinstance(model, sklearn.model_selection._search.BaseSearchCV)

     def instantiate_model_from_hpo_class(
-        self, model: Any, trace_iteration: OpenMLTraceIteration,
+        self,
+        model: Any,
+        trace_iteration: OpenMLTraceIteration,
     ) -> Any:
         """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter
         optimization model.
@@ -2114,7 +2129,11 @@ def _extract_trace_data(self, model, rep_no, fold_no):
             arff_tracecontent.append(arff_line)
         return arff_tracecontent

-    def _obtain_arff_trace(self, model: Any, trace_content: List,) -> "OpenMLRunTrace":
+    def _obtain_arff_trace(
+        self,
+        model: Any,
+        trace_content: List,
+    ) -> "OpenMLRunTrace":
         """Create arff trace object from a fitted model and the trace content obtained by
         repeatedly calling ``run_model_on_task``.
@@ -2176,4 +2195,7 @@ def _obtain_arff_trace(self, model: Any, trace_content: List,) -> "OpenMLRunTrac
             attribute = (PREFIX + key[6:], type)
             trace_attributes.append(attribute)

-        return OpenMLRunTrace.generate(trace_attributes, trace_content,)
+        return OpenMLRunTrace.generate(
+            trace_attributes,
+            trace_content,
+        )
diff --git a/openml/flows/flow.py b/openml/flows/flow.py
index 2a340e625..b9752e77c 100644
--- a/openml/flows/flow.py
+++ b/openml/flows/flow.py
@@ -174,7 +174,7 @@ def extension(self):
         )

     def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
-        """ Collect all information to display in the __repr__ body. """
+        """Collect all information to display in the __repr__ body."""
         fields = {
             "Flow Name": self.name,
             "Flow Description": self.description,
@@ -203,7 +203,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]:
         return [(key, fields[key]) for key in order if key in fields]

     def _to_dict(self) -> "OrderedDict[str, OrderedDict]":
-        """ Creates a dictionary representation of self. """
+        """Creates a dictionary representation of self."""
         flow_container = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
         flow_dict = OrderedDict(
             [("@xmlns:oml", "http://openml.org/openml")]
@@ -297,7 +297,7 @@ def _from_dict(cls, xml_dict):
         Calls itself recursively to create :class:`OpenMLFlow` objects of
         subflows (components).
-
+
         XML definition of a flow is available at
         https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd
@@ -400,11 +400,11 @@ def from_filesystem(cls, input_directory) -> "OpenMLFlow":
         return OpenMLFlow._from_dict(xmltodict.parse(xml_string))

     def _parse_publish_response(self, xml_response: Dict):
-        """ Parse the id from the xml_response and assign it to self. """
+        """Parse the id from the xml_response and assign it to self."""
         self.flow_id = int(xml_response["oml:upload_flow"]["oml:id"])

     def publish(self, raise_error_if_exists: bool = False) -> "OpenMLFlow":
-        """ Publish this flow to OpenML server.
+        """Publish this flow to OpenML server.

         Raises a PyOpenMLError if the flow exists on the server, but
         `self.flow_id` does not match the flow id known to the server.
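
As a usage sketch for the publishing path reformatted above (assumptions: a configured API key, and an illustrative estimator converted via the sklearn extension):

    import sklearn.tree

    from openml.extensions.sklearn import SklearnExtension

    # model_to_flow() builds an OpenMLFlow from the estimator; publish() uploads
    # the XML produced by _to_dict(), after which _parse_publish_response()
    # stores the server-assigned id in flow.flow_id.
    flow = SklearnExtension().model_to_flow(sklearn.tree.DecisionTreeClassifier())
    flow = flow.publish(raise_error_if_exists=False)
    print(flow.flow_id)
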
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index 28d49b691..73c2b1d3a 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -122,7 +122,8 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow:

     except OpenMLCacheException:
         xml_file = os.path.join(
-            openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id), "flow.xml",
+            openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id),
+            "flow.xml",
         )

         flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method="get")
@@ -253,7 +254,9 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]:
         raise ValueError("Argument 'version' should be a non-empty string")

     xml_response = openml._api_calls._perform_api_call(
-        "flow/exists", "get", data={"name": name, "external_version": external_version},
+        "flow/exists",
+        "get",
+        data={"name": name, "external_version": external_version},
     )

     result_dict = xmltodict.parse(xml_response)
@@ -265,7 +268,9 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]:


 def get_flow_id(
-    model: Optional[Any] = None, name: Optional[str] = None, exact_version=True,
+    model: Optional[Any] = None,
+    name: Optional[str] = None,
+    exact_version=True,
 ) -> Union[int, bool, List[int]]:
     """Retrieves the flow id for a model or a flow name.

@@ -357,7 +362,7 @@ def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.D


 def _check_flow_for_server_id(flow: OpenMLFlow) -> None:
-    """ Raises a ValueError if the flow or any of its subflows has no flow id. """
+    """Raises a ValueError if the flow or any of its subflows has no flow id."""

     # Depth-first search to check if all components were uploaded to the
     # server before parsing the parameters
@@ -429,6 +434,9 @@ def assert_flows_equal(
         attr1 = getattr(flow1, key, None)
         attr2 = getattr(flow2, key, None)
         if key == "components":
+            if not (isinstance(attr1, Dict) and isinstance(attr2, Dict)):
+                raise TypeError("Cannot compare components because they are not dictionaries.")
+
             for name in set(attr1.keys()).union(attr2.keys()):
                 if name not in attr1:
                     raise ValueError(
@@ -490,8 +498,8 @@ def assert_flows_equal(
             # dictionary with keys specifying the parameter's 'description' and 'data_type'
             # checking parameter descriptions can be ignored since that might change
             # data type check can also be ignored if one of them is not defined, i.e., None
-            params1 = set(flow1.parameters_meta_info.keys())
-            params2 = set(flow2.parameters_meta_info.keys())
+            params1 = set(flow1.parameters_meta_info)
+            params2 = set(flow2.parameters_meta_info)
             if params1 != params2:
                 raise ValueError(
                     "Parameter list in meta info for parameters differ " "in the two flows."
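
To illustrate how the two lookup helpers above differ (a minimal sketch; the flow is built locally and never uploaded):

    import openml
    import sklearn.tree

    from openml.extensions.sklearn import SklearnExtension

    flow = SklearnExtension().model_to_flow(sklearn.tree.DecisionTreeClassifier())

    # flow_exists matches on the exact (name, external_version) pair and returns
    # the flow id, or False if the server knows no such flow.
    maybe_id = openml.flows.flow_exists(flow.name, flow.external_version)

    # get_flow_id resolves a model or a flow name; with exact_version=False it
    # returns the ids of all flows that share the name, regardless of version.
    ids = openml.flows.get_flow_id(name=flow.name, exact_version=False)
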
diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 8bbe3b956..08b2fe972 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -353,7 +353,10 @@ def initialize_model_from_run(run_id: int) -> Any: def initialize_model_from_trace( - run_id: int, repeat: int, fold: int, iteration: Optional[int] = None, + run_id: int, + repeat: int, + fold: int, + iteration: Optional[int] = None, ) -> Any: """ Initialize a model based on the parameters that were set @@ -461,7 +464,12 @@ def _run_task_get_arffcontent( jobs = [] for n_fit, (rep_no, fold_no, sample_no) in enumerate( - itertools.product(range(num_reps), range(num_folds), range(num_samples),), start=1 + itertools.product( + range(num_reps), + range(num_folds), + range(num_samples), + ), + start=1, ): jobs.append((n_fit, rep_no, fold_no, sample_no)) @@ -537,7 +545,8 @@ def _calculate_local_measure(sklearn_fn, openml_name): if add_local_measures: _calculate_local_measure( - sklearn.metrics.accuracy_score, "predictive_accuracy", + sklearn.metrics.accuracy_score, + "predictive_accuracy", ) elif isinstance(task, OpenMLRegressionTask): @@ -557,7 +566,8 @@ def _calculate_local_measure(sklearn_fn, openml_name): if add_local_measures: _calculate_local_measure( - sklearn.metrics.mean_absolute_error, "mean_absolute_error", + sklearn.metrics.mean_absolute_error, + "mean_absolute_error", ) elif isinstance(task, OpenMLClusteringTask): @@ -921,7 +931,10 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): def _get_cached_run(run_id): """Load a run from the cache.""" - run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id,) + run_cache_dir = openml.utils._create_cache_directory_for_id( + RUNS_CACHE_DIR_NAME, + run_id, + ) try: run_file = os.path.join(run_cache_dir, "description.xml") with io.open(run_file, encoding="utf8") as fh: @@ -1144,7 +1157,7 @@ def format_prediction( sample: Optional[int] = None, proba: Optional[Dict[str, float]] = None, ) -> List[Union[str, int, float]]: - """ Format the predictions in the specific order as required for the run results. + """Format the predictions in the specific order as required for the run results. Parameters ---------- diff --git a/openml/runs/run.py b/openml/runs/run.py index 5c93e9518..58367179e 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -121,7 +121,7 @@ def __init__( @property def predictions(self) -> pd.DataFrame: - """ Return a DataFrame with predictions for this run """ + """Return a DataFrame with predictions for this run""" if self._predictions is None: if self.data_content: arff_dict = self._generate_arff_dict() @@ -140,7 +140,7 @@ def id(self) -> Optional[int]: return self.run_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Uploader Name": self.uploader_name, "Metric": self.task_evaluation_measure, @@ -251,7 +251,11 @@ def from_filesystem(cls, directory: str, expect_model: bool = True) -> "OpenMLRu return run - def to_filesystem(self, directory: str, store_model: bool = True,) -> None: + def to_filesystem( + self, + directory: str, + store_model: bool = True, + ) -> None: """ The inverse of the from_filesystem method. Serializes a run on the filesystem, to be uploaded later. 
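
The round trip these two methods support looks roughly as follows (a minimal sketch: the task id and target directory are illustrative, and the flow is deliberately not uploaded):

    import openml
    import sklearn.tree

    run = openml.runs.run_model_on_task(
        model=sklearn.tree.DecisionTreeClassifier(),
        task=openml.tasks.get_task(119),  # task id illustrative
        avoid_duplicate_runs=False,
        upload_flow=False,
    )
    # Serialize the run (and optionally the fitted model) now, publish later.
    run.to_filesystem("./my_run", store_model=True)
    restored = openml.runs.OpenMLRun.from_filesystem("./my_run", expect_model=True)
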
@@ -408,7 +412,8 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): predictions_arff = self._generate_arff_dict() elif "predictions" in self.output_files: predictions_file_url = openml._api_calls._file_id_to_url( - self.output_files["predictions"], "predictions.arff", + self.output_files["predictions"], + "predictions.arff", ) response = openml._api_calls._download_text_file(predictions_file_url) predictions_arff = arff.loads(response) @@ -516,11 +521,11 @@ def _attribute_list_to_dict(attribute_list): return np.array(scores) def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.run_id = int(xml_response["oml:upload_run"]["oml:run_id"]) def _get_file_elements(self) -> Dict: - """ Get file_elements to upload to the server. + """Get file_elements to upload to the server. Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. @@ -544,7 +549,8 @@ def _get_file_elements(self) -> Dict: if self.flow is None: self.flow = openml.flows.get_flow(self.flow_id) self.parameter_settings = self.flow.extension.obtain_parameter_values( - self.flow, self.model, + self.flow, + self.model, ) file_elements = {"description": ("description.xml", self._to_xml())} @@ -559,7 +565,7 @@ def _get_file_elements(self) -> Dict: return file_elements def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" description = OrderedDict() # type: 'OrderedDict' description["oml:run"] = OrderedDict() description["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" diff --git a/openml/runs/trace.py b/openml/runs/trace.py index 0c05b9dc8..e6885260e 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -331,7 +331,12 @@ def trace_from_xml(cls, xml): ) current = OpenMLTraceIteration( - repeat, fold, iteration, setup_string, evaluation, selected, + repeat, + fold, + iteration, + setup_string, + evaluation, + selected, ) trace[(repeat, fold, iteration)] = current @@ -372,7 +377,8 @@ def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace": def __repr__(self): return "[Run id: {}, {} trace iterations]".format( - -1 if self.run_id is None else self.run_id, len(self.trace_iterations), + -1 if self.run_id is None else self.run_id, + len(self.trace_iterations), ) def __iter__(self): @@ -410,7 +416,14 @@ class OpenMLTraceIteration(object): """ def __init__( - self, repeat, fold, iteration, setup_string, evaluation, selected, parameters=None, + self, + repeat, + fold, + iteration, + setup_string, + evaluation, + selected, + parameters=None, ): if not isinstance(selected, bool): diff --git a/openml/setups/functions.py b/openml/setups/functions.py index b418a6106..675172738 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -175,7 +175,7 @@ def _list_setups(setup=None, output_format="object", **kwargs): Returns ------- dict or dataframe - """ + """ api_call = "setup/list" if setup is not None: diff --git a/openml/study/functions.py b/openml/study/functions.py index 26cb9bd55..ae257dd9c 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -30,7 +30,8 @@ def get_suite(suite_id: Union[int, str]) -> OpenMLBenchmarkSuite: def get_study( - study_id: Union[int, str], arg_for_backwards_compat: Optional[str] = None, + study_id: Union[int, str], + 
arg_for_backwards_compat: Optional[str] = None, ) -> OpenMLStudy: # noqa F401 """ Retrieves all relevant information of an OpenML study from the server. @@ -83,7 +84,8 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: if entity_type != main_entity_type: raise ValueError( "Unexpected entity type '{}' reported by the server, expected '{}'".format( - main_entity_type, entity_type, + main_entity_type, + entity_type, ) ) benchmark_suite = ( @@ -207,7 +209,10 @@ def create_study( def create_benchmark_suite( - name: str, description: str, task_ids: List[int], alias: Optional[str] = None, + name: str, + description: str, + task_ids: List[int], + alias: Optional[str] = None, ) -> OpenMLBenchmarkSuite: """ Creates an OpenML benchmark suite (collection of entity types, where diff --git a/openml/study/study.py b/openml/study/study.py index dbbef6e89..0cdc913f9 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -99,7 +99,7 @@ def id(self) -> Optional[int]: return self.study_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Name": self.name, "Status": self.status, @@ -138,11 +138,11 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: return [(key, fields[key]) for key in order if key in fields] def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.study_id = int(xml_response["oml:study_upload"]["oml:id"]) def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" # some can not be uploaded, e.g., id, creator, creation_date simple_props = ["alias", "main_entity_type", "name", "description"] # maps from attribute name (which is used as outer tag name) to immer diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 2c5a56ad7..75731d01f 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -354,7 +354,10 @@ def get_task( except (ValueError, TypeError): raise ValueError("Dataset ID is neither an Integer nor can be cast to an Integer.") - tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,) + tid_cache_dir = openml.utils._create_cache_directory_for_id( + TASKS_CACHE_DIR_NAME, + task_id, + ) try: task = _get_task_description(task_id) @@ -371,7 +374,8 @@ def get_task( task.download_split() except Exception as e: openml.utils._remove_cache_dir_for_id( - TASKS_CACHE_DIR_NAME, tid_cache_dir, + TASKS_CACHE_DIR_NAME, + tid_cache_dir, ) raise e @@ -384,7 +388,11 @@ def _get_task_description(task_id): return _get_cached_task(task_id) except OpenMLCacheException: xml_file = os.path.join( - openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,), "task.xml", + openml.utils._create_cache_directory_for_id( + TASKS_CACHE_DIR_NAME, + task_id, + ), + "task.xml", ) task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 515be895a..e5fafedc5 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -14,11 +14,11 @@ class OpenMLSplit(object): """OpenML Split object. 
- Parameters - ---------- - name : int or str - description : str - split : dict + Parameters + ---------- + name : int or str + description : str + split : dict """ def __init__(self, name, description, split): diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 6a1f2a4c5..095730645 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -34,16 +34,16 @@ class TaskType(Enum): class OpenMLTask(OpenMLBase): """OpenML Task object. - Parameters - ---------- - task_type_id : TaskType - Refers to the type of task. - task_type : str - Refers to the task. - data_set_id: int - Refers to the data. - estimation_procedure_id: int - Refers to the type of estimates used. + Parameters + ---------- + task_type_id : TaskType + Refers to the type of task. + task_type : str + Refers to the task. + data_set_id: int + Refers to the data. + estimation_procedure_id: int + Refers to the type of estimates used. """ def __init__( @@ -82,7 +82,7 @@ def id(self) -> Optional[int]: return self.task_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Task Type Description": "{}/tt/{}".format( openml.config.get_server_base_url(), self.task_type_id @@ -120,14 +120,21 @@ def get_dataset(self) -> datasets.OpenMLDataset: return datasets.get_dataset(self.dataset_id) def get_train_test_split_indices( - self, fold: int = 0, repeat: int = 0, sample: int = 0, + self, + fold: int = 0, + repeat: int = 0, + sample: int = 0, ) -> Tuple[np.ndarray, np.ndarray]: # Replace with retrieve from cache if self.split is None: self.split = self.download_split() - train_indices, test_indices = self.split.get(repeat=repeat, fold=fold, sample=sample,) + train_indices, test_indices = self.split.get( + repeat=repeat, + fold=fold, + sample=sample, + ) return train_indices, test_indices def _download_split(self, cache_file: str): @@ -137,14 +144,15 @@ def _download_split(self, cache_file: str): except (OSError, IOError): split_url = self.estimation_procedure["data_splits_url"] openml._api_calls._download_text_file( - source=str(split_url), output_path=cache_file, + source=str(split_url), + output_path=cache_file, ) def download_split(self) -> OpenMLSplit: - """Download the OpenML split for a given task. - """ + """Download the OpenML split for a given task.""" cached_split_file = os.path.join( - _create_cache_directory_for_id("tasks", self.task_id), "datasplits.arff", + _create_cache_directory_for_id("tasks", self.task_id), + "datasplits.arff", ) try: @@ -164,11 +172,11 @@ def get_split_dimensions(self) -> Tuple[int, int, int]: return self.split.repeats, self.split.folds, self.split.samples def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" task_container = OrderedDict() # type: OrderedDict[str, OrderedDict] task_dict = OrderedDict( [("@xmlns:oml", "http://openml.org/openml")] - ) # type: OrderedDict[str, Union[List, str, TaskType]] + ) # type: OrderedDict[str, Union[List, str, int]] task_container["oml:task_inputs"] = task_dict task_dict["oml:task_type_id"] = self.task_type_id.value @@ -192,17 +200,17 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": return task_container def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. 
""" + """Parse the id from the xml_response and assign it to self.""" self.task_id = int(xml_response["oml:upload_task"]["oml:id"]) class OpenMLSupervisedTask(OpenMLTask, ABC): """OpenML Supervised Classification object. - Parameters - ---------- - target_name : str - Name of the target feature (the class variable). + Parameters + ---------- + target_name : str + Name of the target feature (the class variable). """ def __init__( @@ -233,7 +241,8 @@ def __init__( self.target_name = target_name def get_X_and_y( - self, dataset_format: str = "array", + self, + dataset_format: str = "array", ) -> Tuple[ Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix], Union[np.ndarray, pd.Series] ]: @@ -257,7 +266,10 @@ def get_X_and_y( TaskType.LEARNING_CURVE, ): raise NotImplementedError(self.task_type) - X, y, _, _ = dataset.get_data(dataset_format=dataset_format, target=self.target_name,) + X, y, _, _ = dataset.get_data( + dataset_format=dataset_format, + target=self.target_name, + ) return X, y def _to_dict(self) -> "OrderedDict[str, OrderedDict]": @@ -291,10 +303,10 @@ def estimation_parameters(self, est_parameters): class OpenMLClassificationTask(OpenMLSupervisedTask): """OpenML Classification object. - Parameters - ---------- - class_labels : List of str (optional) - cost_matrix: array (optional) + Parameters + ---------- + class_labels : List of str (optional) + cost_matrix: array (optional) """ def __init__( @@ -333,8 +345,7 @@ def __init__( class OpenMLRegressionTask(OpenMLSupervisedTask): - """OpenML Regression object. - """ + """OpenML Regression object.""" def __init__( self, @@ -366,11 +377,11 @@ def __init__( class OpenMLClusteringTask(OpenMLTask): """OpenML Clustering object. - Parameters - ---------- - target_name : str (optional) - Name of the target feature (class) that is not part of the - feature set for the clustering task. + Parameters + ---------- + target_name : str (optional) + Name of the target feature (class) that is not part of the + feature set for the clustering task. """ def __init__( @@ -401,7 +412,8 @@ def __init__( self.target_name = target_name def get_X( - self, dataset_format: str = "array", + self, + dataset_format: str = "array", ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix]: """Get data associated with the current task. @@ -417,7 +429,10 @@ def get_X( """ dataset = self.get_dataset() - data, *_ = dataset.get_data(dataset_format=dataset_format, target=None,) + data, *_ = dataset.get_data( + dataset_format=dataset_format, + target=None, + ) return data def _to_dict(self) -> "OrderedDict[str, OrderedDict]": @@ -442,8 +457,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": class OpenMLLearningCurveTask(OpenMLClassificationTask): - """OpenML Learning Curve object. - """ + """OpenML Learning Curve object.""" def __init__( self, diff --git a/openml/testing.py b/openml/testing.py index 922d373b2..56445a253 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -114,7 +114,7 @@ def tearDown(self): @classmethod def _mark_entity_for_removal(self, entity_type, entity_id): - """ Static record of entities uploaded to test server + """Static record of entities uploaded to test server Dictionary of lists where the keys are 'entity_type'. Each such dictionary is a list of integer IDs. 
@@ -128,7 +128,7 @@ def _mark_entity_for_removal(self, entity_type, entity_id): @classmethod def _delete_entity_from_tracker(self, entity_type, entity): - """ Deletes entity records from the static file_tracker + """Deletes entity records from the static file_tracker Given an entity type and corresponding ID, deletes all entries, including duplicate entries of the ID for the entity type. diff --git a/openml/utils.py b/openml/utils.py index a482bf0bc..8ab238463 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -71,7 +71,7 @@ def extract_xml_tags(xml_tag_name, node, allow_none=True): def _get_rest_api_type_alias(oml_object: "OpenMLBase") -> str: - """ Return the alias of the openml entity as it is defined for the REST API. """ + """Return the alias of the openml entity as it is defined for the REST API.""" rest_api_mapping = [ (openml.datasets.OpenMLDataset, "data"), (openml.flows.OpenMLFlow, "flow"), diff --git a/setup.py b/setup.py index f5e70abb5..9f3cdd0e6 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ # Make sure to remove stale files such as the egg-info before updating this: # https://stackoverflow.com/a/26547314 packages=setuptools.find_packages( - include=["openml.*", "openml"], exclude=["*.tests", "*.tests.*", "tests.*", "tests"], + include=["openml.*", "openml"], + exclude=["*.tests", "*.tests.*", "tests.*", "tests"], ), package_data={"": ["*.txt", "*.md", "py.typed"]}, python_requires=">=3.6", @@ -84,7 +85,12 @@ "seaborn", ], "examples_unix": ["fanova"], - "docs": ["sphinx>=3", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc",], + "docs": [ + "sphinx>=3", + "sphinx-gallery", + "sphinx_bootstrap_theme", + "numpydoc", + ], }, test_suite="pytest", classifiers=[ diff --git a/tests/conftest.py b/tests/conftest.py index c1f728a72..cf3f33834 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,7 +38,7 @@ def worker_id() -> str: - """ Returns the name of the worker process owning this function call. + """Returns the name of the worker process owning this function call. :return: str Possible outputs from the set of {'master', 'gw0', 'gw1', ..., 'gw(n-1)'} diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 9d67ee177..878b2288a 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -58,7 +58,8 @@ def _remove_pickle_files(self): self.lock_path = os.path.join(openml.config.get_cache_directory(), "locks") for did in ["-1", "2"]: with lockutils.external_lock( - name="datasets.functions.get_dataset:%s" % did, lock_path=self.lock_path, + name="datasets.functions.get_dataset:%s" % did, + lock_path=self.lock_path, ): pickle_path = os.path.join( openml.config.get_cache_directory(), "datasets", did, "dataset.pkl.py3" @@ -175,7 +176,10 @@ def test_list_datasets_empty(self): def test_check_datasets_active(self): # Have to test on live because there is no deactivated dataset on the test server. openml.config.server = self.production_server - active = openml.datasets.check_datasets_active([2, 17, 79], raise_error_if_not_exist=False,) + active = openml.datasets.check_datasets_active( + [2, 17, 79], + raise_error_if_not_exist=False, + ) self.assertTrue(active[2]) self.assertFalse(active[17]) self.assertIsNone(active.get(79)) @@ -188,7 +192,7 @@ def test_check_datasets_active(self): openml.config.server = self.test_server def _datasets_retrieved_successfully(self, dids, metadata_only=True): - """ Checks that all files for the given dids have been downloaded. 
+        """Checks that all files for the given dids have been downloaded.

         This includes:
         - description
@@ -229,24 +233,24 @@
         )

     def test__name_to_id_with_deactivated(self):
-        """ Check that an activated dataset is returned if an earlier deactivated one exists. """
+        """Check that an activated dataset is returned if an earlier deactivated one exists."""
         openml.config.server = self.production_server
         # /d/1 was deactivated
         self.assertEqual(openml.datasets.functions._name_to_id("anneal"), 2)
         openml.config.server = self.test_server

     def test__name_to_id_with_multiple_active(self):
-        """ With multiple active datasets, retrieve the least recent active. """
+        """With multiple active datasets, retrieve the least recent active."""
         openml.config.server = self.production_server
         self.assertEqual(openml.datasets.functions._name_to_id("iris"), 61)

     def test__name_to_id_with_version(self):
-        """ With multiple active datasets, retrieve the least recent active. """
+        """Retrieve the id of the dataset with the requested version."""
         openml.config.server = self.production_server
         self.assertEqual(openml.datasets.functions._name_to_id("iris", version=3), 969)

     def test__name_to_id_with_multiple_active_error(self):
-        """ With multiple active datasets, retrieve the least recent active. """
+        """An error is raised if multiple active datasets match and that is not allowed."""
         openml.config.server = self.production_server
         self.assertRaisesRegex(
             ValueError,
@@ -257,7 +261,7 @@ def test__name_to_id_with_multiple_active_error(self):
         )

     def test__name_to_id_name_does_not_exist(self):
-        """ With multiple active datasets, retrieve the least recent active. """
+        """An error is raised if no active dataset with the given name exists."""
         self.assertRaisesRegex(
             RuntimeError,
             "No active datasets exist with name does_not_exist",
@@ -266,7 +270,7 @@ def test__name_to_id_name_does_not_exist(self):
         )

     def test__name_to_id_version_does_not_exist(self):
-        """ With multiple active datasets, retrieve the least recent active. """
+        """An error is raised if no active dataset with the given name and version exists."""
         self.assertRaisesRegex(
             RuntimeError,
             "No active datasets exist with name iris and version 100000",
@@ -356,7 +360,7 @@ def test_get_dataset_lazy(self):
         self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45, False)

     def test_get_dataset_lazy_all_functions(self):
-        """ Test that all expected functionality is available without downloading the dataset.
""" + """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1, download_data=False) # We only tests functions as general integrity is tested by test_get_dataset_lazy @@ -537,10 +541,14 @@ def test__get_dataset_skip_download(self): def test_deletion_of_cache_dir(self): # Simple removal - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, 1,) + did_cache_dir = _create_cache_directory_for_id( + DATASETS_CACHE_DIR_NAME, + 1, + ) self.assertTrue(os.path.exists(did_cache_dir)) openml.utils._remove_cache_dir_for_id( - DATASETS_CACHE_DIR_NAME, did_cache_dir, + DATASETS_CACHE_DIR_NAME, + did_cache_dir, ) self.assertFalse(os.path.exists(did_cache_dir)) @@ -1526,7 +1534,10 @@ def test_data_fork(self): self.assertNotEqual(did, result) # Check server exception when unknown dataset is provided self.assertRaisesRegex( - OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999, + OpenMLServerException, + "Unknown dataset", + fork_dataset, + data_id=999999, ) def test_get_dataset_parquet(self): diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 85361cc02..791e815e1 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -73,7 +73,8 @@ def test_get_extension_by_flow(self): self.assertIsInstance(get_extension_by_flow(DummyFlow()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, "Multiple extensions registered which can handle flow:", + ValueError, + "Multiple extensions registered which can handle flow:", ): get_extension_by_flow(DummyFlow()) @@ -87,6 +88,7 @@ def test_get_extension_by_model(self): self.assertIsInstance(get_extension_by_model(DummyModel()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, "Multiple extensions registered which can handle model:", + ValueError, + "Multiple extensions registered which can handle model:", ): get_extension_by_model(DummyModel()) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index e45eeea53..a906d7ebd 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -736,10 +736,18 @@ def test_serialize_feature_union_switched_names(self): fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[("scaler", ohe), ("ohe", scaler)]) fu1_serialization, _ = self._serialization_test_helper( - fu1, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + fu1, + X=None, + y=None, + subcomponent_parameters=(), + dependencies_mock_call_count=(3, 6), ) fu2_serialization, _ = self._serialization_test_helper( - fu2, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + fu2, + X=None, + y=None, + subcomponent_parameters=(), + dependencies_mock_call_count=(3, 6), ) # OneHotEncoder was moved to _encoders module in 0.20 @@ -1104,7 +1112,8 @@ def test_serialize_advanced_grid_fails(self): } clf = sklearn.model_selection.GridSearchCV( - sklearn.ensemble.BaggingClassifier(), param_grid=param_grid, + sklearn.ensemble.BaggingClassifier(), + param_grid=param_grid, ) with self.assertRaisesRegex( TypeError, re.compile(r".*OpenML.*Flow.*is not JSON serializable", flags=re.DOTALL) @@ -1513,7 +1522,9 @@ def 
test_obtain_parameter_values_flow_not_from_server(self): self.extension.obtain_parameter_values(flow) model = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.linear_model.LogisticRegression(solver="lbfgs",) + base_estimator=sklearn.linear_model.LogisticRegression( + solver="lbfgs", + ) ) flow = self.extension.model_to_flow(model) flow.flow_id = 1 @@ -1546,14 +1557,14 @@ def test_obtain_parameter_values(self): self.assertEqual(parameter["oml:component"], 2) def test_numpy_type_allowed_in_flow(self): - """ Simple numpy types should be serializable. """ + """Simple numpy types should be serializable.""" dt = sklearn.tree.DecisionTreeClassifier( max_depth=np.float64(3.0), min_samples_leaf=np.int32(5) ) self.extension.model_to_flow(dt) def test_numpy_array_not_allowed_in_flow(self): - """ Simple numpy arrays should not be serializable. """ + """Simple numpy arrays should not be serializable.""" bin = sklearn.preprocessing.MultiLabelBinarizer(classes=np.asarray([1, 2, 3])) with self.assertRaises(TypeError): self.extension.model_to_flow(bin) @@ -1772,7 +1783,8 @@ def test_run_model_on_fold_classification_2(self): y_test = y[test_indices] pipeline = sklearn.model_selection.GridSearchCV( - sklearn.tree.DecisionTreeClassifier(), {"max_depth": [1, 2]}, + sklearn.tree.DecisionTreeClassifier(), + {"max_depth": [1, 2]}, ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( @@ -1947,7 +1959,11 @@ def test_run_model_on_fold_clustering(self): ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( - model=pipeline, task=task, fold_no=0, rep_no=0, X_train=X, + model=pipeline, + task=task, + fold_no=0, + rep_no=0, + X_train=X, ) y_hat, y_hat_proba, user_defined_measures, trace = res @@ -1984,7 +2000,9 @@ def test__extract_trace_data(self): num_iters = 10 task = openml.tasks.get_task(20) # balance-scale; crossvalidation clf = sklearn.model_selection.RandomizedSearchCV( - sklearn.neural_network.MLPClassifier(), param_grid, num_iters, + sklearn.neural_network.MLPClassifier(), + param_grid, + num_iters, ) # just run the task on the model (without invoking any fancy extension & openml code) train, _ = task.get_train_test_split_indices(0, 0) @@ -2149,7 +2167,8 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(flow.components["prep"].class_name, "sklearn.pipeline.Pipeline") self.assertIsInstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) self.assertIsInstance( - flow.components["prep"].components["columntransformer"].components["cat"], OpenMLFlow, + flow.components["prep"].components["columntransformer"].components["cat"], + OpenMLFlow, ) self.assertEqual( flow.components["prep"].components["columntransformer"].components["cat"].name, "drop" @@ -2189,8 +2208,7 @@ def test_sklearn_serialization_with_none_step(self): reason="columntransformer introduction in 0.20.0", ) def test_failed_serialization_of_custom_class(self): - """Test to check if any custom class inherited from sklearn expectedly fails serialization - """ + """Check if any custom class inherited from sklearn expectedly fails serialization""" try: from sklearn.impute import SimpleImputer except ImportError: diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 8d08f4eaf..50d152192 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -176,7 +176,8 @@ def test_publish_flow(self): 
parameters=collections.OrderedDict(), parameters_meta_info=collections.OrderedDict(), external_version=self.extension._format_external_version( - "sklearn", sklearn.__version__, + "sklearn", + sklearn.__version__, ), tags=[], language="English", @@ -368,7 +369,10 @@ def test_existing_flow_exists(self): steps = [ ("imputation", SimpleImputer(strategy="median")), ("hotencoding", sklearn.preprocessing.OneHotEncoder(**ohe_params)), - ("variencethreshold", sklearn.feature_selection.VarianceThreshold(),), + ( + "variencethreshold", + sklearn.feature_selection.VarianceThreshold(), + ), ("classifier", sklearn.tree.DecisionTreeClassifier()), ] complicated = sklearn.pipeline.Pipeline(steps=steps) @@ -387,7 +391,10 @@ def test_existing_flow_exists(self): # check if flow exists can find it flow = openml.flows.get_flow(flow.flow_id) - downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version,) + downloaded_flow_id = openml.flows.flow_exists( + flow.name, + flow.external_version, + ) self.assertEqual(downloaded_flow_id, flow.flow_id) def test_sklearn_to_upload_to_flow(self): diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index a65dcbf70..eb80c2861 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -112,10 +112,14 @@ def test_are_flows_equal(self): new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), getattr(new_flow, attribute), + getattr(flow, attribute), + getattr(new_flow, attribute), ) self.assertRaises( - ValueError, openml.flows.functions.assert_flows_equal, flow, new_flow, + ValueError, + openml.flows.functions.assert_flows_equal, + flow, + new_flow, ) # Test that the API ignores several keys when comparing flows @@ -134,7 +138,8 @@ def test_are_flows_equal(self): new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), getattr(new_flow, attribute), + getattr(flow, attribute), + getattr(new_flow, attribute), ) openml.flows.functions.assert_flows_equal(flow, new_flow) @@ -370,7 +375,8 @@ def test_get_flow_id(self): name=flow.name, exact_version=True ) flow_ids_exact_version_False = openml.flows.get_flow_id( - name=flow.name, exact_version=False, + name=flow.name, + exact_version=False, ) self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) self.assertIn(flow.flow_id, flow_ids_exact_version_True) diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 16bdbc7df..ecc7111fa 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -7,7 +7,8 @@ class TestConfig(openml.testing.TestBase): def test_too_long_uri(self): with self.assertRaisesRegex( - openml.exceptions.OpenMLServerError, "URI too long!", + openml.exceptions.OpenMLServerError, + "URI too long!", ): openml.datasets.list_datasets(data_id=list(range(10000))) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 638f02420..ba70689a1 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -37,7 +37,7 @@ def side_effect(path_): openml.config._setup() def test_get_config_as_dict(self): - """ Checks if the current configuration is returned accurately as a dict. 
""" + """Checks if the current configuration is returned accurately as a dict.""" config = openml.config.get_config_as_dict() _config = dict() _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" @@ -51,7 +51,7 @@ def test_get_config_as_dict(self): self.assertDictEqual(config, _config) def test_setup_with_config(self): - """ Checks if the OpenML configuration can be updated using _setup(). """ + """Checks if the OpenML configuration can be updated using _setup().""" _config = dict() _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" _config["server"] = "https://www.openml.org/api/v1/xml" @@ -68,7 +68,7 @@ def test_setup_with_config(self): class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): - """ Verifies the test configuration is loaded properly. """ + """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server @@ -79,7 +79,7 @@ def test_switch_to_example_configuration(self): self.assertEqual(openml.config.server, self.test_server) def test_switch_from_example_configuration(self): - """ Verifies the previous configuration is loaded after stopping. """ + """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server @@ -91,14 +91,14 @@ def test_switch_from_example_configuration(self): self.assertEqual(openml.config.server, self.production_server) def test_example_configuration_stop_before_start(self): - """ Verifies an error is raised is `stop_...` is called before `start_...`. """ + """Verifies an error is raised is `stop_...` is called before `start_...`.""" error_regex = ".*stop_use_example_configuration.*start_use_example_configuration.*first" self.assertRaisesRegex( RuntimeError, error_regex, openml.config.stop_using_configuration_for_example ) def test_example_configuration_start_twice(self): - """ Checks that the original config can be returned to if `start..` is called twice. 
""" + """Checks that the original config can be returned to if `start..` is called twice.""" openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py index 80f5e67f0..93d2e6925 100644 --- a/tests/test_openml/test_openml.py +++ b/tests/test_openml/test_openml.py @@ -15,7 +15,11 @@ class TestInit(TestBase): @mock.patch("openml.flows.functions.get_flow") @mock.patch("openml.runs.functions.get_run") def test_populate_cache( - self, run_mock, flow_mock, dataset_mock, task_mock, + self, + run_mock, + flow_mock, + dataset_mock, + task_mock, ): openml.populate_cache(task_ids=[1, 2], dataset_ids=[3, 4], flow_ids=[5, 6], run_ids=[7, 8]) self.assertEqual(run_mock.call_count, 2) @@ -27,7 +31,10 @@ def test_populate_cache( self.assertEqual(argument[0], fixture) self.assertEqual(dataset_mock.call_count, 2) - for argument, fixture in zip(dataset_mock.call_args_list, [(3,), (4,)],): + for argument, fixture in zip( + dataset_mock.call_args_list, + [(3,), (4,)], + ): self.assertEqual(argument[0], fixture) self.assertEqual(task_mock.call_count, 2) diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index dd0da5c00..88c998bc3 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -79,8 +79,14 @@ def _check_array(array, type_): int_part_prime = [line[:3] for line in run_prime_trace_content] _check_array(int_part_prime, int) - float_part = np.array(np.array(run_trace_content)[:, 3:4], dtype=float,) - float_part_prime = np.array(np.array(run_prime_trace_content)[:, 3:4], dtype=float,) + float_part = np.array( + np.array(run_trace_content)[:, 3:4], + dtype=float, + ) + float_part_prime = np.array( + np.array(run_prime_trace_content)[:, 3:4], + dtype=float, + ) bool_part = [line[4] for line in run_trace_content] bool_part_prime = [line[4] for line in run_prime_trace_content] for bp, bpp in zip(bool_part, bool_part_prime): @@ -113,7 +119,11 @@ def test_to_from_filesystem_vanilla(self): upload_flow=True, ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -146,7 +156,10 @@ def test_to_from_filesystem_search(self): task = openml.tasks.get_task(119) # diabetes; crossvalidation run = openml.runs.run_model_on_task( - model=model, task=task, add_local_measures=False, avoid_duplicate_runs=False, + model=model, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, ) cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8eafb0a7b..7a860dab3 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -143,7 +143,9 @@ def _compare_predictions(self, predictions, predictions_prime): val_2 = predictions_prime["data"][idx][col_idx] if type(val_1) == float or type(val_2) == float: self.assertAlmostEqual( - float(val_1), float(val_2), places=6, + float(val_1), + float(val_2), + places=6, ) else: self.assertEqual(val_1, val_2) @@ -165,11 +167,17 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create if create_task_obj: task = openml.tasks.get_task(run.task_id) run_prime = openml.runs.run_model_on_task( - model=model_prime, task=task, 
avoid_duplicate_runs=False, seed=seed, + model=model_prime, + task=task, + avoid_duplicate_runs=False, + seed=seed, ) else: run_prime = openml.runs.run_model_on_task( - model=model_prime, task=run.task_id, avoid_duplicate_runs=False, seed=seed, + model=model_prime, + task=run.task_id, + avoid_duplicate_runs=False, + seed=seed, ) predictions_prime = run_prime._generate_arff_dict() @@ -277,7 +285,9 @@ def _remove_random_state(flow): # test the initialize setup function run_id = run_.run_id run_server = openml.runs.get_run(run_id) - clf_server = openml.setups.initialize_model(setup_id=run_server.setup_id,) + clf_server = openml.setups.initialize_model( + setup_id=run_server.setup_id, + ) flow_local = self.extension.model_to_flow(clf) flow_server = self.extension.model_to_flow(clf_server) @@ -299,7 +309,9 @@ def _remove_random_state(flow): openml.flows.assert_flows_equal(flow_local, flow_server) # and test the initialize setup from run function - clf_server2 = openml.runs.initialize_model_from_run(run_id=run_server.run_id,) + clf_server2 = openml.runs.initialize_model_from_run( + run_id=run_server.run_id, + ) flow_server2 = self.extension.model_to_flow(clf_server2) if flow.class_name not in classes_without_random_state: self.assertEqual(flow_server2.parameters["random_state"], flow_expected_rsv) @@ -382,7 +394,10 @@ def test_run_regression_on_classif_task(self): AttributeError, "'LinearRegression' object has no attribute 'classes_'" ): openml.runs.run_model_on_task( - model=clf, task=task, avoid_duplicate_runs=False, dataset_format="array", + model=clf, + task=task, + avoid_duplicate_runs=False, + dataset_format="array", ) def test_check_erronous_sklearn_flow_fails(self): @@ -396,7 +411,8 @@ def test_check_erronous_sklearn_flow_fails(self): r"Penalty term must be positive; got \(C=u?'abc'\)", # u? 
for 2.7/3.4-6 compability ): openml.runs.run_model_on_task( - task=task, model=clf, + task=task, + model=clf, ) ########################################################################### @@ -474,7 +490,9 @@ def determine_grid_size(param_grid): self._wait_for_processed_run(run.run_id, 600) try: model_prime = openml.runs.initialize_model_from_trace( - run_id=run.run_id, repeat=0, fold=0, + run_id=run.run_id, + repeat=0, + fold=0, ) except openml.exceptions.OpenMLServerException as e: e.message = "%s; run_id %d" % (e.message, run.run_id) @@ -815,8 +833,8 @@ def test_learning_curve_task_2(self): RandomizedSearchCV( DecisionTreeClassifier(), { - "min_samples_split": [2 ** x for x in range(1, 8)], - "min_samples_leaf": [2 ** x for x in range(0, 7)], + "min_samples_split": [2**x for x in range(1, 8)], + "min_samples_leaf": [2**x for x in range(0, 7)], }, cv=3, n_iter=10, @@ -858,7 +876,10 @@ def test_initialize_cv_from_run(self): task = openml.tasks.get_task(11) # kr-vs-kp; holdout run = openml.runs.run_model_on_task( - model=randomsearch, task=task, avoid_duplicate_runs=False, seed=1, + model=randomsearch, + task=task, + avoid_duplicate_runs=False, + seed=1, ) run_ = run.publish() TestBase._mark_entity_for_removal("run", run.run_id) @@ -896,7 +917,10 @@ def _test_local_evaluations(self, run): else: tests.append((sklearn.metrics.jaccard_score, {})) for test_idx, test in enumerate(tests): - alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],) + alt_scores = run.get_metric_fn( + sklearn_fn=test[0], + kwargs=test[1], + ) self.assertEqual(len(alt_scores), 10) for idx in range(len(alt_scores)): self.assertGreaterEqual(alt_scores[idx], 0) @@ -909,7 +933,10 @@ def test_local_run_swapped_parameter_order_model(self): # task and clf are purposely in the old order run = openml.runs.run_model_on_task( - task, clf, avoid_duplicate_runs=False, upload_flow=False, + task, + clf, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) @@ -935,7 +962,10 @@ def test_local_run_swapped_parameter_order_flow(self): # invoke OpenML run run = openml.runs.run_flow_on_task( - task, flow, avoid_duplicate_runs=False, upload_flow=False, + task, + flow, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) @@ -960,7 +990,10 @@ def test_local_run_metric_score(self): # invoke OpenML run run = openml.runs.run_model_on_task( - model=clf, task=task, avoid_duplicate_runs=False, upload_flow=False, + model=clf, + task=task, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) @@ -1013,7 +1046,11 @@ def test_initialize_model_from_run(self): TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) task = openml.tasks.get_task(task_id) - run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False,) + run = openml.runs.run_model_on_task( + model=clf, + task=task, + avoid_duplicate_runs=False, + ) run_ = run.publish() TestBase._mark_entity_for_removal("run", run_.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run_.run_id)) @@ -1098,7 +1135,9 @@ def test_run_with_illegal_flow_id(self): ) with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, flow=flow, avoid_duplicate_runs=True, + task=task, + flow=flow, + avoid_duplicate_runs=True, ) def test_run_with_illegal_flow_id_after_load(self): @@ -1113,7 +1152,11 @@ def test_run_with_illegal_flow_id_after_load(self): task=task, flow=flow, 
avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -1144,7 +1187,9 @@ def test_run_with_illegal_flow_id_1(self): expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'" with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, flow=flow_new, avoid_duplicate_runs=True, + task=task, + flow=flow_new, + avoid_duplicate_runs=True, ) def test_run_with_illegal_flow_id_1_after_load(self): @@ -1167,7 +1212,11 @@ def test_run_with_illegal_flow_id_1_after_load(self): task=task, flow=flow_new, avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -1488,7 +1537,10 @@ def test_run_flow_on_task_downloaded_flow(self): downloaded_flow = openml.flows.get_flow(flow.flow_id) task = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE["task_id"]) run = openml.runs.run_flow_on_task( - flow=downloaded_flow, task=task, avoid_duplicate_runs=False, upload_flow=False, + flow=downloaded_flow, + task=task, + avoid_duplicate_runs=False, + upload_flow=False, ) run.publish() @@ -1573,7 +1625,7 @@ def test_format_prediction_task_regression(self): ) @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test__run_task_get_arffcontent_2(self, parallel_mock): - """ Tests if a run executed in parallel is collated correctly. """ + """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp x, y = task.get_X_and_y(dataset_format="dataframe") num_instances = x.shape[0] @@ -1626,7 +1678,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock): ) @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test_joblib_backends(self, parallel_mock): - """ Tests evaluation of a run using various joblib backends and n_jobs. 
""" + """Tests evaluation of a run using various joblib backends and n_jobs.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp x, y = task.get_X_and_y(dataset_format="dataframe") num_instances = x.shape[0] diff --git a/tests/test_runs/test_trace.py b/tests/test_runs/test_trace.py index 96724d139..0b4b64359 100644 --- a/tests/test_runs/test_trace.py +++ b/tests/test_runs/test_trace.py @@ -25,19 +25,22 @@ def test_get_selected_iteration(self): # This next one should simply not fail self.assertEqual(trace.get_selected_iteration(2, 2), 2) with self.assertRaisesRegex( - ValueError, "Could not find the selected iteration for rep/fold 3/3", + ValueError, + "Could not find the selected iteration for rep/fold 3/3", ): trace.get_selected_iteration(3, 3) def test_initialization(self): - """Check all different ways to fail the initialization """ + """Check all different ways to fail the initialization""" with self.assertRaisesRegex( - ValueError, "Trace content not available.", + ValueError, + "Trace content not available.", ): OpenMLRunTrace.generate(attributes="foo", content=None) with self.assertRaisesRegex( - ValueError, "Trace attributes not available.", + ValueError, + "Trace attributes not available.", ): OpenMLRunTrace.generate(attributes=None, content="foo") with self.assertRaisesRegex(ValueError, "Trace content is empty."): diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 538b08821..464431b94 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -87,7 +87,9 @@ def side_effect(self): self.priors = None with unittest.mock.patch.object( - sklearn.naive_bayes.GaussianNB, "__init__", side_effect, + sklearn.naive_bayes.GaussianNB, + "__init__", + side_effect, ): # Check a flow with zero hyperparameters nb = sklearn.naive_bayes.GaussianNB() diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 904df4d3a..3d7811f6e 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -44,7 +44,8 @@ def test_get_study_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, "Unexpected entity type 'task' reported by the server, expected 'run'", + ValueError, + "Unexpected entity type 'task' reported by the server, expected 'run'", ): openml.study.get_study(99) @@ -62,7 +63,8 @@ def test_get_suite_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, "Unexpected entity type 'run' reported by the server, expected 'task'", + ValueError, + "Unexpected entity type 'run' reported by the server, expected 'task'", ): openml.study.get_suite(123) diff --git a/tests/test_tasks/test_split.py b/tests/test_tasks/test_split.py index 7c3dcf9aa..7d8004a91 100644 --- a/tests/test_tasks/test_split.py +++ b/tests/test_tasks/test_split.py @@ -82,8 +82,16 @@ def test_get_split(self): self.assertEqual(train_split.shape[0], 808) self.assertEqual(test_split.shape[0], 90) self.assertRaisesRegex( - ValueError, "Repeat 10 not known", split.get, 10, 2, + ValueError, + "Repeat 10 not known", + split.get, + 10, + 2, ) self.assertRaisesRegex( - ValueError, "Fold 10 not known", split.get, 2, 10, + ValueError, + "Fold 10 not known", + split.get, + 2, + 10, ) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 418b21b65..be5b0c9bd 100644 --- a/tests/test_tasks/test_task_functions.py 
+++ b/tests/test_tasks/test_task_functions.py @@ -143,7 +143,15 @@ def test_get_task(self): self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml",) + os.path.join( + self.workdir, + "org", + "openml", + "test", + "tasks", + "1", + "task.xml", + ) ) ) self.assertTrue( @@ -162,7 +170,15 @@ def test_get_task_lazy(self): self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "task.xml",) + os.path.join( + self.workdir, + "org", + "openml", + "test", + "tasks", + "2", + "task.xml", + ) ) ) self.assertEqual(task.class_labels, ["1", "2", "3", "4", "5", "U"]) @@ -230,7 +246,10 @@ def test_download_split(self): def test_deletion_of_cache_dir(self): # Simple removal - tid_cache_dir = openml.utils._create_cache_directory_for_id("tasks", 1,) + tid_cache_dir = openml.utils._create_cache_directory_for_id( + "tasks", + 1, + ) self.assertTrue(os.path.exists(tid_cache_dir)) openml.utils._remove_cache_dir_for_id("tasks", tid_cache_dir) self.assertFalse(os.path.exists(tid_cache_dir)) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 4fa08e1ab..a5add31c8 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -98,6 +98,7 @@ def test__create_cache_directory(self, config_mock): os.chmod(subdir, 0o444) config_mock.return_value = subdir with self.assertRaisesRegex( - openml.exceptions.OpenMLCacheException, r"Cannot create cache directory", + openml.exceptions.OpenMLCacheException, + r"Cannot create cache directory", ): openml.utils._create_cache_directory("ghi") From a8d96d53f8d7ccc860601bdf3aba52b8293cf281 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 16 Aug 2022 13:52:15 +0200 Subject: [PATCH 12/53] Replace removed file with new target for download test (#1158) --- tests/test_datasets/test_dataset_functions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 878b2288a..2fa97860b 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -462,9 +462,9 @@ def test__download_minio_file_raises_FileExists_if_destination_in_use(self): ) def test__download_minio_file_works_with_bucket_subdirectory(self): - file_destination = pathlib.Path(self.workdir, "custom.csv") + file_destination = pathlib.Path(self.workdir, "custom.pq") _download_minio_file( - source="http://openml1.win.tue.nl/test/subdirectory/test.csv", + source="http://openml1.win.tue.nl/dataset61/dataset_61.pq", destination=file_destination, exists_ok=True, ) From ccb3e8eb356768e1d2e0108ac104fe1a04316c00 Mon Sep 17 00:00:00 2001 From: chadmarchand <37517821+chadmarchand@users.noreply.github.com> Date: Thu, 6 Oct 2022 08:41:35 -0500 Subject: [PATCH 13/53] Fix outdated docstring for list_tasks function (#1149) --- doc/progress.rst | 1 + openml/tasks/functions.py | 21 ++------------------- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 88b0dd29d..6bbd66f51 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,6 +8,7 @@ Changelog 0.13.0 ~~~~~~ + * MAINT#1104: Fix outdated docstring for ``list_task``. * FIX#1030: ``pre-commit`` hooks now no longer should issue a warning. 
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key. diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 75731d01f..4c0aeaf4a 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -135,15 +135,7 @@ def list_tasks( it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. task_type : TaskType, optional - ID of the task type as detailed `here `_. - - Supervised classification: 1 - - Supervised regression: 2 - - Learning curve: 3 - - Supervised data stream classification: 4 - - Clustering: 5 - - Machine Learning Challenge: 6 - - Survival Analysis: 7 - - Subgroup Discovery: 8 + Refers to the type of task. offset : int, optional the number of tasks to skip, starting from the first size : int, optional @@ -196,16 +188,7 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs): it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. task_type : TaskType, optional - ID of the task type as detailed - `here `_. - - Supervised classification: 1 - - Supervised regression: 2 - - Learning curve: 3 - - Supervised data stream classification: 4 - - Clustering: 5 - - Machine Learning Challenge: 6 - - Survival Analysis: 7 - - Subgroup Discovery: 8 + Refers to the type of task. output_format: str, optional (default='dict') The parameter decides the format of the output. - If 'dict' the output is a dict of dict From 9ce2a6bb0a7bbfdd46ed1c517842a040bdc89d17 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 7 Oct 2022 12:21:50 +0200 Subject: [PATCH 14/53] Improve the error message on out-of-sync flow ids (#1171) * Improve the error message on out-of-sync flow ids * Add more meaningful messages on test fail --- openml/setups/functions.py | 5 ++++- tests/test_runs/test_run_functions.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 675172738..1ce0ed005 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -42,7 +42,10 @@ def setup_exists(flow) -> int: # checks whether the flow exists on the server and flow ids align exists = flow_exists(flow.name, flow.external_version) if exists != flow.flow_id: - raise ValueError("This should not happen!") + raise ValueError( + f"Local flow id ({flow.id}) differs from server id ({exists}). " + "If this issue persists, please contact the developers." + ) openml_param_settings = flow.extension.obtain_parameter_values(flow) description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 7a860dab3..8d79852bb 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1112,13 +1112,13 @@ def test__run_exists(self): flow = self.extension.model_to_flow(clf) flow_exists = openml.flows.flow_exists(flow.name, flow.external_version) - self.assertGreater(flow_exists, 0) + self.assertGreater(flow_exists, 0, "Server says flow from run does not exist.") # Do NOT use get_flow reinitialization, this potentially sets # hyperparameter values wrong. Rather use the local model. 
downloaded_flow = openml.flows.get_flow(flow_exists) downloaded_flow.model = clf setup_exists = openml.setups.setup_exists(downloaded_flow) - self.assertGreater(setup_exists, 0) + self.assertGreater(setup_exists, 0, "Server says setup of run does not exist.") run_ids = run_exists(task.task_id, setup_exists) self.assertTrue(run_ids, msg=(run_ids, clf)) From 2ed77dba15b3845d448e566d0ade001d41d4d2b3 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 7 Oct 2022 12:22:10 +0200 Subject: [PATCH 15/53] Add scikit-learn 1.0 and 1.1 values for test (#1168) * Add scikit-learn 1.0 and 1.1 values for test DecisionTree and RandomForestRegressor have one less default hyperparameter: `min_impurity_split` * Remove min_impurity_split requirements for >=1.0 * Update KMeans checks for scikit-learn 1.0 and 1.1 --- .../test_sklearn_extension.py | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index a906d7ebd..a9fa018fb 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -168,7 +168,7 @@ def test_serialize_model(self): ("splitter", '"best"'), ) ) - else: + elif LooseVersion(sklearn.__version__) < "1.0": fixture_parameters = OrderedDict( ( ("class_weight", "null"), @@ -186,6 +186,24 @@ def test_serialize_model(self): ("splitter", '"best"'), ) ) + else: + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_decrease", "0.0"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", presort_val), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) + if LooseVersion(sklearn.__version__) >= "0.22": fixture_parameters.update({"ccp_alpha": "0.0"}) fixture_parameters.move_to_end("ccp_alpha", last=False) @@ -249,7 +267,7 @@ def test_serialize_model_clustering(self): ("verbose", "0"), ) ) - else: + elif LooseVersion(sklearn.__version__) < "1.0": fixture_parameters = OrderedDict( ( ("algorithm", '"auto"'), @@ -265,6 +283,34 @@ def test_serialize_model_clustering(self): ("verbose", "0"), ) ) + elif LooseVersion(sklearn.__version__) < "1.1": + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) + else: + fixture_parameters = OrderedDict( + ( + ("algorithm", '"lloyd"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} serialization, _ = self._serialization_test_helper( @@ -1335,12 +1381,19 @@ def test__get_fn_arguments_with_defaults(self): (sklearn.tree.DecisionTreeClassifier.__init__, 14), (sklearn.pipeline.Pipeline.__init__, 2), ] - else: + elif sklearn_version < "1.0": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 18), (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 2), ] + else: + # Tested with 1.0 and 1.1 + fns = [ + 
(sklearn.ensemble.RandomForestRegressor.__init__, 17), + (sklearn.tree.DecisionTreeClassifier.__init__, 12), + (sklearn.pipeline.Pipeline.__init__, 2), + ] for fn, num_params_with_defaults in fns: defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) From 2fde8d51af644422018f844cb877500e2c7c149d Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 7 Oct 2022 12:22:54 +0200 Subject: [PATCH 16/53] Update Pipeline description for >=1.0 (#1170) --- .../test_sklearn_extension.py | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index a9fa018fb..789229d8a 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -399,7 +399,24 @@ def test_serialize_pipeline(self): ) fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): + if version.parse(sklearn.__version__) >= version.parse("1.0"): + fixture_description = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement `fit` and `transform` methods.\nThe final " + "estimator only needs to implement `fit`.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ". For this, it\nenables setting parameters of the various steps" + " using their names and the\nparameter name separated by a `'__'`," + " as in the example below. A step's\nestimator may be replaced " + "entirely by setting the parameter with its name\nto another " + "estimator, or a transformer removed by setting it to\n" + "`'passthrough'` or `None`." + ) + elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): fixture_description = ( "Pipeline of transforms with a final estimator.\n\nSequentially" " apply a list of transforms and a final estimator.\n" @@ -489,7 +506,24 @@ def test_serialize_pipeline_clustering(self): ) fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): + if version.parse(sklearn.__version__) >= version.parse("1.0"): + fixture_description = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement `fit` and `transform` methods.\nThe final " + "estimator only needs to implement `fit`.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ". For this, it\nenables setting parameters of the various steps" + " using their names and the\nparameter name separated by a `'__'`," + " as in the example below. A step's\nestimator may be replaced " + "entirely by setting the parameter with its name\nto another " + "estimator, or a transformer removed by setting it to\n" + "`'passthrough'` or `None`." 
+ ) + elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): fixture_description = ( "Pipeline of transforms with a final estimator.\n\nSequentially" " apply a list of transforms and a final estimator.\n" From 2ddae0f72b10a03e82c58cdd3e1c1e142d80fa31 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 7 Oct 2022 12:24:41 +0200 Subject: [PATCH 17/53] Update URL to reflect new endpoint (#1172) --- tests/test_runs/test_run_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 8d79852bb..89b6ef0e6 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1302,7 +1302,7 @@ def test_get_run(self): assert "weka" in run.tags assert "weka_3.7.12" in run.tags assert run.predictions_url == ( - "https://www.openml.org/data/download/1667125/" + "https://api.openml.org/data/download/1667125/" "weka_generated_predictions4575715871712251329.arff" ) From c17704e82f5a1585409c75d70ce5fa1bea36ed57 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 7 Oct 2022 12:25:10 +0200 Subject: [PATCH 18/53] Remove tests which only test scikit-learn functionality (#1169) We should only test code that we write. --- .../test_sklearn_extension.py | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 789229d8a..8de75c1b4 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -1304,36 +1304,6 @@ def test_illegal_parameter_names(self): for case in cases: self.assertRaises(PyOpenMLError, self.extension.model_to_flow, case) - def test_illegal_parameter_names_pipeline(self): - # illegal name: steps - steps = [ - ("Imputer", SimpleImputer(strategy="median")), - ( - "OneHotEncoder", - sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), - ), - ( - "steps", - sklearn.ensemble.BaggingClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier - ), - ), - ] - self.assertRaises(ValueError, sklearn.pipeline.Pipeline, steps=steps) - - def test_illegal_parameter_names_featureunion(self): - # illegal name: transformer_list - transformer_list = [ - ("transformer_list", SimpleImputer(strategy="median")), - ( - "OneHotEncoder", - sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), - ), - ] - self.assertRaises( - ValueError, sklearn.pipeline.FeatureUnion, transformer_list=transformer_list - ) - def test_paralizable_check(self): # using this model should pass the test (if param distribution is # legal) From 953f84e93069859191575b5acb188e3d26573fad Mon Sep 17 00:00:00 2001 From: Will Martin <32962172+willcmartin@users.noreply.github.com> Date: Fri, 7 Oct 2022 05:29:03 -0500 Subject: [PATCH 19/53] fix nonetype error during print for tasks without class labels (#1148) * fix nonetype error during print for tasks without class labels * fix #1100/#1058 nonetype error Co-authored-by: Pieter Gijsbers --- doc/progress.rst | 3 ++- openml/tasks/task.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 6bbd66f51..b8e6864a8 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,11 +8,12 @@ Changelog 0.13.0 ~~~~~~ - * MAINT#1104: Fix outdated docstring for ``list_task``. 
* FIX#1030: ``pre-commit`` hooks now no longer should issue a warning. + * FIX#1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute. * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key. * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor. + * MAINT#1104: Fix outdated docstring for ``list_task``. * MAIN#1146: Update the pre-commit dependencies. * ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data. diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 095730645..14a85357b 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -97,7 +97,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["Estimation Procedure"] = self.estimation_procedure["type"] if getattr(self, "target_name", None) is not None: fields["Target Feature"] = getattr(self, "target_name") - if hasattr(self, "class_labels"): + if hasattr(self, "class_labels") and getattr(self, "class_labels") is not None: fields["# of Classes"] = len(getattr(self, "class_labels")) if hasattr(self, "cost_matrix"): fields["Cost Matrix"] = "Available" From 6da0aacae000d3990ed8e0d22589ffae8829198d Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 10 Oct 2022 10:42:40 +0200 Subject: [PATCH 20/53] Flow exists GET is deprecated, use POST (#1173) --- openml/flows/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 73c2b1d3a..43cb453fa 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -255,7 +255,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: xml_response = openml._api_calls._perform_api_call( "flow/exists", - "get", + "post", data={"name": name, "external_version": external_version}, ) From 22ee9cd019a96918dc3cadef0135a960b4b6bebc Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 11 Oct 2022 11:08:19 +0200 Subject: [PATCH 21/53] Test `get_parquet` on production server (#1174) The test server has minio urls disabled. This is because we currently do not have a setup that represents the live server in a test environment yet. So, we download from the production server instead. --- tests/test_datasets/test_dataset_functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 2fa97860b..995474142 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1541,7 +1541,10 @@ def test_data_fork(self): ) def test_get_dataset_parquet(self): - dataset = openml.datasets.get_dataset(20) + # Parquet functionality is disabled on the test server + # There is no parquet-copy of the test server yet. 
+ openml.config.server = self.production_server + dataset = openml.datasets.get_dataset(61) self.assertIsNotNone(dataset._minio_url) self.assertIsNotNone(dataset.parquet_file) self.assertTrue(os.path.isfile(dataset.parquet_file)) From 5cd697334d281146b573e2512969cf3bd3f372eb Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 18 Oct 2022 13:09:40 +0200 Subject: [PATCH 22/53] Refactor out different test cases to separate tests (#1176) The previous solution had two test conditions (strict and not strict) and several scikit-learn versions, because of two distinct changes within scikit-learn (the removal of min_impurity_split in 1.0, and the restructuring of public/private models in 0.24). I refactored out the separate test cases to greatly simplify the individual tests, and I added a test case for scikit-learn>=1.0, which was previously not covered. --- tests/test_flows/test_flow_functions.py | 67 +++++++++++++++++-------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index eb80c2861..fe058df23 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -324,32 +324,55 @@ def test_get_flow_reinstantiate_model_no_extension(self): ) @unittest.skipIf( - LooseVersion(sklearn.__version__) == "0.19.1", reason="Target flow is from sklearn 0.19.1" + LooseVersion(sklearn.__version__) == "0.19.1", + reason="Requires scikit-learn!=0.19.1, because target flow is from that version.", ) - def test_get_flow_reinstantiate_model_wrong_version(self): - # Note that CI does not test against 0.19.1. + def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception(self): openml.config.server = self.production_server - _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3] - if sklearn_major > 23: - flow = 18587 # 18687, 18725 --- flows building random forest on >= 0.23 - flow_sklearn_version = "0.23.1" - else: - flow = 8175 - flow_sklearn_version = "0.19.1" - expected = ( - "Trying to deserialize a model with dependency " - "sklearn=={} not satisfied.".format(flow_sklearn_version) - ) + flow = 8175 + expected = "Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied." self.assertRaisesRegex( - ValueError, expected, openml.flows.get_flow, flow_id=flow, reinstantiate=True + ValueError, + expected, + openml.flows.get_flow, + flow_id=flow, + reinstantiate=True, + strict_version=True, ) - if LooseVersion(sklearn.__version__) > "0.19.1": - # 0.18 actually can't deserialize this because of incompatibility - flow = openml.flows.get_flow(flow_id=flow, reinstantiate=True, strict_version=False) - # ensure that a new flow was created - assert flow.flow_id is None - assert "sklearn==0.19.1" not in flow.dependencies - assert "sklearn>=0.19.1" not in flow.dependencies + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "1" and LooseVersion(sklearn.__version__) != "1.0.0", + reason="Requires scikit-learn < 1.0.1." + # Because scikit-learn dropped min_impurity_split hyperparameter in 1.0, + # and the requested flow is from 1.0.0 exactly. 
+ ) + def test_get_flow_reinstantiate_flow_not_strict_post_1(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=19190, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==1.0.0" not in flow.dependencies + + @unittest.skipIf( + (LooseVersion(sklearn.__version__) < "0.23.2") + or ("1.0" < LooseVersion(sklearn.__version__)), + reason="Requires scikit-learn 0.23.2 or ~0.24." + # Because these still have min_impurity_split, but with new scikit-learn module structure." + ) + def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=18587, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==0.23.1" not in flow.dependencies + + @unittest.skipIf( + "0.23" < LooseVersion(sklearn.__version__), + reason="Requires scikit-learn<=0.23, because the scikit-learn module structure changed.", + ) + def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=8175, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==0.19.1" not in flow.dependencies def test_get_flow_id(self): if self.long_version: From e6250fa6e01b24e71ce1ab3720236fd5cbfc67f2 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 24 Oct 2022 19:58:11 +0200 Subject: [PATCH 23/53] Provide clearer error when server provides bad data description XML (#1178) --- openml/_api_calls.py | 15 +++++++++------ openml/datasets/functions.py | 12 +++++++++--- tests/test_datasets/test_dataset_functions.py | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 959cad51a..87511693c 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -23,6 +23,14 @@ ) +def _create_url_from_endpoint(endpoint: str) -> str: + url = config.server + if not url.endswith("/"): + url += "/" + url += endpoint + return url.replace("=", "%3d") + + def _perform_api_call(call, request_method, data=None, file_elements=None): """ Perform an API call at the OpenML server. 
@@ -50,12 +58,7 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): return_value : str Return value of the OpenML server """ - url = config.server - if not url.endswith("/"): - url += "/" - url += call - - url = url.replace("=", "%3d") + url = _create_url_from_endpoint(call) logging.info("Starting [%s] request for the URL %s", request_method, url) start = time.time() diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index fb2e201f6..1e6fb5c78 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,6 +3,7 @@ import io import logging import os +from pyexpat import ExpatError from typing import List, Dict, Union, Optional, cast import numpy as np @@ -19,6 +20,7 @@ from .dataset import OpenMLDataset from ..exceptions import ( OpenMLHashException, + OpenMLServerError, OpenMLServerException, OpenMLPrivateDatasetError, ) @@ -437,7 +439,7 @@ def get_dataset( parquet_file = None remove_dataset_cache = False except OpenMLServerException as e: - # if there was an exception, + # if there was an exception # check if the user had access to the dataset if e.code == 112: raise OpenMLPrivateDatasetError(e.message) from None @@ -949,14 +951,18 @@ def _get_dataset_description(did_cache_dir, dataset_id): try: with io.open(description_file, encoding="utf8") as fh: dataset_xml = fh.read() + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] except Exception: url_extension = "data/{}".format(dataset_id) dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") + try: + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] + except ExpatError as e: + url = openml._api_calls._create_url_from_endpoint(url_extension) + raise OpenMLServerError(f"Dataset description XML at '{url}' is malformed.") from e with io.open(description_file, "w", encoding="utf8") as fh: fh.write(dataset_xml) - description = xmltodict.parse(dataset_xml)["oml:data_set_description"] - return description diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 995474142..50f449ebb 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1240,7 +1240,7 @@ def _wait_for_dataset_being_processed(self, dataset_id): try: downloaded_dataset = openml.datasets.get_dataset(dataset_id) break - except Exception as e: + except OpenMLServerException as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found TestBase.logger.error( From 75fed8a7a0409daecc5ff54a14925de4403309c9 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 24 Oct 2022 20:00:47 +0200 Subject: [PATCH 24/53] Update more sklearn tests (#1175) * n_iter is now keyword-only * Standardize sklearn pipeline description lookups * `priors` is no longer positional, and wasn't used in the first place * Remove loss=kneighbours from the complex pipelin --- .../test_sklearn_extension.py | 150 ++++++------------ 1 file changed, 45 insertions(+), 105 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 8de75c1b4..709d123f0 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -5,6 +5,7 @@ import re import os import sys +from typing import Any import unittest from distutils.version import LooseVersion 
from collections import OrderedDict @@ -73,6 +74,45 @@ def setUp(self): self.extension = SklearnExtension() + def _get_expected_pipeline_description(self, model: Any) -> str: + if version.parse(sklearn.__version__) >= version.parse("1.0"): + expected_fixture = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement `fit` and `transform` methods.\nThe final " + "estimator only needs to implement `fit`.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ". For this, it\nenables setting parameters of the various steps" + " using their names and the\nparameter name separated by a `'__'`," + " as in the example below. A step's\nestimator may be replaced " + "entirely by setting the parameter with its name\nto another " + "estimator, or a transformer removed by setting it to\n" + "`'passthrough'` or `None`." + ) + elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): + expected_fixture = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement fit and transform methods.\nThe final " + "estimator only needs to implement fit.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ".\nFor this, it enables setting parameters of the various steps" + " using their\nnames and the parameter name separated by a '__'," + " as in the example below.\nA step's estimator may be replaced " + "entirely by setting the parameter\nwith its name to another " + "estimator, or a transformer removed by setting\nit to " + "'passthrough' or ``None``." + ) + else: + expected_fixture = self.extension._get_sklearn_description(model) + return expected_fixture + def _serialization_test_helper( self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2) ): @@ -398,44 +438,7 @@ def test_serialize_pipeline(self): "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" - - if version.parse(sklearn.__version__) >= version.parse("1.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement `fit` and `transform` methods.\nThe final " - "estimator only needs to implement `fit`.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ". For this, it\nenables setting parameters of the various steps" - " using their names and the\nparameter name separated by a `'__'`," - " as in the example below. A step's\nestimator may be replaced " - "entirely by setting the parameter with its name\nto another " - "estimator, or a transformer removed by setting it to\n" - "`'passthrough'` or `None`." 
- ) - elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final " - "estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ".\nFor this, it enables setting parameters of the various steps" - " using their\nnames and the parameter name separated by a '__'," - " as in the example below.\nA step's estimator may be replaced " - "entirely by setting the parameter\nwith its name to another " - "estimator, or a transformer removed by setting\nit to " - "'passthrough' or ``None``." - ) - else: - fixture_description = self.extension._get_sklearn_description(model) - + fixture_description = self._get_expected_pipeline_description(model) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], @@ -505,43 +508,7 @@ def test_serialize_pipeline_clustering(self): "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" - - if version.parse(sklearn.__version__) >= version.parse("1.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement `fit` and `transform` methods.\nThe final " - "estimator only needs to implement `fit`.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ". For this, it\nenables setting parameters of the various steps" - " using their names and the\nparameter name separated by a `'__'`," - " as in the example below. A step's\nestimator may be replaced " - "entirely by setting the parameter with its name\nto another " - "estimator, or a transformer removed by setting it to\n" - "`'passthrough'` or `None`." - ) - elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final " - "estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ".\nFor this, it enables setting parameters of the various steps" - " using their\nnames and the parameter name separated by a '__'," - " as in the example below.\nA step's estimator may be replaced " - "entirely by setting the parameter\nwith its name to another " - "estimator, or a transformer removed by setting\nit to " - "'passthrough' or ``None``." 
- ) - else: - fixture_description = self.extension._get_sklearn_description(model) + fixture_description = self._get_expected_pipeline_description(model) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], @@ -699,27 +666,7 @@ def test_serialize_column_transformer_pipeline(self): fixture_name: [], } - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final" - " estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different " - "parameters.\nFor this, it enables setting parameters of the " - "various steps using their\nnames and the parameter name " - "separated by a '__', as in the example below.\nA step's " - "estimator may be replaced entirely by setting the parameter\n" - "with its name to another estimator, or a transformer removed by" - " setting\nit to 'passthrough' or ``None``." - ) - else: - fixture_description = self.extension._get_sklearn_description(model) - + fixture_description = self._get_expected_pipeline_description(model) serialization, new_model = self._serialization_test_helper( model, X=None, @@ -1494,9 +1441,7 @@ def test_deserialize_complex_with_defaults(self): "Estimator", sklearn.ensemble.AdaBoostClassifier( sklearn.ensemble.BaggingClassifier( - sklearn.ensemble.GradientBoostingClassifier( - sklearn.neighbors.KNeighborsClassifier() - ) + sklearn.ensemble.GradientBoostingClassifier() ) ), ), @@ -1511,7 +1456,6 @@ def test_deserialize_complex_with_defaults(self): "Estimator__n_estimators": 10, "Estimator__base_estimator__n_estimators": 10, "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, } else: params = { @@ -1520,7 +1464,6 @@ def test_deserialize_complex_with_defaults(self): "Estimator__n_estimators": 50, "Estimator__base_estimator__n_estimators": 10, "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5, } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) @@ -1886,9 +1829,6 @@ def test_run_model_on_fold_classification_3(self): class HardNaiveBayes(sklearn.naive_bayes.GaussianNB): # class for testing a naive bayes classifier that does not allow soft # predictions - def __init__(self, priors=None): - super(HardNaiveBayes, self).__init__(priors) - def predict_proba(*args, **kwargs): raise AttributeError("predict_proba is not available when " "probability=False") @@ -2059,7 +1999,7 @@ def test__extract_trace_data(self): clf = sklearn.model_selection.RandomizedSearchCV( sklearn.neural_network.MLPClassifier(), param_grid, - num_iters, + n_iter=num_iters, ) # just run the task on the model (without invoking any fancy extension & openml code) train, _ = task.get_train_test_split_indices(0, 0) From f37ebbec94dffd1aad176978304cd7e17fcf666f Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Thu, 24 Nov 2022 19:18:05 +0100 Subject: [PATCH 25/53] Remove dtype checking for 
prediction comparison (#1177)

It looks like the predictions loaded from an arff file are read as
floats by the arff reader, which results in a different type (float vs
int). Because "equality" of values is already checked, I figured the
dtype is not as important. That said, I am not sure why there are so
many redundant comparisons in the first place. Anyway, the difference
should be due to pandas inference behavior, and if that is what we want
to test, then we should make a small isolated test case instead of
integrating it into every prediction unit test. Finally, over the next
year we should move away from ARFF.
---
 tests/test_runs/test_run_functions.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 89b6ef0e6..a9abcd05e 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -183,7 +183,11 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create
         predictions_prime = run_prime._generate_arff_dict()

         self._compare_predictions(predictions, predictions_prime)
-        pd.testing.assert_frame_equal(run.predictions, run_prime.predictions)
+        pd.testing.assert_frame_equal(
+            run.predictions,
+            run_prime.predictions,
+            check_dtype=False,  # Loaded ARFF reads NUMERIC as float, even if integer.
+        )

     def _perform_run(
         self,

From a909a0c31b95d0ffb46bb129d412875ab08d02c8 Mon Sep 17 00:00:00 2001
From: Eddie Bergman
Date: Fri, 25 Nov 2022 13:47:58 +0100
Subject: [PATCH 26/53] feat(minio): Allow for proxies (#1184)

* feat(minio): Allow for proxies

* fix: Declared proxy_client as None

* refactor(proxy): Change to `str | None` with "auto"
---
 openml/_api_calls.py | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/openml/_api_calls.py b/openml/_api_calls.py
index 87511693c..7db1155cc 100644
--- a/openml/_api_calls.py
+++ b/openml/_api_calls.py
@@ -10,6 +10,7 @@ import urllib.parse
 import xml
 import xmltodict
+from urllib3 import ProxyManager
 from typing import Dict, Optional, Union

 import minio
@@ -23,6 +24,26 @@
 )


+def resolve_env_proxies(url: str) -> Optional[str]:
+    """Attempt to find a suitable proxy for this url.
+
+    Relies on ``requests`` internals to remain consistent. To disable this from the
+    environment, please set the environment variable ``no_proxy="*"``.
+
+    Parameters
+    ----------
+    url : str
+        The url endpoint
+
+    Returns
+    -------
+    Optional[str]
+        The proxy url if found, else None
+    """
+    resolved_proxies = requests.utils.get_environ_proxies(url)
+    selected_proxy = requests.utils.select_proxy(url, resolved_proxies)
+    return selected_proxy
+
 def _create_url_from_endpoint(endpoint: str) -> str:
     url = config.server
     if not url.endswith("/"):
@@ -84,6 +105,7 @@ def _download_minio_file(
     source: str,
     destination: Union[str, pathlib.Path],
     exists_ok: bool = True,
+    proxy: Optional[str] = "auto",
 ) -> None:
     """Download file ``source`` from a MinIO Bucket and store it at ``destination``.
@@ -95,7 +117,10 @@
         Path to store the file to, if a directory is provided the original filename is used.
     exists_ok : bool, optional (default=True)
         If False, raise FileExists if a file already exists in ``destination``.
-
+    proxy: str, optional (default = "auto")
+        The proxy server to use. By default it's "auto" which uses ``requests`` to
+        automatically find the proxy to use. Pass ``None`` or set the environment
+        variable ``no_proxy="*"`` to disable proxies.
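+
+    Examples
+    --------
+    A hypothetical call; the MinIO URL and the destination path below are
+    illustrative only, not a real endpoint::
+
+        _download_minio_file(
+            "http://minio.example.org/dataset20/dataset_20.pq",
+            "/tmp/dataset_20.pq",
+            proxy=None,  # ignore any proxy configured in the environment
+        )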
""" destination = pathlib.Path(destination) parsed_url = urllib.parse.urlparse(source) @@ -107,7 +132,16 @@ def _download_minio_file( if destination.is_file() and not exists_ok: raise FileExistsError(f"File already exists in {destination}.") - client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + if proxy == "auto": + proxy = resolve_env_proxies(parsed_url.geturl()) + + proxy_client = ProxyManager(proxy) if proxy else None + + client = minio.Minio( + endpoint=parsed_url.netloc, + secure=False, + http_client=proxy_client + ) try: client.fget_object( From 1dfe3988cea0ab0b74ef18b0b5485bd53cb5c007 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 25 Nov 2022 15:09:49 +0100 Subject: [PATCH 27/53] Update __version__.py (#1189) --- openml/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/__version__.py b/openml/__version__.py index 0f368c426..976394309 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.12.2" +__version__ = "0.13.0" From 580b5363d98bdda030f4600ba45cba9e6696f321 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 25 Nov 2022 15:10:08 +0100 Subject: [PATCH 28/53] Download all files (#1188) * Towards downloading buckets * Download entire bucket instead of dataset file * Dont download arff, skip files already cached * Automatically unzip any downloaded archives * Make downloading the bucket optional Additionally, rename old cached files to the new filename format. * Allow users to download the full bucket when pq is already cached Otherwise the only way would be to delete the cache. * Add unit test stub * Remove redundant try/catch * Remove commented out print statement * Still download arff * Towards downloading buckets * Download entire bucket instead of dataset file * Dont download arff, skip files already cached * Automatically unzip any downloaded archives * Make downloading the bucket optional Additionally, rename old cached files to the new filename format. * Allow users to download the full bucket when pq is already cached Otherwise the only way would be to delete the cache. * Add unit test stub * Remove redundant try/catch * Remove commented out print statement * Still download arff * ADD: download all files from minio bucket * Add note for #1184 * Fix pre-commit issues (mypy, flake) Co-authored-by: Matthias Feurer --- doc/progress.rst | 2 + openml/_api_calls.py | 45 ++++++++++++++++--- openml/datasets/functions.py | 45 ++++++++++++++++--- tests/test_datasets/test_dataset_functions.py | 9 ++++ 4 files changed, 91 insertions(+), 10 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index b8e6864a8..d3d33caf6 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -12,10 +12,12 @@ Changelog * FIX#1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute. * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key. + * FIX#1184: Automatically resolve proxies when downloading from minio. Turn this off by setting environment variable ``no_proxy="*"``. * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor. * MAINT#1104: Fix outdated docstring for ``list_task``. * MAIN#1146: Update the pre-commit dependencies. 
* ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data. + * ADD#1188: EXPERIMENTAL. Allow downloading all files from a minio bucket with ``download_all_files=True`` for ``get_dataset``. 0.12.2 diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 7db1155cc..f3c3306fc 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -12,6 +12,7 @@ import xmltodict from urllib3 import ProxyManager from typing import Dict, Optional, Union +import zipfile import minio @@ -44,6 +45,7 @@ def resolve_env_proxies(url: str) -> Optional[str]: selected_proxy = requests.utils.select_proxy(url, resolved_proxies) return selected_proxy + def _create_url_from_endpoint(endpoint: str) -> str: url = config.server if not url.endswith("/"): @@ -137,11 +139,7 @@ def _download_minio_file( proxy_client = ProxyManager(proxy) if proxy else None - client = minio.Minio( - endpoint=parsed_url.netloc, - secure=False, - http_client=proxy_client - ) + client = minio.Minio(endpoint=parsed_url.netloc, secure=False, http_client=proxy_client) try: client.fget_object( @@ -149,6 +147,10 @@ def _download_minio_file( object_name=object_name, file_path=str(destination), ) + if destination.is_file() and destination.suffix == ".zip": + with zipfile.ZipFile(destination, "r") as zip_ref: + zip_ref.extractall(destination.parent) + except minio.error.S3Error as e: if e.message.startswith("Object does not exist"): raise FileNotFoundError(f"Object at '{source}' does not exist.") from e @@ -157,6 +159,39 @@ def _download_minio_file( raise FileNotFoundError("Bucket does not exist or is private.") from e +def _download_minio_bucket( + source: str, + destination: Union[str, pathlib.Path], + exists_ok: bool = True, +) -> None: + """Download file ``source`` from a MinIO Bucket and store it at ``destination``. + + Parameters + ---------- + source : Union[str, pathlib.Path] + URL to a MinIO bucket. + destination : str + Path to a directory to store the bucket content in. + exists_ok : bool, optional (default=True) + If False, raise FileExists if a file already exists in ``destination``. + """ + + destination = pathlib.Path(destination) + parsed_url = urllib.parse.urlparse(source) + + # expect path format: /BUCKET/path/to/file.ext + bucket = parsed_url.path[1:] + + client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + + for file_object in client.list_objects(bucket, recursive=True): + _download_minio_file( + source=source + "/" + file_object.object_name, + destination=pathlib.Path(destination, file_object.object_name), + exists_ok=True, + ) + + def _download_text_file( source: str, output_path: Optional[str] = None, diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 1e6fb5c78..770413a23 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -5,6 +5,7 @@ import os from pyexpat import ExpatError from typing import List, Dict, Union, Optional, cast +import warnings import numpy as np import arff @@ -356,6 +357,7 @@ def get_dataset( error_if_multiple: bool = False, cache_format: str = "pickle", download_qualities: bool = True, + download_all_files: bool = False, ) -> OpenMLDataset: """Download the OpenML dataset representation, optionally also download actual data file. @@ -389,11 +391,20 @@ def get_dataset( no.of.rows is very high. download_qualities : bool (default=True) Option to download 'qualities' meta-data in addition to the minimal dataset description. + download_all_files: bool (default=False) + EXPERIMENTAL. 
Download all files related to the dataset that reside on the server.
+        Useful for datasets which refer to auxiliary files (e.g., meta-album).
+

     Returns
     -------
     dataset : :class:`openml.OpenMLDataset`
         The downloaded dataset.
     """
+    if download_all_files:
+        warnings.warn(
+            "``download_all_files`` is experimental and is likely to break with new releases."
+        )
+
     if cache_format not in ["feather", "pickle"]:
         raise ValueError(
             "cache_format must be one of 'feather' or 'pickle'. "
@@ -434,7 +445,12 @@
         arff_file = _get_dataset_arff(description) if download_data else None

         if "oml:minio_url" in description and download_data:
-            parquet_file = _get_dataset_parquet(description)
+            try:
+                parquet_file = _get_dataset_parquet(
+                    description, download_all_files=download_all_files
+                )
+            except urllib3.exceptions.MaxRetryError:
+                parquet_file = None
         else:
             parquet_file = None
         remove_dataset_cache = False
@@ -967,7 +983,9 @@ def _get_dataset_description(did_cache_dir, dataset_id):


 def _get_dataset_parquet(
-    description: Union[Dict, OpenMLDataset], cache_directory: str = None
+    description: Union[Dict, OpenMLDataset],
+    cache_directory: str = None,
+    download_all_files: bool = False,
 ) -> Optional[str]:
     """Return the path to the local parquet file of the dataset. If it is not cached,
     it is downloaded.
@@ -987,23 +1005,40 @@
         Folder to store the parquet file in. If None, use the default
         cache directory for the dataset.

+    download_all_files: bool, optional (default=False)
+        If `True`, download all data found in the bucket to which the description's
+        ``minio_url`` points; otherwise only download the parquet file.
+
     Returns
     -------
     output_filename : string, optional
         Location of the Parquet file if successfully downloaded, None otherwise.
     """
     if isinstance(description, dict):
-        url = description.get("oml:minio_url")
+        url = cast(str, description.get("oml:minio_url"))
         did = description.get("oml:id")
     elif isinstance(description, OpenMLDataset):
-        url = description._minio_url
+        url = cast(str, description._minio_url)
         did = description.dataset_id
     else:
         raise TypeError("`description` should be either OpenMLDataset or Dict.")

     if cache_directory is None:
         cache_directory = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, did)
-    output_file_path = os.path.join(cache_directory, "dataset.pq")
+    output_file_path = os.path.join(cache_directory, f"dataset_{did}.pq")
+
+    old_file_path = os.path.join(cache_directory, "dataset.pq")
+    if os.path.isfile(old_file_path):
+        os.rename(old_file_path, output_file_path)
+
+    # For this release, we want to be able to force a new download even if the
+    # parquet file is already present when ``download_all_files`` is set.
+    # For now, it would be the only way for the user to fetch the additional
+    # files in the bucket (no function exists on an OpenMLDataset to do this).
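+    #
+    # A sketch of the intended call pattern from user code (the dataset id
+    # below is hypothetical; any dataset whose description carries a
+    # ``minio_url`` works):
+    #
+    #     import openml
+    #     dataset = openml.datasets.get_dataset(40945, download_all_files=True)
+    #
+    # That call reaches this point with ``download_all_files=True`` and
+    # re-fetches the whole bucket into the dataset's cache directory.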
+ if download_all_files: + if url.endswith(".pq"): + url, _ = url.rsplit("/", maxsplit=1) + openml._api_calls._download_minio_bucket(source=cast(str, url), destination=cache_directory) if not os.path.isfile(output_file_path): try: diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 50f449ebb..e6c4fe3ec 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -322,6 +322,15 @@ def test_get_dataset_by_name(self): openml.config.server = self.production_server self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45) + @pytest.mark.skip("Feature is experimental, can not test against stable server.") + def test_get_dataset_download_all_files(self): + # openml.datasets.get_dataset(id, download_all_files=True) + # check for expected files + # checking that no additional files are downloaded if + # the default (false) is used, seems covered by + # test_get_dataset_lazy + raise NotImplementedError + def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) self.assertEqual(type(dataset), OpenMLDataset) From 5eb84ce0961d469f16a95c5a3f82f35b7cbcec0e Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 25 Nov 2022 15:10:19 +0100 Subject: [PATCH 29/53] Skip tests that use arff reading optimization for typecheck (#1185) Those types changed in the switch to parquet, and we need to update the server parquet files and/or test expectations. --- tests/test_datasets/test_dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index e9cb86c50..15a801383 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -143,6 +143,7 @@ def test_get_data_pandas(self): self.assertTrue(X[col_name].dtype.name == col_dtype[col_name]) self.assertTrue(y.dtype.name == col_dtype["survived"]) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_boolean_pandas(self): # test to check that we are converting properly True and False even # with some inconsistency when dumping the data on openml @@ -170,6 +171,7 @@ def _check_expected_type(self, dtype, is_cat, col): self.assertEqual(dtype.name, expected_type) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, _, categorical, _ = self.dataset.get_data(include_row_id=True) @@ -196,6 +198,7 @@ def test_get_data_with_target_array(self): self.assertEqual(len(attribute_names), 38) self.assertNotIn("class", attribute_names) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_target_pandas(self): X, y, categorical, attribute_names = self.dataset.get_data(target="class") self.assertIsInstance(X, pd.DataFrame) @@ -220,6 +223,7 @@ def test_get_data_rowid_and_ignore_and_target(self): self.assertListEqual(categorical, cats) self.assertEqual(y.shape, (898,)) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_ignore_attributes(self): self.dataset.ignore_attribute = ["condition"] rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True) From 467f6eb5d4b6568ede3a7480f091fa5466da4ca3 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 20 Feb 2023 10:48:59 +0100 Subject: [PATCH 30/53] Update configs (#1199) * Update flake8 repo from gitlab to github * Exclude 
`venv` * Numpy scalar aliases are removed in 1.24 Fix numpy for future 0.13 releases, then fix and bump as needed --- .gitignore | 2 ++ .pre-commit-config.yaml | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 3e5102233..c06e715ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ *~ doc/generated examples/.ipynb_checkpoints +venv + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ebea5251e..05bac7967 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: additional_dependencies: - types-requests - types-python-dateutil - - repo: https://gitlab.com/pycqa/flake8 + - repo: https://github.com/pycqa/flake8 rev: 4.0.1 hooks: - id: flake8 diff --git a/setup.py b/setup.py index 9f3cdd0e6..281452548 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "python-dateutil", # Installed through pandas anyway. "pandas>=1.0.0", "scipy>=0.13.3", - "numpy>=1.6.2", + "numpy>=1.6.2,<1.24", "minio", "pyarrow", ], From dd62f2b1e06895731f616d42cdc8b8fdbe2ed17b Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 20 Feb 2023 13:25:36 +0100 Subject: [PATCH 31/53] Update tests for sklearn 1.2, server issue (#1200) * Relax error checking * Skip unit test due to server issue openml/openml#1180 * Account for rename parameter `base_estimator` to `estimator` in sk 1.2 * Update n_init parameter for sklearn 1.2 * Test for more specific exceptions --- .../test_sklearn_extension.py | 46 +++++++++---------- tests/test_runs/test_run_functions.py | 18 ++++++-- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 709d123f0..26c2dd563 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -338,6 +338,7 @@ def test_serialize_model_clustering(self): ) ) else: + n_init = '"warn"' if LooseVersion(sklearn.__version__) >= "1.2" else "10" fixture_parameters = OrderedDict( ( ("algorithm", '"lloyd"'), @@ -345,7 +346,7 @@ def test_serialize_model_clustering(self): ("init", '"k-means++"'), ("max_iter", "300"), ("n_clusters", "8"), - ("n_init", "10"), + ("n_init", n_init), ("random_state", "null"), ("tol", "0.0001"), ("verbose", "0"), @@ -358,13 +359,13 @@ def test_serialize_model_clustering(self): ) structure = serialization.get_structure("name") - self.assertEqual(serialization.name, fixture_name) - self.assertEqual(serialization.class_name, fixture_name) - self.assertEqual(serialization.custom_name, fixture_short_name) - self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters, fixture_parameters) - self.assertEqual(serialization.dependencies, version_fixture) - self.assertDictEqual(structure, fixture_structure) + assert serialization.name == fixture_name + assert serialization.class_name == fixture_name + assert serialization.custom_name == fixture_short_name + assert serialization.description == fixture_description + assert serialization.parameters == fixture_parameters + assert serialization.dependencies == version_fixture + assert structure == fixture_structure def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( @@ -1449,22 +1450,19 @@ def 
test_deserialize_complex_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - if LooseVersion(sklearn.__version__) < "0.23": - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - } - else: - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse": True, - "Estimator__n_estimators": 50, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - } + impute_strategy = "median" if LooseVersion(sklearn.__version__) < "0.23" else "mean" + sparse = LooseVersion(sklearn.__version__) >= "0.23" + estimator_name = ( + "base_estimator" if LooseVersion(sklearn.__version__) < "1.2" else "estimator" + ) + params = { + "Imputer__strategy": impute_strategy, + "OneHotEncoder__sparse": sparse, + "Estimator__n_estimators": 10, + f"Estimator__{estimator_name}__n_estimators": 10, + f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1, + } + pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index a9abcd05e..1e92613c3 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -410,10 +410,19 @@ def test_check_erronous_sklearn_flow_fails(self): # Invalid parameter values clf = LogisticRegression(C="abc", solver="lbfgs") - with self.assertRaisesRegex( - ValueError, - r"Penalty term must be positive; got \(C=u?'abc'\)", # u? for 2.7/3.4-6 compability - ): + # The exact error message depends on scikit-learn version. + # Because the sklearn-extension module is to be separated, + # I will simply relax specifics of the raised Error. + # old: r"Penalty term must be positive; got \(C=u?'abc'\)" + # new: sklearn.utils._param_validation.InvalidParameterError: + # The 'C' parameter of LogisticRegression must be a float in the range (0, inf]. Got 'abc' instead. # noqa: E501 + try: + from sklearn.utils._param_validation import InvalidParameterError + + exceptions = (ValueError, InvalidParameterError) + except ImportError: + exceptions = (ValueError,) + with self.assertRaises(exceptions): openml.runs.run_model_on_task( task=task, model=clf, @@ -680,6 +689,7 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) + @unittest.skip("https://github.com/openml/OpenML/issues/1180") @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", From 2a7ab1765f2b9bd0360b049724cdd7d352dd901d Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Mon, 20 Feb 2023 17:03:46 +0100 Subject: [PATCH 32/53] Version bump to dev and add changelog stub (#1190) --- doc/progress.rst | 7 +++++++ openml/__version__.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/progress.rst b/doc/progress.rst index d3d33caf6..6b42e851f 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,8 +6,15 @@ Changelog ========= +0.13.1 +~~~~~~ + + * Add new contributions here. + + 0.13.0 ~~~~~~ + * FIX#1030: ``pre-commit`` hooks now no longer should issue a warning. * FIX#1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute. 
* FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. diff --git a/openml/__version__.py b/openml/__version__.py index 976394309..c27a62daa 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.13.0" +__version__ = "0.13.1.dev" From 5f72e2eaebd160cea3b77ed7da3db53741b92ac8 Mon Sep 17 00:00:00 2001 From: Eddie Bergman Date: Mon, 20 Feb 2023 17:15:11 +0100 Subject: [PATCH 33/53] Add: dependabot checks for workflow versions (#1155) --- .github/dependabot.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..e5e5092a2 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 + +updates: + # This will check for updates to github actions every day + # https://docs.github.com/en/enterprise-server@3.4/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" From 7d069a92644d8111708d20e16986fb36d6f2e4de Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Tue, 21 Feb 2023 09:38:08 +0100 Subject: [PATCH 34/53] Change the cached file to reflect new standard #1188 (#1203) In #1188 we changed the standard cache file convention from dataset.pq to dataset_{did}.pq. See also #1188. --- .../test/datasets/30/{dataset.pq => dataset_30.pq} | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/files/org/openml/test/datasets/30/{dataset.pq => dataset_30.pq} (100%) diff --git a/tests/files/org/openml/test/datasets/30/dataset.pq b/tests/files/org/openml/test/datasets/30/dataset_30.pq similarity index 100% rename from tests/files/org/openml/test/datasets/30/dataset.pq rename to tests/files/org/openml/test/datasets/30/dataset_30.pq From 23755bf578d305b1b1bdb2c3455b0839fee591f5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 Feb 2023 10:50:01 +0100 Subject: [PATCH 35/53] Bump actions/checkout from 2 to 3 (#1206) Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2...v3) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dist.yaml | 2 +- .github/workflows/docs.yaml | 2 +- .github/workflows/pre-commit.yaml | 2 +- .github/workflows/release_docker.yaml | 2 +- .github/workflows/test.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/dist.yaml b/.github/workflows/dist.yaml index 51ffe03d5..4ae570190 100644 --- a/.github/workflows/dist.yaml +++ b/.github/workflows/dist.yaml @@ -6,7 +6,7 @@ jobs: dist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v2 with: diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index c14bd07d0..89870cbdd 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -5,7 +5,7 @@ jobs: build-and-deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v2 with: diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 6132b2de2..c81729d04 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -6,7 +6,7 @@ jobs: run-all-files: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python 3.7 uses: actions/setup-python@v2 with: diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index c4522c0be..670b38e02 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -19,7 +19,7 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Build and push id: docker_build uses: docker/build-push-action@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 059aec58d..5ac6d8dbb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,7 +35,7 @@ jobs: max-parallel: 4 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 2 - name: Setup Python ${{ matrix.python-version }} From 603fe60725fe6bf00c9f109d54249e4d2161af2f Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 22 Feb 2023 17:18:33 +0100 Subject: [PATCH 36/53] Update docker actions (#1211) * Update docker actions * Fix context * Specify tag for docker container to use strict python version (3.10) * Load OpenML in Docker file * load correct image * load correct image * Remove loading python again --- .github/workflows/release_docker.yaml | 27 ++++++++++++++++++++++----- docker/Dockerfile | 2 +- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index 670b38e02..3df6cdf4c 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -3,29 +3,46 @@ name: release-docker on: push: branches: + - 'main' - 'develop' - 'docker' jobs: + docker: + runs-on: ubuntu-latest + steps: - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - uses: actions/checkout@v3 + + - name: Check out 
the repo + uses: actions/checkout@v3 + + - name: Extract metadata (tags, labels) for Docker Hub + id: meta_dockerhub + uses: docker/metadata-action@v4 + with: + images: "openml/openml-python" + - name: Build and push id: docker_build - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v4 with: context: ./docker/ push: true - tags: openml/openml-python:latest + tags: ${{ steps.meta_dockerhub.outputs.tags }} + labels: ${{ steps.meta_dockerhub.outputs.labels }} + - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/docker/Dockerfile b/docker/Dockerfile index 5fcc16e34..c27abba40 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,6 @@ # Dockerfile to build an image with preinstalled dependencies # Useful building docs or running unix tests from a Windows host. -FROM python:3 +FROM python:3.10 RUN git clone https://github.com/openml/openml-python.git omlp WORKDIR omlp From 17ff086e55d63ddca6a2b0d428ef45806ece9b99 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 23 Feb 2023 10:44:57 +0100 Subject: [PATCH 37/53] Support new numpy (#1215) * Drop upper bound on numpy version * Update changelog --- doc/progress.rst | 2 +- openml/extensions/sklearn/extension.py | 12 ++++++++---- setup.py | 2 +- .../test_sklearn_extension/test_sklearn_extension.py | 5 ++++- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 6b42e851f..344a0e3dd 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,7 +9,7 @@ Changelog 0.13.1 ~~~~~~ - * Add new contributions here. + * FIX #1198: Support numpy 1.24 and higher. 0.13.0 diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index f8936b0db..28ecd217f 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -1252,14 +1252,16 @@ def _check_dependencies(self, dependencies: str, strict_version: bool = True) -> def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": mapping = { float: "float", - np.float: "np.float", # type: ignore np.float32: "np.float32", np.float64: "np.float64", int: "int", - np.int: "np.int", # type: ignore np.int32: "np.int32", np.int64: "np.int64", } + if LooseVersion(np.__version__) < "1.24": + mapping[np.float] = "np.float" + mapping[np.int] = "np.int" + ret = OrderedDict() # type: 'OrderedDict[str, str]' ret["oml-python:serialized_object"] = "type" ret["value"] = mapping[o] @@ -1268,14 +1270,16 @@ def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": def _deserialize_type(self, o: str) -> Any: mapping = { "float": float, - "np.float": np.float, # type: ignore "np.float32": np.float32, "np.float64": np.float64, "int": int, - "np.int": np.int, # type: ignore "np.int32": np.int32, "np.int64": np.int64, } + if LooseVersion(np.__version__) < "1.24": + mapping["np.float"] = np.float + mapping["np.int"] = np.int + return mapping[o] def _serialize_rv_frozen(self, o: Any) -> "OrderedDict[str, Union[str, Dict]]": diff --git a/setup.py b/setup.py index 281452548..9f3cdd0e6 100644 --- a/setup.py +++ b/setup.py @@ -53,7 +53,7 @@ "python-dateutil", # Installed through pandas anyway. 
"pandas>=1.0.0", "scipy>=0.13.3", - "numpy>=1.6.2,<1.24", + "numpy>=1.6.2", "minio", "pyarrow", ], diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 26c2dd563..1046970f3 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -952,7 +952,10 @@ def test_serialize_strings_as_pipeline_steps(self): self.assertEqual(extracted_info[2]["drop"].name, "drop") def test_serialize_type(self): - supported_types = [float, np.float, np.float32, np.float64, int, np.int, np.int32, np.int64] + supported_types = [float, np.float32, np.float64, int, np.int32, np.int64] + if LooseVersion(np.__version__) < "1.24": + supported_types.append(np.float) + supported_types.append(np.int) for supported_type in supported_types: serialized = self.extension.model_to_flow(supported_type) From d9850bea4bcc38b3f332d5c8caf44acf7cbdbe7b Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Thu, 23 Feb 2023 14:06:59 +0100 Subject: [PATCH 38/53] Allow unknown task types on the server (#1216) * Allow unknown task types on the server * Applied black to openml/tasks/functions.py * Some more fixes --- openml/tasks/functions.py | 42 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 4c0aeaf4a..c44d55ea7 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -90,7 +90,7 @@ def _get_estimation_procedure_list(): procs_dict = xmltodict.parse(xml_string) # Minimalistic check if the XML is useful if "oml:estimationprocedures" not in procs_dict: - raise ValueError("Error in return XML, does not contain tag " "oml:estimationprocedures.") + raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") elif "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: raise ValueError( "Error in return XML, does not contain tag " @@ -106,10 +106,19 @@ def _get_estimation_procedure_list(): procs = [] for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: + task_type_int = int(proc_["oml:ttid"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + ) + continue procs.append( { "id": int(proc_["oml:id"]), - "task_type_id": TaskType(int(proc_["oml:ttid"])), + "task_type_id": task_type_id, "name": proc_["oml:name"], "type": proc_["oml:type"], } @@ -124,7 +133,7 @@ def list_tasks( size: Optional[int] = None, tag: Optional[str] = None, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ Return a number of tasks having the given tag and task_type @@ -175,7 +184,7 @@ def list_tasks( offset=offset, size=size, tag=tag, - **kwargs + **kwargs, ) @@ -240,9 +249,18 @@ def __list_tasks(api_call, output_format="dict"): tid = None try: tid = int(task_["oml:task_id"]) + task_type_int = int(task_["oml:task_type_id"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + ) + continue task = { "tid": tid, - "ttid": TaskType(int(task_["oml:task_type_id"])), + "ttid": task_type_id, "did": int(task_["oml:did"]), "name": task_["oml:name"], "task_type": task_["oml:task_type"], @@ -330,7 
+348,10 @@ def get_task( task """ if not isinstance(task_id, int): - warnings.warn("Task id must be specified as `int` from 0.14.0 onwards.", DeprecationWarning) + warnings.warn( + "Task id must be specified as `int` from 0.14.0 onwards.", + DeprecationWarning, + ) try: task_id = int(task_id) @@ -466,9 +487,12 @@ def create_task( estimation_procedure_id: int, target_name: Optional[str] = None, evaluation_measure: Optional[str] = None, - **kwargs + **kwargs, ) -> Union[ - OpenMLClassificationTask, OpenMLRegressionTask, OpenMLLearningCurveTask, OpenMLClusteringTask + OpenMLClassificationTask, + OpenMLRegressionTask, + OpenMLLearningCurveTask, + OpenMLClusteringTask, ]: """Create a task based on different given attributes. @@ -519,5 +543,5 @@ def create_task( target_name=target_name, estimation_procedure_id=estimation_procedure_id, evaluation_measure=evaluation_measure, - **kwargs + **kwargs, ) From a9682886448938c269997401606838e480ea6a49 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Thu, 23 Feb 2023 15:01:07 +0100 Subject: [PATCH 39/53] Mark sklearn tests (#1202) * Add sklearn marker * Mark tests that use scikit-learn * Only run scikit-learn tests multiple times The generic tests that don't use scikit-learn should only be tested once (per platform). * Rename for correct variable * Add sklearn mark for filesystem test * Remove quotes around sklearn * Instead include sklearn in the matrix definition * Update jobnames * Add explicit false to jobname * Remove space * Add function inside of expression? * Do string testing instead * Add missing ${{ * Add explicit true to old sklearn tests * Add instruction to add pytest marker for sklearn tests --- .github/workflows/test.yml | 13 ++++- CONTRIBUTING.md | 3 +- tests/conftest.py | 4 ++ .../test_sklearn_extension.py | 52 +++++++++++++++++++ tests/test_flows/test_flow.py | 10 ++++ tests/test_flows/test_flow_functions.py | 7 +++ tests/test_runs/test_run.py | 4 ++ tests/test_runs/test_run_functions.py | 29 +++++++++++ tests/test_setups/test_setup_functions.py | 5 ++ tests/test_study/test_study_examples.py | 2 + 10 files changed, 126 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5ac6d8dbb..5adfa3eac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,13 +4,14 @@ on: [push, pull_request] jobs: test: - name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}) + name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }}) runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8] scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] os: [ubuntu-latest] + sklearn-only: ['true'] exclude: # no scikit-learn 0.21.2 release for Python 3.8 - python-version: 3.8 scikit-learn: 0.21.2 @@ -19,17 +20,22 @@ jobs: scikit-learn: 0.18.2 scipy: 1.2.0 os: ubuntu-latest + sklearn-only: 'true' - python-version: 3.6 scikit-learn: 0.19.2 os: ubuntu-latest + sklearn-only: 'true' - python-version: 3.6 scikit-learn: 0.20.2 os: ubuntu-latest + sklearn-only: 'true' - python-version: 3.8 scikit-learn: 0.23.1 code-cov: true + sklearn-only: 'false' os: ubuntu-latest - os: windows-latest + sklearn-only: 'false' scikit-learn: 0.24.* fail-fast: false max-parallel: 4 @@ -62,7 +68,10 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi - pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv 
$codecov --reruns 5 --reruns-delay 1 + # Most of the time, running only the scikit-learn tests is sufficient + if [ ${{ matrix.sklearn-only }} = 'true' ]; then sklearn='-m sklearn'; fi + echo pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 + pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 688dbd7a9..87c8ae3c6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -153,7 +153,8 @@ following rules before you submit a pull request: - Add [unit tests](https://github.com/openml/openml-python/tree/develop/tests) and [examples](https://github.com/openml/openml-python/tree/develop/examples) for any new functionality being introduced. - If an unit test contains an upload to the test server, please ensure that it is followed by a file collection for deletion, to prevent the test server from bulking up. For example, `TestBase._mark_entity_for_removal('data', dataset.dataset_id)`, `TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))`. - - Please ensure that the example is run on the test server by beginning with the call to `openml.config.start_using_configuration_for_example()`. + - Please ensure that the example is run on the test server by beginning with the call to `openml.config.start_using_configuration_for_example()`. + - Add the `@pytest.mark.sklearn` marker to your unit tests if they have a dependency on scikit-learn. - All tests pass when running `pytest`. On Unix-like systems, check with (from the toplevel source folder): diff --git a/tests/conftest.py b/tests/conftest.py index cf3f33834..89da5fca4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -174,6 +174,10 @@ def pytest_sessionfinish() -> None: logger.info("{} is killed".format(worker)) +def pytest_configure(config): + config.addinivalue_line("markers", "sklearn: marks tests that use scikit-learn") + + def pytest_addoption(parser): parser.addoption( "--long", diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 1046970f3..86ae419d2 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -15,6 +15,7 @@ import numpy as np import pandas as pd +import pytest import scipy.optimize import scipy.stats import sklearn.base @@ -176,6 +177,7 @@ def _serialization_test_helper( return serialization, new_model + @pytest.mark.sklearn def test_serialize_model(self): model = sklearn.tree.DecisionTreeClassifier( criterion="entropy", max_features="auto", max_leaf_nodes=2000 @@ -265,6 +267,7 @@ def test_serialize_model(self): self.assertEqual(serialization.dependencies, version_fixture) self.assertDictEqual(structure, structure_fixture) + @pytest.mark.sklearn def test_can_handle_flow(self): openml.config.server = self.production_server @@ -275,6 +278,7 @@ def test_can_handle_flow(self): openml.config.server = self.test_server + @pytest.mark.sklearn def test_serialize_model_clustering(self): model = sklearn.cluster.KMeans() @@ -367,6 +371,7 @@ def test_serialize_model_clustering(self): assert serialization.dependencies == version_fixture assert structure 
== fixture_structure + @pytest.mark.sklearn def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() @@ -427,6 +432,7 @@ def test_serialize_model_with_subcomponent(self): ) self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn def test_serialize_pipeline(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) dummy = sklearn.dummy.DummyClassifier(strategy="prior") @@ -496,6 +502,7 @@ def test_serialize_pipeline(self): self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) + @pytest.mark.sklearn def test_serialize_pipeline_clustering(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) km = sklearn.cluster.KMeans() @@ -564,6 +571,7 @@ def test_serialize_pipeline_clustering(self): self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -622,6 +630,7 @@ def test_serialize_column_transformer(self): self.assertEqual(serialization.description, fixture_description) self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -688,6 +697,7 @@ def test_serialize_column_transformer_pipeline(self): self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="Pipeline processing behaviour updated" ) @@ -756,6 +766,7 @@ def test_serialize_feature_union(self): ) self.assertIs(new_model.transformer_list[1][1], "drop") + @pytest.mark.sklearn def test_serialize_feature_union_switched_names(self): ohe_params = {"categories": "auto"} if LooseVersion(sklearn.__version__) >= "0.20" else {} ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) @@ -796,6 +807,7 @@ def test_serialize_feature_union_switched_names(self): "ohe=sklearn.preprocessing.{}.StandardScaler)".format(module_name_encoder, scaler_name), ) + @pytest.mark.sklearn def test_serialize_complex_flow(self): ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore") scaler = sklearn.preprocessing.StandardScaler(with_mean=False) @@ -856,6 +868,7 @@ def test_serialize_complex_flow(self): self.assertEqual(serialized.name, fixture_name) self.assertEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="Pipeline till 0.20 doesn't support 'passthrough'", @@ -951,6 +964,7 @@ def test_serialize_strings_as_pipeline_steps(self): self.assertIsInstance(extracted_info[2]["drop"], OpenMLFlow) self.assertEqual(extracted_info[2]["drop"].name, "drop") + @pytest.mark.sklearn def test_serialize_type(self): supported_types = [float, np.float32, np.float64, int, np.int32, np.int64] if LooseVersion(np.__version__) < "1.24": @@ -962,6 +976,7 @@ def test_serialize_type(self): deserialized = self.extension.flow_to_model(serialized) self.assertEqual(deserialized, supported_type) + @pytest.mark.sklearn def test_serialize_rvs(self): supported_rvs = [ scipy.stats.norm(loc=1, scale=5), @@ -977,11 +992,13 @@ def test_serialize_rvs(self): del supported_rv.dist self.assertEqual(deserialized.__dict__, supported_rv.__dict__) + @pytest.mark.sklearn def test_serialize_function(self): 
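        # A bare function such as ``sklearn.feature_selection.chi2`` should
        # survive the flow round-trip unchanged, deserializing back to the
        # same callable rather than a re-created copy.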
serialized = self.extension.model_to_flow(sklearn.feature_selection.chi2) deserialized = self.extension.flow_to_model(serialized) self.assertEqual(deserialized, sklearn.feature_selection.chi2) + @pytest.mark.sklearn def test_serialize_cvobject(self): methods = [sklearn.model_selection.KFold(3), sklearn.model_selection.LeaveOneOut()] fixtures = [ @@ -1031,6 +1048,7 @@ def test_serialize_cvobject(self): self.assertIsNot(m_new, m) self.assertIsInstance(m_new, type(method)) + @pytest.mark.sklearn def test_serialize_simple_parameter_grid(self): # We cannot easily test for scipy random variables in here, but they @@ -1078,6 +1096,7 @@ def test_serialize_simple_parameter_grid(self): del deserialized_params["estimator"] self.assertEqual(hpo_params, deserialized_params) + @pytest.mark.sklearn @unittest.skip( "This feature needs further reworking. If we allow several " "components, we need to register them all in the downstream " @@ -1132,6 +1151,7 @@ def test_serialize_advanced_grid(self): self.assertEqual(grid[1]["reduce_dim__k"], deserialized[1]["reduce_dim__k"]) self.assertEqual(grid[1]["classify__C"], deserialized[1]["classify__C"]) + @pytest.mark.sklearn def test_serialize_advanced_grid_fails(self): # This unit test is checking that the test we skip above would actually fail @@ -1151,6 +1171,7 @@ def test_serialize_advanced_grid_fails(self): ): self.extension.model_to_flow(clf) + @pytest.mark.sklearn def test_serialize_resampling(self): kfold = sklearn.model_selection.StratifiedKFold(n_splits=4, shuffle=True) serialized = self.extension.model_to_flow(kfold) @@ -1159,6 +1180,7 @@ def test_serialize_resampling(self): self.assertEqual(str(deserialized), str(kfold)) self.assertIsNot(deserialized, kfold) + @pytest.mark.sklearn def test_hypothetical_parameter_values(self): # The hypothetical parameter values of true, 1, 0.1 formatted as a # string (and their correct serialization and deserialization) an only @@ -1172,6 +1194,7 @@ def test_hypothetical_parameter_values(self): self.assertEqual(deserialized.get_params(), model.get_params()) self.assertIsNot(deserialized, model) + @pytest.mark.sklearn def test_gaussian_process(self): opt = scipy.optimize.fmin_l_bfgs_b kernel = sklearn.gaussian_process.kernels.Matern() @@ -1182,6 +1205,7 @@ def test_gaussian_process(self): ): self.extension.model_to_flow(gp) + @pytest.mark.sklearn def test_error_on_adding_component_multiple_times_to_flow(self): # this function implicitly checks # - openml.flows._check_multiple_occurence_of_component_in_flow() @@ -1206,6 +1230,7 @@ def test_error_on_adding_component_multiple_times_to_flow(self): with self.assertRaisesRegex(ValueError, fixture): self.extension.model_to_flow(pipeline2) + @pytest.mark.sklearn def test_subflow_version_propagated(self): this_directory = os.path.dirname(os.path.abspath(__file__)) tests_directory = os.path.abspath(os.path.join(this_directory, "..", "..")) @@ -1230,12 +1255,14 @@ def test_subflow_version_propagated(self): ), ) + @pytest.mark.sklearn @mock.patch("warnings.warn") def test_check_dependencies(self, warnings_mock): dependencies = ["sklearn==0.1", "sklearn>=99.99.99", "sklearn>99.99.99"] for dependency in dependencies: self.assertRaises(ValueError, self.extension._check_dependencies, dependency) + @pytest.mark.sklearn def test_illegal_parameter_names(self): # illegal name: estimators clf1 = sklearn.ensemble.VotingClassifier( @@ -1255,6 +1282,7 @@ def test_illegal_parameter_names(self): for case in cases: self.assertRaises(PyOpenMLError, self.extension.model_to_flow, case) + 
@pytest.mark.sklearn def test_paralizable_check(self): # using this model should pass the test (if param distribution is # legal) @@ -1304,6 +1332,7 @@ def test_paralizable_check(self): with self.assertRaises(PyOpenMLError): self.extension._prevent_optimize_n_jobs(model) + @pytest.mark.sklearn def test__get_fn_arguments_with_defaults(self): sklearn_version = LooseVersion(sklearn.__version__) if sklearn_version < "0.19": @@ -1361,6 +1390,7 @@ def test__get_fn_arguments_with_defaults(self): self.assertSetEqual(set(defaults.keys()), set(defaults.keys()) - defaultless) self.assertSetEqual(defaultless, defaultless - set(defaults.keys())) + @pytest.mark.sklearn def test_deserialize_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1396,6 +1426,7 @@ def test_deserialize_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_deserialize_adaboost_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1434,6 +1465,7 @@ def test_deserialize_adaboost_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_deserialize_complex_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1477,6 +1509,7 @@ def test_deserialize_complex_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_openml_param_name_to_sklearn(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( @@ -1511,6 +1544,7 @@ def test_openml_param_name_to_sklearn(self): openml_name = "%s(%s)_%s" % (subflow.name, subflow.version, splitted[-1]) self.assertEqual(parameter.full_name, openml_name) + @pytest.mark.sklearn def test_obtain_parameter_values_flow_not_from_server(self): model = sklearn.linear_model.LogisticRegression(solver="lbfgs") flow = self.extension.model_to_flow(model) @@ -1532,6 +1566,7 @@ def test_obtain_parameter_values_flow_not_from_server(self): with self.assertRaisesRegex(ValueError, msg): self.extension.obtain_parameter_values(flow) + @pytest.mark.sklearn def test_obtain_parameter_values(self): model = sklearn.model_selection.RandomizedSearchCV( @@ -1557,6 +1592,7 @@ def test_obtain_parameter_values(self): self.assertEqual(parameter["oml:value"], "5") self.assertEqual(parameter["oml:component"], 2) + @pytest.mark.sklearn def test_numpy_type_allowed_in_flow(self): """Simple numpy types should be serializable.""" dt = sklearn.tree.DecisionTreeClassifier( @@ -1564,6 +1600,7 @@ def test_numpy_type_allowed_in_flow(self): ) self.extension.model_to_flow(dt) + @pytest.mark.sklearn def test_numpy_array_not_allowed_in_flow(self): """Simple numpy arrays should not be serializable.""" bin = sklearn.preprocessing.MultiLabelBinarizer(classes=np.asarray([1, 2, 3])) @@ -1581,6 +1618,7 @@ def setUp(self): ################################################################################################ # Test methods for performing runs with this extension module + @pytest.mark.sklearn def test_run_model_on_task(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # using most_frequent imputer since dataset has mixed types and to keep things simple @@ -1592,6 +1630,7 @@ def test_run_model_on_task(self): ) 
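        # ``run_model_on_task`` clones and fits the pipeline once per
        # train/test split of the task's estimation procedure;
        # ``dataset_format="array"`` requests the data as numpy arrays
        # rather than a pandas DataFrame.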
openml.runs.run_model_on_task(pipe, task, dataset_format="array") + @pytest.mark.sklearn def test_seed_model(self): # randomized models that are initialized without seeds, can be seeded randomized_clfs = [ @@ -1634,6 +1673,7 @@ def test_seed_model(self): if idx == 1: self.assertEqual(clf.cv.random_state, 56422) + @pytest.mark.sklearn def test_seed_model_raises(self): # the _set_model_seed_where_none should raise exception if random_state is # anything else than an int @@ -1646,6 +1686,7 @@ def test_seed_model_raises(self): with self.assertRaises(ValueError): self.extension.seed_model(model=clf, seed=42) + @pytest.mark.sklearn def test_run_model_on_fold_classification_1_array(self): task = openml.tasks.get_task(1) # anneal; crossvalidation @@ -1702,6 +1743,7 @@ def test_run_model_on_fold_classification_1_array(self): check_scores=False, ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="SimpleImputer, ColumnTransformer available only after 0.19 and " @@ -1773,6 +1815,7 @@ def test_run_model_on_fold_classification_1_dataframe(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_classification_2(self): task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation @@ -1826,6 +1869,7 @@ def test_run_model_on_fold_classification_2(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_classification_3(self): class HardNaiveBayes(sklearn.naive_bayes.GaussianNB): # class for testing a naive bayes classifier that does not allow soft @@ -1896,6 +1940,7 @@ def predict_proba(*args, **kwargs): X_test.shape[0] * len(task.class_labels), ) + @pytest.mark.sklearn def test_run_model_on_fold_regression(self): # There aren't any regression tasks on the test server openml.config.server = self.production_server @@ -1945,6 +1990,7 @@ def test_run_model_on_fold_regression(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_clustering(self): # There aren't any regression tasks on the test server openml.config.server = self.production_server @@ -1987,6 +2033,7 @@ def test_run_model_on_fold_clustering(self): check_scores=False, ) + @pytest.mark.sklearn def test__extract_trace_data(self): param_grid = { @@ -2038,6 +2085,7 @@ def test__extract_trace_data(self): param_value = json.loads(trace_iteration.parameters[param_in_trace]) self.assertTrue(param_value in param_grid[param]) + @pytest.mark.sklearn def test_trim_flow_name(self): import re @@ -2100,6 +2148,7 @@ def test_trim_flow_name(self): "weka.IsolationForest", SklearnExtension.trim_flow_name("weka.IsolationForest") ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="SimpleImputer, ColumnTransformer available only after 0.19 and " @@ -2189,6 +2238,7 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(len(new_model.named_steps), 3) self.assertEqual(new_model.named_steps["dummystep"], "passthrough") + @pytest.mark.sklearn def test_sklearn_serialization_with_none_step(self): msg = ( "Cannot serialize objects of None type. 
Please use a valid " @@ -2201,6 +2251,7 @@ def test_sklearn_serialization_with_none_step(self): with self.assertRaisesRegex(ValueError, msg): self.extension.model_to_flow(clf) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -2236,6 +2287,7 @@ def test_failed_serialization_of_custom_class(self): else: raise Exception(e) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 50d152192..c3c72f267 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -7,6 +7,7 @@ import re import time from unittest import mock +import pytest import scipy.stats import sklearn @@ -148,6 +149,7 @@ def test_from_xml_to_xml(self): self.assertEqual(new_xml, flow_xml) + @pytest.mark.sklearn def test_to_xml_from_xml(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( @@ -166,6 +168,7 @@ def test_to_xml_from_xml(self): openml.flows.functions.assert_flows_equal(new_flow, flow) self.assertIsNot(new_flow, flow) + @pytest.mark.sklearn def test_publish_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", @@ -191,6 +194,7 @@ def test_publish_flow(self): TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) self.assertIsInstance(flow.flow_id, int) + @pytest.mark.sklearn @mock.patch("openml.flows.functions.flow_exists") def test_publish_existing_flow(self, flow_exists_mock): clf = sklearn.tree.DecisionTreeClassifier(max_depth=2) @@ -206,6 +210,7 @@ def test_publish_existing_flow(self, flow_exists_mock): self.assertTrue("OpenMLFlow already exists" in context_manager.exception.message) + @pytest.mark.sklearn def test_publish_flow_with_similar_components(self): clf = sklearn.ensemble.VotingClassifier( [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))] @@ -259,6 +264,7 @@ def test_publish_flow_with_similar_components(self): TestBase._mark_entity_for_removal("flow", (flow3.flow_id, flow3.name)) TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id)) + @pytest.mark.sklearn def test_semi_legal_flow(self): # TODO: Test if parameters are set correctly! 
# should not throw error as it contains two differentiable forms of @@ -275,6 +281,7 @@ def test_semi_legal_flow(self): TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + @pytest.mark.sklearn @mock.patch("openml.flows.functions.get_flow") @mock.patch("openml.flows.functions.flow_exists") @mock.patch("openml._api_calls._perform_api_call") @@ -331,6 +338,7 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): self.assertEqual(context_manager.exception.args[0], fixture) self.assertEqual(get_flow_mock.call_count, 2) + @pytest.mark.sklearn def test_illegal_flow(self): # should throw error as it contains two imputers illegal = sklearn.pipeline.Pipeline( @@ -359,6 +367,7 @@ def get_sentinel(): flow_id = openml.flows.flow_exists(name, version) self.assertFalse(flow_id) + @pytest.mark.sklearn def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() @@ -397,6 +406,7 @@ def test_existing_flow_exists(self): ) self.assertEqual(downloaded_flow_id, flow.flow_id) + @pytest.mark.sklearn def test_sklearn_to_upload_to_flow(self): iris = sklearn.datasets.load_iris() X = iris.data diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index fe058df23..532fb1d1b 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -271,6 +271,7 @@ def test_are_flows_equal_ignore_if_older(self): ) assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="OrdinalEncoder introduced in 0.20. " @@ -302,6 +303,7 @@ def test_get_flow1(self): flow = openml.flows.get_flow(1) self.assertIsNone(flow.external_version) + @pytest.mark.sklearn def test_get_flow_reinstantiate_model(self): model = ensemble.RandomForestClassifier(n_estimators=33) extension = openml.extensions.get_extension_by_model(model) @@ -323,6 +325,7 @@ def test_get_flow_reinstantiate_model_no_extension(self): reinstantiate=True, ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) == "0.19.1", reason="Requires scikit-learn!=0.19.1, because target flow is from that version.", @@ -340,6 +343,7 @@ def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception( strict_version=True, ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "1" and LooseVersion(sklearn.__version__) != "1.0.0", reason="Requires scikit-learn < 1.0.1." 
@@ -352,6 +356,7 @@ def test_get_flow_reinstantiate_flow_not_strict_post_1(self): assert flow.flow_id is None assert "sklearn==1.0.0" not in flow.dependencies + @pytest.mark.sklearn @unittest.skipIf( (LooseVersion(sklearn.__version__) < "0.23.2") or ("1.0" < LooseVersion(sklearn.__version__)), @@ -364,6 +369,7 @@ def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self): assert flow.flow_id is None assert "sklearn==0.23.1" not in flow.dependencies + @pytest.mark.sklearn @unittest.skipIf( "0.23" < LooseVersion(sklearn.__version__), reason="Requires scikit-learn<=0.23, because the scikit-learn module structure changed.", @@ -374,6 +380,7 @@ def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): assert flow.flow_id is None assert "sklearn==0.19.1" not in flow.dependencies + @pytest.mark.sklearn def test_get_flow_id(self): if self.long_version: list_all = openml.utils._list_all diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 88c998bc3..e64ffeed6 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -102,6 +102,7 @@ def _check_array(array, type_): else: self.assertIsNone(run_prime_trace_content) + @pytest.mark.sklearn def test_to_from_filesystem_vanilla(self): model = Pipeline( @@ -137,6 +138,7 @@ def test_to_from_filesystem_vanilla(self): "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) ) + @pytest.mark.sklearn @pytest.mark.flaky() def test_to_from_filesystem_search(self): @@ -173,6 +175,7 @@ def test_to_from_filesystem_search(self): "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) ) + @pytest.mark.sklearn def test_to_from_filesystem_no_model(self): model = Pipeline( @@ -189,6 +192,7 @@ def test_to_from_filesystem_no_model(self): with self.assertRaises(ValueError, msg="Could not find model.pkl"): openml.runs.OpenMLRun.from_filesystem(cache_path) + @pytest.mark.sklearn def test_publish_with_local_loaded_flow(self): """ Publish a run tied to a local flow after it has first been saved to diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 1e92613c3..ca38750d8 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -20,6 +20,7 @@ import unittest import warnings import pandas as pd +import pytest import openml.extensions.sklearn from openml.testing import TestBase, SimpleImputer, CustomImputer @@ -387,6 +388,7 @@ def _check_sample_evaluations( self.assertGreater(evaluation, 0) self.assertLess(evaluation, max_time_allowed) + @pytest.mark.sklearn def test_run_regression_on_classif_task(self): task_id = 115 # diabetes; crossvalidation @@ -404,6 +406,7 @@ def test_run_regression_on_classif_task(self): dataset_format="array", ) + @pytest.mark.sklearn def test_check_erronous_sklearn_flow_fails(self): task_id = 115 # diabetes; crossvalidation task = openml.tasks.get_task(task_id) @@ -578,6 +581,7 @@ def _run_and_upload_regression( sentinel=sentinel, ) + @pytest.mark.sklearn def test_run_and_upload_logistic_regression(self): lr = LogisticRegression(solver="lbfgs", max_iter=1000) task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] @@ -585,6 +589,7 @@ def test_run_and_upload_logistic_regression(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn def test_run_and_upload_linear_regression(self): lr = LinearRegression() task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"] @@ -614,6 +619,7 @@ 
def test_run_and_upload_linear_regression(self): n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"] self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn def test_run_and_upload_pipeline_dummy_pipeline(self): pipeline1 = Pipeline( @@ -627,6 +633,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -689,6 +696,7 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) + @pytest.mark.sklearn @unittest.skip("https://github.com/openml/OpenML/issues/1180") @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", @@ -740,6 +748,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): call_count += 1 self.assertEqual(call_count, 3) + @pytest.mark.sklearn def test_run_and_upload_gridsearch(self): gridsearch = GridSearchCV( BaggingClassifier(base_estimator=SVC()), @@ -758,6 +767,7 @@ def test_run_and_upload_gridsearch(self): ) self.assertEqual(len(run.trace.trace_iterations), 9) + @pytest.mark.sklearn def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), @@ -789,6 +799,7 @@ def test_run_and_upload_randomsearch(self): trace = openml.runs.get_run_trace(run.run_id) self.assertEqual(len(trace.trace_iterations), 5) + @pytest.mark.sklearn def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: # 1) it verifies the correct handling of masked arrays (not all @@ -811,6 +822,7 @@ def test_run_and_upload_maskedarrays(self): ########################################################################## + @pytest.mark.sklearn def test_learning_curve_task_1(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -830,6 +842,7 @@ def test_learning_curve_task_1(self): ) self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + @pytest.mark.sklearn def test_learning_curve_task_2(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -861,6 +874,7 @@ def test_learning_curve_task_2(self): ) self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="Pipelines don't support indexing (used for the assert check)", @@ -940,6 +954,7 @@ def _test_local_evaluations(self, run): self.assertGreaterEqual(alt_scores[idx], 0) self.assertLessEqual(alt_scores[idx], 1) + @pytest.mark.sklearn def test_local_run_swapped_parameter_order_model(self): clf = DecisionTreeClassifier() australian_task = 595 # Australian; crossvalidation @@ -955,6 +970,7 @@ def test_local_run_swapped_parameter_order_model(self): self._test_local_evaluations(run) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -984,6 +1000,7 @@ def test_local_run_swapped_parameter_order_flow(self): self._test_local_evaluations(run) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1021,6 +1038,7 @@ def test_online_run_metric_score(self): self._test_local_evaluations(run) 
+ @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1082,6 +1100,7 @@ def test_initialize_model_from_run(self): self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"') self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1136,6 +1155,7 @@ def test__run_exists(self): run_ids = run_exists(task.task_id, setup_exists) self.assertTrue(run_ids, msg=(run_ids, clf)) + @pytest.mark.sklearn def test_run_with_illegal_flow_id(self): # check the case where the user adds an illegal flow id to a # non-existing flo @@ -1154,6 +1174,7 @@ def test_run_with_illegal_flow_id(self): avoid_duplicate_runs=True, ) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_after_load(self): # Same as `test_run_with_illegal_flow_id`, but test this error is also # caught if the run is stored to and loaded from disk first. @@ -1182,6 +1203,7 @@ def test_run_with_illegal_flow_id_after_load(self): TestBase._mark_entity_for_removal("run", loaded_run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(loaded_run.run_id)) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_1(self): # Check the case where the user adds an illegal flow id to an existing # flow. Comes to a different value error than the previous test @@ -1206,6 +1228,7 @@ def test_run_with_illegal_flow_id_1(self): avoid_duplicate_runs=True, ) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_1_after_load(self): # Same as `test_run_with_illegal_flow_id_1`, but test this error is # also caught if the run is stored to and loaded from disk first. 
@@ -1239,6 +1262,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): openml.exceptions.PyOpenMLError, expected_message_regex, loaded_run.publish ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="OneHotEncoder cannot handle mixed type DataFrame as input", @@ -1455,6 +1479,7 @@ def test_get_runs_list_by_tag(self): runs = openml.runs.list_runs(tag="curves") self.assertGreaterEqual(len(runs), 1) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -1490,6 +1515,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): # repeat, fold, row_id, 6 confidences, prediction and correct label self.assertEqual(len(row), 12) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -1541,6 +1567,7 @@ def test_get_uncached_run(self): with self.assertRaises(openml.exceptions.OpenMLCacheException): openml.runs.functions._get_cached_run(10) + @pytest.mark.sklearn def test_run_flow_on_task_downloaded_flow(self): model = sklearn.ensemble.RandomForestClassifier(n_estimators=33) flow = self.extension.model_to_flow(model) @@ -1633,6 +1660,7 @@ def test_format_prediction_task_regression(self): res = format_prediction(regression, *ignored_input) self.assertListEqual(res, [0] * 5) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", @@ -1686,6 +1714,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock): scores, expected_scores, decimal=2 if os.name == "nt" else 7 ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 464431b94..73a691d84 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -10,6 +10,7 @@ from openml.testing import TestBase from typing import Dict import pandas as pd +import pytest import sklearn.tree import sklearn.naive_bayes @@ -34,6 +35,7 @@ def setUp(self): self.extension = openml.extensions.sklearn.SklearnExtension() super().setUp() + @pytest.mark.sklearn def test_nonexisting_setup_exists(self): # first publish a non-existing flow sentinel = get_sentinel() @@ -81,6 +83,7 @@ def _existing_setup_exists(self, classif): setup_id = openml.setups.setup_exists(flow) self.assertEqual(setup_id, run.setup_id) + @pytest.mark.sklearn def test_existing_setup_exists_1(self): def side_effect(self): self.var_smoothing = 1e-9 @@ -95,10 +98,12 @@ def side_effect(self): nb = sklearn.naive_bayes.GaussianNB() self._existing_setup_exists(nb) + @pytest.mark.sklearn def test_exisiting_setup_exists_2(self): # Check a flow with one hyperparameter self._existing_setup_exists(sklearn.naive_bayes.GaussianNB()) + @pytest.mark.sklearn def test_existing_setup_exists_3(self): # Check a flow with many hyperparameters self._existing_setup_exists( diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 682359a61..cc3294085 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -3,6 +3,7 @@ from openml.testing import TestBase from openml.extensions.sklearn import cat, cont +import pytest import sklearn import unittest from distutils.version import 
LooseVersion @@ -12,6 +13,7 @@ class TestStudyFunctions(TestBase): _multiprocess_can_split_ = True """Test the example code of Bischl et al. (2018)""" + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.24", reason="columntransformer introduction in 0.24.0", From beb598cbfa8b56705f50909a24f21ad6080effa4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 Feb 2023 09:14:02 +0100 Subject: [PATCH 40/53] Bump actions/setup-python from 2 to 4 (#1212) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 2 to 4. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v2...v4) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/dist.yaml | 2 +- .github/workflows/docs.yaml | 2 +- .github/workflows/pre-commit.yaml | 2 +- .github/workflows/test.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dist.yaml b/.github/workflows/dist.yaml index 4ae570190..63641ae72 100644 --- a/.github/workflows/dist.yaml +++ b/.github/workflows/dist.yaml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Build dist diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 89870cbdd..95764d3c8 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -7,7 +7,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Install dependencies diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index c81729d04..45e4f1bd0 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -8,7 +8,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Setup Python 3.7 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.7 - name: Install pre-commit diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5adfa3eac..7241f7990 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -46,7 +46,7 @@ jobs: fetch-depth: 2 - name: Setup Python ${{ matrix.python-version }} if: matrix.os != 'windows-latest' # windows-latest only uses preinstalled Python (3.7.9) - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install test dependencies From c590b3a3b6715fef88ee1aa9f65dd398b8de23c1 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Fri, 24 Feb 2023 09:20:48 +0100 Subject: [PATCH 41/53] Make OpenMLTraceIteration a dataclass (#1201) It provides a better repr and is less verbose. 
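For context, the @dataclass decorator generates __init__, __eq__, and a field-by-field __repr__ from the annotated attributes, which is what allows the hand-written __init__ and __repr__ to be deleted from trace.py below. A minimal, self-contained sketch of the effect, reusing the field names from this patch (the class name is shortened here and the sketch itself is not part of the patch):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class TraceIterationSketch:
        # The field declarations replace a hand-written __init__.
        repeat: int
        fold: int
        iteration: int
        evaluation: float
        selected: bool
        setup_string: Optional[str] = None

    # The generated __repr__ lists every field by name:
    print(TraceIterationSketch(repeat=0, fold=1, iteration=3, evaluation=0.92, selected=True))
    # -> TraceIterationSketch(repeat=0, fold=1, iteration=3, evaluation=0.92, selected=True, setup_string=None)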
--- openml/runs/trace.py | 86 +++++++++++++++-------------------- tests/test_runs/test_trace.py | 2 +- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/openml/runs/trace.py b/openml/runs/trace.py index e6885260e..0b8571fe5 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -1,6 +1,7 @@ # License: BSD 3-Clause from collections import OrderedDict +from dataclasses import dataclass import json import os from typing import List, Tuple, Optional # noqa F401 @@ -331,12 +332,12 @@ def trace_from_xml(cls, xml): ) current = OpenMLTraceIteration( - repeat, - fold, - iteration, - setup_string, - evaluation, - selected, + repeat=repeat, + fold=fold, + iteration=iteration, + setup_string=setup_string, + evaluation=evaluation, + selected=selected, ) trace[(repeat, fold, iteration)] = current @@ -386,8 +387,11 @@ def __iter__(self): yield val -class OpenMLTraceIteration(object): - """OpenML Trace Iteration: parsed output from Run Trace call +@dataclass +class OpenMLTraceIteration: + """ + OpenML Trace Iteration: parsed output from Run Trace call + Exactly one of `setup_string` or `parameters` must be provided. Parameters ---------- @@ -400,8 +404,9 @@ class OpenMLTraceIteration(object): iteration : int iteration number of optimization procedure - setup_string : str + setup_string : str, optional json string representing the parameters + If not provided, ``parameters`` should be set. evaluation : double The evaluation that was awarded to this trace iteration. @@ -412,42 +417,37 @@ class OpenMLTraceIteration(object): selected for making predictions. Per fold/repeat there should be only one iteration selected - parameters : OrderedDict + parameters : OrderedDict, optional + Dictionary specifying parameter names and their values. + If not provided, ``setup_string`` should be set. """ - def __init__( - self, - repeat, - fold, - iteration, - setup_string, - evaluation, - selected, - parameters=None, - ): - - if not isinstance(selected, bool): - raise TypeError(type(selected)) - if setup_string and parameters: + repeat: int + fold: int + iteration: int + + evaluation: float + selected: bool + + setup_string: Optional[str] = None + parameters: Optional[OrderedDict] = None + + def __post_init__(self): + # TODO: refactor into one argument of type + if self.setup_string and self.parameters: raise ValueError( - "Can only be instantiated with either " "setup_string or parameters argument." + "Can only be instantiated with either `setup_string` or `parameters` argument." ) - elif not setup_string and not parameters: - raise ValueError("Either setup_string or parameters needs to be passed as " "argument.") - if parameters is not None and not isinstance(parameters, OrderedDict): + elif not (self.setup_string or self.parameters): + raise ValueError( + "Either `setup_string` or `parameters` needs to be passed as argument." 
+ ) + if self.parameters is not None and not isinstance(self.parameters, OrderedDict): raise TypeError( "argument parameters is not an instance of OrderedDict, but %s" - % str(type(parameters)) + % str(type(self.parameters)) ) - self.repeat = repeat - self.fold = fold - self.iteration = iteration - self.setup_string = setup_string - self.evaluation = evaluation - self.selected = selected - self.parameters = parameters - def get_parameters(self): result = {} # parameters have prefix 'parameter_' @@ -461,15 +461,3 @@ def get_parameters(self): for param, value in self.parameters.items(): result[param[len(PREFIX) :]] = value return result - - def __repr__(self): - """ - tmp string representation, will be changed in the near future - """ - return "[(%d,%d,%d): %f (%r)]" % ( - self.repeat, - self.fold, - self.iteration, - self.evaluation, - self.selected, - ) diff --git a/tests/test_runs/test_trace.py b/tests/test_runs/test_trace.py index 0b4b64359..6e8a7afba 100644 --- a/tests/test_runs/test_trace.py +++ b/tests/test_runs/test_trace.py @@ -63,7 +63,7 @@ def test_duplicate_name(self): ] trace_content = [[0, 0, 0, 0.5, "true", 1], [0, 0, 0, 0.9, "false", 2]] with self.assertRaisesRegex( - ValueError, "Either setup_string or parameters needs to be passed as argument." + ValueError, "Either `setup_string` or `parameters` needs to be passed as argument." ): OpenMLRunTrace.generate(trace_attributes, trace_content) From bbf09b344d533f05a94f576ace2a430ca60b49b5 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Fri, 24 Feb 2023 09:48:10 +0100 Subject: [PATCH 42/53] Fix: correctly order the ground truth and prediction for ARFF files in run.data_content (#1209) * add test and fix for switch of ground truth and predictions * undo import optimization * fix bug with model passing to function * fix order in other tests * update progress.rst * new unit test for run consistency and bug fixed * clarify new assert * minor loop refactor * refactor default to None * directly test prediction data equal * Update tests/test_runs/test_run.py Co-authored-by: Pieter Gijsbers * Mark sklearn tests (#1202) * Add sklearn marker * Mark tests that use scikit-learn * Only run scikit-learn tests multiple times The generic tests that don't use scikit-learn should only be tested once (per platform). * Rename for correct variable * Add sklearn mark for filesystem test * Remove quotes around sklearn * Instead include sklearn in the matrix definition * Update jobnames * Add explicit false to jobname * Remove space * Add function inside of expression? * Do string testing instead * Add missing ${{ * Add explicit true to old sklearn tests * Add instruction to add pytest marker for sklearn tests * add test and fix for switch of ground truth and predictions * undo import optimization * fix mask error resulting from rebase * make dummy classifier strategy consistent to avoid problems as a result of the random state problems for sklearn < 0.24 --------- Co-authored-by: Pieter Gijsbers --- doc/progress.rst | 2 +- openml/runs/functions.py | 26 ++-- openml/runs/run.py | 6 +- tests/test_runs/test_run.py | 200 +++++++++++++++++++++----- tests/test_runs/test_run_functions.py | 7 +- 5 files changed, 188 insertions(+), 53 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 344a0e3dd..46c34c03c 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,9 +9,9 @@ Changelog 0.13.1 ~~~~~~ + * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``. 
* FIX #1198: Support numpy 1.24 and higher. - 0.13.0 ~~~~~~ diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 08b2fe972..ff1f07c06 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -155,7 +155,6 @@ def run_flow_on_task( dataset_format: str = "dataframe", n_jobs: Optional[int] = None, ) -> OpenMLRun: - """Run the model provided by the flow on the dataset defined by task. Takes the flow and repeat information into account. @@ -515,13 +514,13 @@ def _calculate_local_measure(sklearn_fn, openml_name): else pred_y[i] ) if isinstance(test_y, pd.Series): - test_prediction = ( + truth = ( task.class_labels[test_y.iloc[i]] if isinstance(test_y.iloc[i], int) else test_y.iloc[i] ) else: - test_prediction = ( + truth = ( task.class_labels[test_y[i]] if isinstance(test_y[i], (int, np.integer)) else test_y[i] @@ -535,7 +534,7 @@ def _calculate_local_measure(sklearn_fn, openml_name): sample=sample_no, index=tst_idx, prediction=prediction, - truth=test_prediction, + truth=truth, proba=dict(zip(task.class_labels, pred_prob)), ) else: @@ -552,14 +551,14 @@ def _calculate_local_measure(sklearn_fn, openml_name): elif isinstance(task, OpenMLRegressionTask): for i, _ in enumerate(test_indices): - test_prediction = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i] + truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i] arff_line = format_prediction( task=task, repeat=rep_no, fold=fold_no, index=test_indices[i], prediction=pred_y[i], - truth=test_prediction, + truth=truth, ) arff_datacontent.append(arff_line) @@ -920,9 +919,10 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): parameter_settings=parameters, dataset_id=dataset_id, output_files=files, - evaluations=evaluations, - fold_evaluations=fold_evaluations, - sample_evaluations=sample_evaluations, + # Make sure default values are used where needed to keep run objects identical + evaluations=evaluations or None, + fold_evaluations=fold_evaluations or None, + sample_evaluations=sample_evaluations or None, tags=tags, predictions_url=predictions_url, run_details=run_details, @@ -1186,6 +1186,10 @@ def format_prediction( ------- A list with elements for the prediction results of a run. + The returned order of the elements is (if available): + [repeat, fold, sample, index, prediction, truth, *probabilities] + + This order follows the R Client API. """ if isinstance(task, OpenMLClassificationTask): if proba is None: @@ -1200,8 +1204,8 @@ def format_prediction( else: sample = 0 probabilities = [proba[c] for c in task.class_labels] - return [repeat, fold, sample, index, *probabilities, truth, prediction] + return [repeat, fold, sample, index, prediction, truth, *probabilities] elif isinstance(task, OpenMLRegressionTask): - return [repeat, fold, index, truth, prediction] + return [repeat, fold, index, prediction, truth] else: raise NotImplementedError(f"Formatting for {type(task)} is not supported.") diff --git a/openml/runs/run.py b/openml/runs/run.py index 58367179e..804c0f484 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -304,6 +304,8 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": Assumes that the run has been executed. + The order of the attributes follows the order defined by the Client API for R. 
+ Returns ------- arf_dict : dict @@ -337,11 +339,11 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": if class_labels is not None: arff_dict["attributes"] = ( arff_dict["attributes"] + + [("prediction", class_labels), ("correct", class_labels)] + [ ("confidence." + class_labels[i], "NUMERIC") for i in range(len(class_labels)) ] - + [("prediction", class_labels), ("correct", class_labels)] ) else: raise ValueError("The task has no class labels") @@ -362,7 +364,7 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": ] prediction_and_true = [("prediction", class_labels), ("correct", class_labels)] arff_dict["attributes"] = ( - arff_dict["attributes"] + prediction_confidences + prediction_and_true + arff_dict["attributes"] + prediction_and_true + prediction_confidences ) else: raise ValueError("The task has no class labels") diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index e64ffeed6..67e15d62b 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -7,9 +7,11 @@ import xmltodict from sklearn.dummy import DummyClassifier +from sklearn.linear_model import LinearRegression from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline +from sklearn.base import clone from openml import OpenMLRun from openml.testing import TestBase, SimpleImputer @@ -39,6 +41,25 @@ def test_tagging(self): run_list = openml.runs.list_runs(tag=tag) self.assertEqual(len(run_list), 0) + @staticmethod + def _test_prediction_data_equal(run, run_prime): + # Determine which attributes are numeric and which not + num_cols = np.array( + [d_type == "NUMERIC" for _, d_type in run._generate_arff_dict()["attributes"]] + ) + # Get run data consistently + # (For run from server, .data_content does not exist) + run_data_content = run.predictions.values + run_prime_data_content = run_prime.predictions.values + + # Assert numeric and string parts separately + numeric_part = np.array(run_data_content[:, num_cols], dtype=float) + numeric_part_prime = np.array(run_prime_data_content[:, num_cols], dtype=float) + string_part = run_data_content[:, ~num_cols] + string_part_prime = run_prime_data_content[:, ~num_cols] + np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime) + np.testing.assert_array_equal(string_part, string_part_prime) + def _test_run_obj_equals(self, run, run_prime): for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]: if getattr(run, dictionary) is not None: @@ -49,14 +70,9 @@ def _test_run_obj_equals(self, run, run_prime): if other is not None: self.assertDictEqual(other, dict()) self.assertEqual(run._to_xml(), run_prime._to_xml()) + self._test_prediction_data_equal(run, run_prime) - numeric_part = np.array(np.array(run.data_content)[:, 0:-2], dtype=float) - numeric_part_prime = np.array(np.array(run_prime.data_content)[:, 0:-2], dtype=float) - string_part = np.array(run.data_content)[:, -2:] - string_part_prime = np.array(run_prime.data_content)[:, -2:] - np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime) - np.testing.assert_array_equal(string_part, string_part_prime) - + # Test trace if run.trace is not None: run_trace_content = run.trace.trace_to_arff()["data"] else: @@ -192,6 +208,73 @@ def test_to_from_filesystem_no_model(self): with self.assertRaises(ValueError, msg="Could not find model.pkl"): openml.runs.OpenMLRun.from_filesystem(cache_path) + @staticmethod + def _get_models_tasks_for_tests(): + model_clf = 
Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ("classifier", DummyClassifier(strategy="prior")), + ] + ) + model_reg = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ( + "regressor", + # LR because dummy does not produce enough float-like values + LinearRegression(), + ), + ] + ) + + task_clf = openml.tasks.get_task(119) # diabetes; hold out validation + task_reg = openml.tasks.get_task(733) # quake; crossvalidation + + return [(model_clf, task_clf), (model_reg, task_reg)] + + @staticmethod + def assert_run_prediction_data(task, run, model): + # -- Get y_pred and y_true as it should be stored in the run + n_repeats, n_folds, n_samples = task.get_split_dimensions() + if (n_repeats > 1) or (n_samples > 1): + raise ValueError("Test does not support this task type's split dimensions.") + + X, y = task.get_X_and_y() + + # Check correctness of y_true and y_pred in run + for fold_id in range(n_folds): + # Get data for fold + _, test_indices = task.get_train_test_split_indices(repeat=0, fold=fold_id, sample=0) + train_mask = np.full(len(X), True) + train_mask[test_indices] = False + + # Get train / test + X_train = X[train_mask] + y_train = y[train_mask] + X_test = X[~train_mask] + y_test = y[~train_mask] + + # Get y_pred + y_pred = model.fit(X_train, y_train).predict(X_test) + + # Get stored data for fold + saved_fold_data = run.predictions[run.predictions["fold"] == fold_id].sort_values( + by="row_id" + ) + saved_y_pred = saved_fold_data["prediction"].values + gt_key = "truth" if "truth" in list(saved_fold_data) else "correct" + saved_y_test = saved_fold_data[gt_key].values + + assert_method = np.testing.assert_array_almost_equal + if task.task_type == "Supervised Classification": + y_pred = np.take(task.class_labels, y_pred) + y_test = np.take(task.class_labels, y_test) + assert_method = np.testing.assert_array_equal + + # Assert correctness + assert_method(y_pred, saved_y_pred) + assert_method(y_test, saved_y_test) + @pytest.mark.sklearn def test_publish_with_local_loaded_flow(self): """ @@ -200,40 +283,85 @@ def test_publish_with_local_loaded_flow(self): """ extension = openml.extensions.sklearn.SklearnExtension() - model = Pipeline( - [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] - ) - task = openml.tasks.get_task(119) # diabetes; crossvalidation + for model, task in self._get_models_tasks_for_tests(): + # Make sure the flow does not exist on the server yet. + flow = extension.model_to_flow(model) + self._add_sentinel_to_flow_name(flow) + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + run = openml.runs.run_flow_on_task( + flow=flow, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, + upload_flow=False, + ) - # Make sure the flow does not exist on the server yet. - flow = extension.model_to_flow(model) - self._add_sentinel_to_flow_name(flow) - self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + # Make sure that the flow has not been uploaded as requested. + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) - run = openml.runs.run_flow_on_task( - flow=flow, - task=task, - add_local_measures=False, - avoid_duplicate_runs=False, - upload_flow=False, - ) + # Make sure that the prediction data stored in the run is correct. + self.assert_run_prediction_data(task, run, clone(model)) - # Make sure that the flow has not been uploaded as requested. 
- self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + run.to_filesystem(cache_path) + # obtain run from filesystem + loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) + loaded_run.publish() - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) - run.to_filesystem(cache_path) - # obtain run from filesystem - loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) - loaded_run.publish() - TestBase._mark_entity_for_removal("run", loaded_run.run_id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) - ) + # Clean up + TestBase._mark_entity_for_removal("run", loaded_run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) + ) + + # make sure the flow is published as part of publishing the run. + self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) + openml.runs.get_run(loaded_run.run_id) + + @pytest.mark.sklearn + def test_offline_and_online_run_identical(self): + + extension = openml.extensions.sklearn.SklearnExtension() + + for model, task in self._get_models_tasks_for_tests(): + # Make sure the flow does not exist on the server yet. + flow = extension.model_to_flow(model) + self._add_sentinel_to_flow_name(flow) + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + run = openml.runs.run_flow_on_task( + flow=flow, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, + upload_flow=False, + ) - # make sure the flow is published as part of publishing the run. - self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) - openml.runs.get_run(loaded_run.run_id) + # Make sure that the flow has not been uploaded as requested. 
+ self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + # Load from filesystem + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + run.to_filesystem(cache_path) + loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) + + # Assert identical for offline - offline + self._test_run_obj_equals(run, loaded_run) + + # Publish and test for offline - online + run.publish() + self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) + + try: + online_run = openml.runs.get_run(run.run_id, ignore_cache=True) + self._test_prediction_data_equal(run, online_run) + finally: + # Clean up + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) + ) def test_run_setup_string_included_in_xml(self): SETUP_STRING = "setup-string" diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index ca38750d8..14e6d7298 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1308,10 +1308,11 @@ def test__run_task_get_arffcontent(self): # check row id self.assertGreaterEqual(arff_line[2], 0) self.assertLessEqual(arff_line[2], num_instances - 1) + # check prediction and ground truth columns + self.assertIn(arff_line[4], ["won", "nowin"]) + self.assertIn(arff_line[5], ["won", "nowin"]) # check confidences - self.assertAlmostEqual(sum(arff_line[4:6]), 1.0) - self.assertIn(arff_line[6], ["won", "nowin"]) - self.assertIn(arff_line[7], ["won", "nowin"]) + self.assertAlmostEqual(sum(arff_line[6:]), 1.0) def test__create_trace_from_arff(self): with open(self.static_cache_dir + "/misc/trace.arff", "r") as arff_file: From b84536ad19c9110d6eda44963e082a52ecc8b1aa Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 24 Feb 2023 10:37:47 +0100 Subject: [PATCH 43/53] Fix documentation building (#1217) * Fix documentation building * Fix numpy version * Fix two links --- .github/workflows/docs.yaml | 3 +++ doc/contributing.rst | 2 +- doc/index.rst | 4 ++-- examples/30_extended/fetch_evaluations_tutorial.py | 4 ++-- examples/30_extended/fetch_runtimes_tutorial.py | 2 +- examples/README.txt | 2 ++ 6 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 95764d3c8..e601176b3 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -13,6 +13,9 @@ jobs: - name: Install dependencies run: | pip install -e .[docs,examples,examples_unix] + # dependency "fanova" does not work with numpy 1.24 or later + # https://github.com/automl/fanova/issues/108 + pip install numpy==1.23.5 - name: Make docs run: | cd doc diff --git a/doc/contributing.rst b/doc/contributing.rst index f710f8a71..e8d537338 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -23,6 +23,6 @@ In particular, a few ways to contribute to openml-python are: * `Cite OpenML `_ if you use it in a scientific publication. - * Visit one of our `hackathons `_. + * Visit one of our `hackathons `_. * Contribute to another OpenML project, such as `the main OpenML project `_. diff --git a/doc/index.rst b/doc/index.rst index b0140c1d0..b8856e83b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -40,7 +40,7 @@ Example run.publish() print(f'View the run online: {run.openml_url}') -You can find more examples in our :ref:`sphx_glr_examples`. +You can find more examples in our :ref:`examples-index`. 
---------------------------- How to get OpenML for python ---------------------------- @@ -60,7 +60,7 @@ Content * :ref:`usage` * :ref:`api` -* :ref:`sphx_glr_examples` +* :ref:`examples-index` * :ref:`extensions` * :ref:`contributing` * :ref:`progress` diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index 2823eabf3..86302e2d1 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -90,9 +90,9 @@ def plot_cdf(values, metric="predictive_accuracy"): plt.title("CDF") plt.xlabel(metric) plt.ylabel("Likelihood") - plt.grid(b=True, which="major", linestyle="-") + plt.grid(visible=True, which="major", linestyle="-") plt.minorticks_on() - plt.grid(b=True, which="minor", linestyle="--") + plt.grid(visible=True, which="minor", linestyle="--") plt.axvline(max_val, linestyle="--", color="gray") plt.text(max_val, 0, "%.3f" % max_val, fontsize=9) plt.show() diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py index 535f3607d..1a6e5117f 100644 --- a/examples/30_extended/fetch_runtimes_tutorial.py +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -408,7 +408,7 @@ def get_incumbent_trace(trace): ################################################################################ # Running a Neural Network from scikit-learn that uses scikit-learn independent # parallelism using libraries such as `MKL, OpenBLAS or BLIS # `_. mlp = MLPClassifier(max_iter=10) diff --git a/examples/README.txt b/examples/README.txt index 332a5b990..d10746bcb 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -1,3 +1,5 @@ +.. _examples-index: + ================ Examples Gallery ================ From 5730669fadbb6ddd69e4497cca4491ca23b7700b Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Fri, 24 Feb 2023 11:23:41 +0100 Subject: [PATCH 44/53] Fix CI Python 3.6 (#1218) * Try Ubuntu 20.04 for Python 3.6 * use old ubuntu for python 3.6 --- .github/workflows/test.yml | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7241f7990..782b6e0a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] os: [ubuntu-latest] sklearn-only: ['true'] @@ -19,15 +19,31 @@ jobs: - python-version: 3.6 scikit-learn: 0.18.2 scipy: 1.2.0 - os: ubuntu-20.04 sklearn-only: 'true' - python-version: 3.6 scikit-learn: 0.19.2 - os: ubuntu-20.04 sklearn-only: 'true' - python-version: 3.6 scikit-learn: 0.20.2 - os: ubuntu-20.04
[docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 1 to 2. - [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/v1...v2) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/release_docker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index 3df6cdf4c..6ceb1d060 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -18,7 +18,7 @@ jobs: uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Login to DockerHub uses: docker/login-action@v2 From 5dcb7a319c687befe6faf86404780d5c574496f8 Mon Sep 17 00:00:00 2001 From: Vishal Parmar Date: Fri, 24 Feb 2023 21:39:52 +0530 Subject: [PATCH 46/53] Update run.py (#1194) * Update run.py * Update run.py updated description to not contain duplicate information. * Update run.py --- openml/runs/run.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/openml/runs/run.py b/openml/runs/run.py index 804c0f484..90e7a4b0b 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -31,36 +31,55 @@ class OpenMLRun(OpenMLBase): Parameters ---------- task_id: int + The ID of the OpenML task associated with the run. flow_id: int + The ID of the OpenML flow associated with the run. dataset_id: int + The ID of the OpenML dataset used for the run. setup_string: str + The setup string of the run. output_files: Dict[str, str] - A dictionary that specifies where each related file can be found. + Specifies where each related file can be found. setup_id: int + An integer representing the ID of the setup used for the run. tags: List[str] + Representing the tags associated with the run. uploader: int - User ID of the uploader. + User ID of the uploader. uploader_name: str + The name of the person who uploaded the run. evaluations: Dict + Representing the evaluations of the run. fold_evaluations: Dict + The evaluations of the run for each fold. sample_evaluations: Dict + The evaluations of the run for each sample. data_content: List[List] The predictions generated from executing this run. trace: OpenMLRunTrace + The trace containing information on internal model evaluations of this run. model: object + The untrained model that was evaluated in the run. task_type: str + The type of the OpenML task associated with the run. task_evaluation_measure: str + The evaluation measure used for the task. flow_name: str + The name of the OpenML flow associated with the run. parameter_settings: List[OrderedDict] + Representing the parameter settings used for the run. predictions_url: str + The URL of the predictions file. task: OpenMLTask + An instance of the OpenMLTask class, representing the OpenML task associated with the run. flow: OpenMLFlow + An instance of the OpenMLFlow class, representing the OpenML flow associated with the run. run_id: int + The ID of the run. description_text: str, optional - Description text to add to the predictions file. - If left None, is set to the time the arff file is generated. + Description text to add to the predictions file. 
If left None, is set to the time the arff file is generated. run_details: str, optional (default=None) - Description of the run stored in the run meta-data. + Description of the run stored in the run meta-data. """ def __init__( From 687a0f11e7eead5a26135ad4a1c826acc0aa1503 Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Wed, 1 Mar 2023 08:41:17 +0100 Subject: [PATCH 47/53] Refactor if-statements (#1219) * Refactor if-statements * Add explicit names to conditional expression * Add 'dependencies' to better mimic OpenMLFlow --- openml/_api_calls.py | 4 +-- openml/datasets/dataset.py | 12 +++---- openml/extensions/sklearn/extension.py | 47 ++++++++++--------------- openml/flows/functions.py | 5 +-- openml/setups/functions.py | 5 +-- openml/tasks/split.py | 10 +++--- openml/utils.py | 5 +-- tests/test_extensions/test_functions.py | 9 ++--- tests/test_runs/test_run_functions.py | 6 ++-- 9 files changed, 37 insertions(+), 66 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index f3c3306fc..c22f82840 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -303,9 +303,7 @@ def __is_checksum_equal(downloaded_file, md5_checksum=None): md5 = hashlib.md5() md5.update(downloaded_file.encode("utf-8")) md5_checksum_download = md5.hexdigest() - if md5_checksum == md5_checksum_download: - return True - return False + return md5_checksum == md5_checksum_download def _send_request(request_method, url, data, files=None, md5_checksum=None): diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 6f3f66853..1644ff177 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -275,7 +275,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: def __eq__(self, other): - if type(other) != OpenMLDataset: + if not isinstance(other, OpenMLDataset): return False server_fields = { @@ -287,14 +287,12 @@ def __eq__(self, other): "data_file", } - # check that the keys are identical + # check that common keys and values are identical self_keys = set(self.__dict__.keys()) - server_fields other_keys = set(other.__dict__.keys()) - server_fields - if self_keys != other_keys: - return False - - # check that values of the common keys are identical - return all(self.__dict__[key] == other.__dict__[key] for key in self_keys) + return self_keys == other_keys and all( + self.__dict__[key] == other.__dict__[key] for key in self_keys + ) def _download_data(self) -> None: """Download ARFF data file to standard cache directory. 
Set `self.data_file`.""" diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index 28ecd217f..997a9b8ea 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -38,19 +38,16 @@ logger = logging.getLogger(__name__) - if sys.version_info >= (3, 5): from json.decoder import JSONDecodeError else: JSONDecodeError = ValueError - DEPENDENCIES_PATTERN = re.compile( r"^(?P[\w\-]+)((?P==|>=|>)" r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$" ) - SIMPLE_NUMPY_TYPES = [ nptype for type_cat, nptypes in np.sctypes.items() @@ -580,15 +577,11 @@ def _is_cross_validator(self, o: Any) -> bool: @classmethod def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: - if getattr(flow, "dependencies", None) is not None and "sklearn" in flow.dependencies: - return True - if flow.external_version is None: - return False - else: - return ( - flow.external_version.startswith("sklearn==") - or ",sklearn==" in flow.external_version - ) + sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies + sklearn_as_external = isinstance(flow.external_version, str) and ( + flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version + ) + return sklearn_dependency or sklearn_as_external def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: """Fetches the sklearn function docstring for the flow description @@ -1867,24 +1860,22 @@ def is_subcomponent_specification(values): # checks whether the current value can be a specification of # subcomponents, as for example the value for steps parameter # (in Pipeline) or transformers parameter (in - # ColumnTransformer). These are always lists/tuples of lists/ - # tuples, size bigger than 2 and an OpenMLFlow item involved. - if not isinstance(values, (tuple, list)): - return False - for item in values: - if not isinstance(item, (tuple, list)): - return False - if len(item) < 2: - return False - if not isinstance(item[1], (openml.flows.OpenMLFlow, str)): - if ( + # ColumnTransformer). + return ( + # Specification requires list/tuple of list/tuple with + # at least length 2. 
+ isinstance(values, (tuple, list)) + and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values) + # And each component needs to be a flow or interpretable string + and all( + isinstance(item[1], openml.flows.OpenMLFlow) + or ( isinstance(item[1], str) and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - pass - else: - return False - return True + ) + for item in values + ) + ) # _flow is openml flow object, _param dict maps from flow name to flow # id for the main call, the param dict can be overridden (useful for diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 43cb453fa..99525c3e4 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -261,10 +261,7 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: result_dict = xmltodict.parse(xml_response) flow_id = int(result_dict["oml:flow_exists"]["oml:id"]) - if flow_id > 0: - return flow_id - else: - return False + return flow_id if flow_id > 0 else False def get_flow_id( diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 1ce0ed005..f4fab3219 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -55,10 +55,7 @@ def setup_exists(flow) -> int: ) result_dict = xmltodict.parse(result) setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) - if setup_id > 0: - return setup_id - else: - return False + return setup_id if setup_id > 0 else False def _get_cached_setup(setup_id): diff --git a/openml/tasks/split.py b/openml/tasks/split.py index e5fafedc5..dc496ef7d 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -47,12 +47,10 @@ def __eq__(self, other): or self.name != other.name or self.description != other.description or self.split.keys() != other.split.keys() - ): - return False - - if any( - self.split[repetition].keys() != other.split[repetition].keys() - for repetition in self.split + or any( + self.split[repetition].keys() != other.split[repetition].keys() + for repetition in self.split + ) ): return False diff --git a/openml/utils.py b/openml/utils.py index 8ab238463..0f60f2bb8 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -174,10 +174,7 @@ def _delete_entity(entity_type, entity_id): url_suffix = "%s/%d" % (entity_type, entity_id) result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") result = xmltodict.parse(result_xml) - if "oml:%s_delete" % entity_type in result: - return True - else: - return False + return "oml:%s_delete" % entity_type in result def _list_all(listing_call, output_format="dict", *args, **filters): diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 791e815e1..36bb06061 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -9,6 +9,7 @@ class DummyFlow: external_version = "DummyFlow==0.1" + dependencies = None class DummyModel: @@ -18,15 +19,11 @@ class DummyModel: class DummyExtension1: @staticmethod def can_handle_flow(flow): - if not inspect.stack()[2].filename.endswith("test_functions.py"): - return False - return True + return inspect.stack()[2].filename.endswith("test_functions.py") @staticmethod def can_handle_model(model): - if not inspect.stack()[2].filename.endswith("test_functions.py"): - return False - return True + return inspect.stack()[2].filename.endswith("test_functions.py") class DummyExtension2: diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 14e6d7298..786ab2291 100644 --- 
a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -127,7 +127,7 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): "evaluated correctly on the server".format(run_id) ) - def _compare_predictions(self, predictions, predictions_prime): + def _assert_predictions_equal(self, predictions, predictions_prime): self.assertEqual( np.array(predictions_prime["data"]).shape, np.array(predictions["data"]).shape ) @@ -151,8 +151,6 @@ def _compare_predictions(self, predictions, predictions_prime): else: self.assertEqual(val_1, val_2) - return True - def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create_task_obj): run = openml.runs.get_run(run_id) @@ -183,7 +181,7 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create predictions_prime = run_prime._generate_arff_dict() - self._compare_predictions(predictions, predictions_prime) + self._assert_predictions_equal(predictions, predictions_prime) pd.testing.assert_frame_equal( run.predictions, run_prime.predictions, From c0a75bdd0d30dc1b038a56cfa51ca51e5ba5f5b1 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 1 Mar 2023 09:26:54 +0100 Subject: [PATCH 48/53] Ci python 38 (#1220) * Install custom numpy version for specific combination of Python3.8 and numpy * Debug output * Change syntax * move to coverage action v3 * Remove test output --- .github/workflows/test.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 782b6e0a3..974147ed3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,6 +72,11 @@ jobs: - name: Install scikit-learn ${{ matrix.scikit-learn }} run: | pip install scikit-learn==${{ matrix.scikit-learn }} + - name: Install numpy for Python 3.8 + # Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5 + if: ${{ matrix.python-version == '3.8' && contains(fromJSON('["0.23.1", "0.22.2", "0.21.2"]'), matrix.scikit-learn) }} + run: | + pip install numpy==1.23.5 - name: Install scipy ${{ matrix.scipy }} if: ${{ matrix.scipy }} run: | @@ -105,7 +110,7 @@ jobs: fi - name: Upload coverage if: matrix.code-cov && always() - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v3 with: files: coverage.xml fail_ci_if_error: true From ce82fd50ac209c4e41e4478e7742cec39c1853dd Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Wed, 1 Mar 2023 11:34:53 +0100 Subject: [PATCH 49/53] Add summary of locally computed metrics to representation of run (#1214) * added additional task agnostic local result to print of run * add PR to progress.rst * fix comment typo * Update openml/runs/run.py Co-authored-by: Matthias Feurer * add a function to list available estimation procedures * refactor print to only work for supported task types and local measures * add test for print out and update progress * added additional task agnostic local result to print of run * add PR to progress.rst * fix comment typo * Update openml/runs/run.py Co-authored-by: Matthias Feurer * add a function to list available estimation procedures * refactor print to only work for supported task types and local measures * add test for print out and update progress * Fix CI Python 3.6 (#1218) * Try Ubuntu 20.04 for Python 3.6 * use old ubuntu for python 3.6 * Bump docker/setup-buildx-action from 1 to 2 (#1221) Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 1 to 2.
- [Release notes](https://github.com/docker/setup-buildx-action/releases) - [Commits](https://github.com/docker/setup-buildx-action/compare/v1...v2) --- updated-dependencies: - dependency-name: docker/setup-buildx-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update run.py (#1194) * Update run.py * Update run.py updated description to not contain duplicate information. * Update run.py * add type hint for new function * update add description * Refactor if-statements (#1219) * Refactor if-statements * Add explicit names to conditional expression * Add 'dependencies' to better mimic OpenMLFlow * Ci python 38 (#1220) * Install custom numpy version for specific combination of Python3.8 and numpy * Debug output * Change syntax * move to coverage action v3 * Remove test output * added additional task agnostic local result to print of run * add PR to progress.rst * fix comment typo * Update openml/runs/run.py Co-authored-by: Matthias Feurer * add a function to list available estimation procedures * refactor print to only work for supported task types and local measures * add test for print out and update progress * added additional task agnostic local result to print of run * add PR to progress.rst * add type hint for new function * update add description * fix run doc string --------- Signed-off-by: dependabot[bot] Co-authored-by: Matthias Feurer Co-authored-by: Matthias Feurer Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Vishal Parmar Co-authored-by: Pieter Gijsbers --- doc/progress.rst | 3 + openml/evaluations/functions.py | 33 +++++++++ openml/runs/run.py | 80 ++++++++++++++++++++++----- tests/test_runs/test_run_functions.py | 8 +++ 4 files changed, 110 insertions(+), 14 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 46c34c03c..48dc2a1a3 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -9,8 +9,11 @@ Changelog 0.13.1 ~~~~~~ + * Add new contributions here. + * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation. * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``. * FIX #1198: Support numpy 1.24 and higher. + * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. 0.13.0 ~~~~~~ diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 30d376c04..693ec06cf 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -275,6 +275,39 @@ def list_evaluation_measures() -> List[str]: return qualities +def list_estimation_procedures() -> List[str]: + """Return list of estimation procedures available. + + The function performs an API call to retrieve the entire list of + estimation procedures' names that are available.
+ + Returns + ------- + list + """ + + api_call = "estimationprocedure/list" + xml_string = openml._api_calls._perform_api_call(api_call, "get") + api_results = xmltodict.parse(xml_string) + + # Minimalistic check if the XML is useful + if "oml:estimationprocedures" not in api_results: + raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedures"') + if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]: + raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedure"') + + if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list): + raise TypeError( + "Error in return XML, does not contain " '"oml:estimationprocedure" as a list' + ) + + prods = [ + prod["oml:name"] + for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"] + ] + return prods + + def list_evaluations_setups( function: str, offset: Optional[int] = None, diff --git a/openml/runs/run.py b/openml/runs/run.py index 90e7a4b0b..5528c8a67 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -26,7 +26,7 @@ class OpenMLRun(OpenMLBase): - """OpenML Run: result of running a model on an openml dataset. + """OpenML Run: result of running a model on an OpenML dataset. Parameters ---------- @@ -39,13 +39,13 @@ class OpenMLRun(OpenMLBase): setup_string: str The setup string of the run. output_files: Dict[str, str] - Specifies where each related file can be found. + Specifies where each related file can be found. setup_id: int An integer representing the ID of the setup used for the run. tags: List[str] Representing the tags associated with the run. uploader: int - User ID of the uploader. + User ID of the uploader. uploader_name: str The name of the person who uploaded the run. evaluations: Dict @@ -71,15 +71,18 @@ class OpenMLRun(OpenMLBase): predictions_url: str The URL of the predictions file. task: OpenMLTask - An instance of the OpenMLTask class, representing the OpenML task associated with the run. + An instance of the OpenMLTask class, representing the OpenML task associated + with the run. flow: OpenMLFlow - An instance of the OpenMLFlow class, representing the OpenML flow associated with the run. + An instance of the OpenMLFlow class, representing the OpenML flow associated + with the run. run_id: int The ID of the run. description_text: str, optional - Description text to add to the predictions file. If left None, is set to the time the arff file is generated. + Description text to add to the predictions file. If left None, is set to the + time the arff file is generated. run_details: str, optional (default=None) - Description of the run stored in the run meta-data. + Description of the run stored in the run meta-data. """ def __init__( @@ -158,8 +161,37 @@ def predictions(self) -> pd.DataFrame: def id(self) -> Optional[int]: return self.run_id + def _evaluation_summary(self, metric: str) -> str: + """Summarizes the evaluation of a metric over all folds. + + The fold scores for the metric must exist already. During run creation, + by default, the MAE for OpenMLRegressionTask and the accuracy for + OpenMLClassificationTask/OpenMLLearningCurveTask tasks are computed. + + If repetitions exist, we take the mean over all repetitions. + + Parameters + ---------- + metric: str + Name of an evaluation metric that was used to compute fold scores. + + Returns + ------- + metric_summary: str + A formatted string that displays the metric's evaluation summary. + The summary consists of the mean and std.
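+ For example, fold scores of 0.81, 0.79 and 0.83 in a single repetition + are summarized as "0.8100 +- 0.0163".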
+ """ + fold_score_lists = self.fold_evaluations[metric].values() + + # Get the mean and std over all repetitions + rep_means = [np.mean(list(x.values())) for x in fold_score_lists] + rep_stds = [np.std(list(x.values())) for x in fold_score_lists] + + return "{:.4f} +- {:.4f}".format(np.mean(rep_means), np.mean(rep_stds)) + def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: """Collect all information to display in the __repr__ body.""" + # Set up fields fields = { "Uploader Name": self.uploader_name, "Metric": self.task_evaluation_measure, @@ -175,6 +207,10 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: "Dataset ID": self.dataset_id, "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id), } + + # determines the order of the initial fields in which the information will be printed + order = ["Uploader Name", "Uploader Profile", "Metric", "Result"] + if self.uploader is not None: fields["Uploader Profile"] = "{}/u/{}".format( openml.config.get_server_base_url(), self.uploader @@ -183,13 +219,29 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["Run URL"] = self.openml_url if self.evaluations is not None and self.task_evaluation_measure in self.evaluations: fields["Result"] = self.evaluations[self.task_evaluation_measure] - - # determines the order in which the information will be printed - order = [ - "Uploader Name", - "Uploader Profile", - "Metric", - "Result", + elif self.fold_evaluations is not None: + # -- Add locally computed summary values if possible + if "predictive_accuracy" in self.fold_evaluations: + # OpenMLClassificationTask; OpenMLLearningCurveTask + # default: predictive_accuracy + result_field = "Local Result - Accuracy (+- STD)" + fields[result_field] = self._evaluation_summary("predictive_accuracy") + order.append(result_field) + elif "mean_absolute_error" in self.fold_evaluations: + # OpenMLRegressionTask + # default: mean_absolute_error + result_field = "Local Result - MAE (+- STD)" + fields[result_field] = self._evaluation_summary("mean_absolute_error") + order.append(result_field) + + if "usercpu_time_millis" in self.fold_evaluations: + # Runtime should be available for most tasks types + rt_field = "Local Runtime - ms (+- STD)" + fields[rt_field] = self._evaluation_summary("usercpu_time_millis") + order.append(rt_field) + + # determines the remaining order + order += [ "Run ID", "Run URL", "Task ID", diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 786ab2291..520b7c0bc 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -531,6 +531,14 @@ def determine_grid_size(param_grid): # todo: check if runtime is present self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, task_type=task_type) + + # Check if run string and print representation do not run into an error + # The above check already verifies that all columns needed for supported + # representations are present. 
+ # Supported: SUPERVISED_CLASSIFICATION, LEARNING_CURVE, SUPERVISED_REGRESSION + str(run) + self.logger.info(run) + return run def _run_and_upload_classification( From c177d39194df264cbdeb5eea2bea64c75ee115e2 Mon Sep 17 00:00:00 2001 From: Lennart Purucker Date: Sat, 4 Mar 2023 09:48:19 +0100 Subject: [PATCH 50/53] Better Error for Checksum Mismatch (#1225) * add better error handling for checksum when downloading a file * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update usage of __is_checksum_equal * Update openml/_api_calls.py Co-authored-by: Pieter Gijsbers --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Pieter Gijsbers --- openml/_api_calls.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/openml/_api_calls.py b/openml/_api_calls.py index c22f82840..5140a3470 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -297,11 +297,11 @@ def __read_url(url, request_method, data=None, md5_checksum=None): ) -def __is_checksum_equal(downloaded_file, md5_checksum=None): +def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: Optional[str] = None) -> bool: if md5_checksum is None: return True md5 = hashlib.md5() - md5.update(downloaded_file.encode("utf-8")) + md5.update(downloaded_file_binary) md5_checksum_download = md5.hexdigest() return md5_checksum == md5_checksum_download @@ -323,7 +323,21 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None): else: raise NotImplementedError() __check_response(response=response, url=url, file_elements=files) - if request_method == "get" and not __is_checksum_equal(response.text, md5_checksum): + if request_method == "get" and not __is_checksum_equal( + response.text.encode("utf-8"), md5_checksum + ): + + # -- Check if encoding is not UTF-8 perhaps + if __is_checksum_equal(response.content, md5_checksum): + raise OpenMLHashException( + "Checksum of downloaded file is unequal to the expected checksum {} " + "because the text encoding is not UTF-8 when downloading {}. " + "There might be a server-sided issue with the file, " + "see: https://github.com/openml/openml-python/issues/1180.".format( + md5_checksum, url + ) + ) + + raise OpenMLHashException( "Checksum of downloaded file is unequal to the expected checksum {} " "when downloading {}.".format(md5_checksum, url) ) @@ -384,7 +398,6 @@ def __parse_server_exception( url: str, file_elements: Dict, ) -> OpenMLServerError: - if response.status_code == 414: raise OpenMLServerError("URI too long!
({})".format(url)) try: From 24cbc5ed902e2a90d5f277f0b3c86836bc76891d Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Sat, 4 Mar 2023 17:53:54 +0100 Subject: [PATCH 51/53] Fix coverage (#1226) * Correctly only clean up tests/files/ * Log to console for pytest invocation --- .github/workflows/test.yml | 4 ++-- tests/conftest.py | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 974147ed3..cc38aebb2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -91,8 +91,8 @@ jobs: if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi # Most of the time, running only the scikit-learn tests is sufficient if [ ${{ matrix.sklearn-only }} = 'true' ]; then sklearn='-m sklearn'; fi - echo pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 - pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 + echo pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 -o log_cli=true + pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 -o log_cli=true - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. diff --git a/tests/conftest.py b/tests/conftest.py index 89da5fca4..d727bb537 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ import os import logging +import pathlib from typing import List import pytest @@ -51,26 +52,20 @@ def worker_id() -> str: return "master" -def read_file_list() -> List[str]: +def read_file_list() -> List[pathlib.Path]: """Returns a list of paths to all files that currently exist in 'openml/tests/files/' - :return: List[str] + :return: List[pathlib.Path] """ - this_dir = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) - directory = os.path.join(this_dir, "..") - logger.info("Collecting file lists from: {}".format(directory)) - file_list = [] - for root, _, filenames in os.walk(directory): - for filename in filenames: - file_list.append(os.path.join(root, filename)) - return file_list + test_files_dir = pathlib.Path(__file__).parent / "files" + return [f for f in test_files_dir.rglob("*") if f.is_file()] -def compare_delete_files(old_list, new_list) -> None: +def compare_delete_files(old_list: List[pathlib.Path], new_list: List[pathlib.Path]) -> None: """Deletes files that are there in the new_list but not in the old_list - :param old_list: List[str] - :param new_list: List[str] + :param old_list: List[pathlib.Path] + :param new_list: List[pathlib.Path] :return: None """ file_list = list(set(new_list) - set(old_list)) From 3c00d7b05b17d248d53db40d1b437808f86e1442 Mon Sep 17 00:00:00 2001 From: Mohammad Mirkazemi Date: Tue, 21 Mar 2023 09:48:56 +0100 Subject: [PATCH 52/53] Issue 1028: public delete functions for run, task, flow and database (#1060) --- .gitignore | 1 + doc/api.rst | 4 + doc/progress.rst | 4 +- openml/_api_calls.py | 10 +- openml/datasets/__init__.py | 2 + openml/datasets/functions.py | 19 +++ openml/exceptions.py | 25 ++-- openml/flows/__init__.py | 10 +- openml/flows/functions.py | 19 +++ openml/runs/__init__.py | 2 + openml/runs/functions.py | 18 +++ openml/tasks/__init__.py | 2 + 
openml/tasks/functions.py | 19 +++ openml/testing.py | 22 ++- openml/utils.py | 39 ++++- tests/conftest.py | 10 ++ .../datasets/data_delete_has_tasks.xml | 4 + .../datasets/data_delete_not_exist.xml | 4 + .../datasets/data_delete_not_owned.xml | 4 + .../datasets/data_delete_successful.xml | 3 + .../flows/flow_delete_has_runs.xml | 5 + .../flows/flow_delete_is_subflow.xml | 5 + .../flows/flow_delete_not_exist.xml | 4 + .../flows/flow_delete_not_owned.xml | 4 + .../flows/flow_delete_successful.xml | 3 + .../runs/run_delete_not_exist.xml | 4 + .../runs/run_delete_not_owned.xml | 4 + .../runs/run_delete_successful.xml | 3 + .../tasks/task_delete_has_runs.xml | 4 + .../tasks/task_delete_not_exist.xml | 4 + .../tasks/task_delete_not_owned.xml | 4 + .../tasks/task_delete_successful.xml | 3 + tests/test_datasets/test_dataset_functions.py | 139 +++++++++++++++++- tests/test_flows/test_flow_functions.py | 129 +++++++++++++++- tests/test_runs/test_run_functions.py | 103 ++++++++++++- tests/test_tasks/test_task_functions.py | 88 ++++++++++- 36 files changed, 691 insertions(+), 36 deletions(-) create mode 100644 tests/files/mock_responses/datasets/data_delete_has_tasks.xml create mode 100644 tests/files/mock_responses/datasets/data_delete_not_exist.xml create mode 100644 tests/files/mock_responses/datasets/data_delete_not_owned.xml create mode 100644 tests/files/mock_responses/datasets/data_delete_successful.xml create mode 100644 tests/files/mock_responses/flows/flow_delete_has_runs.xml create mode 100644 tests/files/mock_responses/flows/flow_delete_is_subflow.xml create mode 100644 tests/files/mock_responses/flows/flow_delete_not_exist.xml create mode 100644 tests/files/mock_responses/flows/flow_delete_not_owned.xml create mode 100644 tests/files/mock_responses/flows/flow_delete_successful.xml create mode 100644 tests/files/mock_responses/runs/run_delete_not_exist.xml create mode 100644 tests/files/mock_responses/runs/run_delete_not_owned.xml create mode 100644 tests/files/mock_responses/runs/run_delete_successful.xml create mode 100644 tests/files/mock_responses/tasks/task_delete_has_runs.xml create mode 100644 tests/files/mock_responses/tasks/task_delete_not_exist.xml create mode 100644 tests/files/mock_responses/tasks/task_delete_not_owned.xml create mode 100644 tests/files/mock_responses/tasks/task_delete_successful.xml diff --git a/.gitignore b/.gitignore index c06e715ef..060db33be 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ target/ # IDE .idea *.swp +.vscode # MYPY .mypy_cache diff --git a/doc/api.rst b/doc/api.rst index 86bfd121e..288bf66fb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -38,6 +38,7 @@ Dataset Functions attributes_arff_from_df check_datasets_active create_dataset + delete_dataset get_dataset get_datasets list_datasets @@ -103,6 +104,7 @@ Flow Functions :template: function.rst assert_flows_equal + delete_flow flow_exists get_flow list_flows @@ -133,6 +135,7 @@ Run Functions :toctree: generated/ :template: function.rst + delete_run get_run get_runs get_run_trace @@ -251,6 +254,7 @@ Task Functions :template: function.rst create_task + delete_task get_task get_tasks list_tasks diff --git a/doc/progress.rst b/doc/progress.rst index 48dc2a1a3..d981c09c0 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -10,10 +10,10 @@ Changelog ~~~~~~ * Add new contributions here. - * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation. 
+ * ADD#1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``). + * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``. * FIX #1198: Support numpy 1.24 and higher. - * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. 0.13.0 ~~~~~~ diff --git a/openml/_api_calls.py b/openml/_api_calls.py index 5140a3470..f7b2a34c5 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -351,10 +351,12 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None): xml.parsers.expat.ExpatError, OpenMLHashException, ) as e: - if isinstance(e, OpenMLServerException): - if e.code not in [107]: - # 107: database connection error - raise + if isinstance(e, OpenMLServerException) and e.code != 107: + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. + # These are typically caused by high server load, + # which means trying again might resolve the issue. + raise elif isinstance(e, xml.parsers.expat.ExpatError): if request_method != "get" or retry_counter >= n_retries: raise OpenMLServerError( diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index abde85c06..efa5a5d5b 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -11,6 +11,7 @@ list_qualities, edit_dataset, fork_dataset, + delete_dataset, ) from .dataset import OpenMLDataset from .data_feature import OpenMLDataFeature @@ -28,4 +29,5 @@ "list_qualities", "edit_dataset", "fork_dataset", + "delete_dataset", ] diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 770413a23..4307c8008 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -1271,3 +1271,22 @@ def _get_online_dataset_format(dataset_id): dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, "get") # build a dict from the xml and get the format from the dataset description return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() + + +def delete_dataset(dataset_id: int) -> bool: + """Delete dataset with id `dataset_id` from the OpenML server. + + This can only be done if you are the owner of the dataset and + no tasks are attached to the dataset. + + Parameters + ---------- + dataset_id : int + OpenML id of the dataset + + Returns + ------- + bool + True if the deletion was successful. False otherwise. 
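+ Note that server-side refusals are raised as exceptions rather than + returned as False: ``OpenMLNotAuthorizedError`` if the dataset is not + owned by you or still has tasks attached, ``OpenMLServerException`` + e.g. if the dataset does not exist.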
+ """ + return openml.utils._delete_entity("data", dataset_id) diff --git a/openml/exceptions.py b/openml/exceptions.py index a5f132128..fe2138e76 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -11,15 +11,14 @@ class OpenMLServerError(PyOpenMLError): """class for when something is really wrong on the server (result did not parse to dict), contains unparsed error.""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLServerException(OpenMLServerError): """exception for when the result of the server was not 200 (e.g., listing call w/o results).""" - # Code needs to be optional to allow the exceptino to be picklable: + # Code needs to be optional to allow the exception to be picklable: # https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable # noqa: E501 def __init__(self, message: str, code: int = None, url: str = None): self.message = message @@ -28,15 +27,11 @@ def __init__(self, message: str, code: int = None, url: str = None): super().__init__(message) def __str__(self): - return "%s returned code %s: %s" % ( - self.url, - self.code, - self.message, - ) + return f"{self.url} returned code {self.code}: {self.message}" class OpenMLServerNoResult(OpenMLServerException): - """exception for when the result of the server is empty.""" + """Exception for when the result of the server is empty.""" pass @@ -44,8 +39,7 @@ class OpenMLServerNoResult(OpenMLServerException): class OpenMLCacheException(PyOpenMLError): """Dataset / task etc not found in cache""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLHashException(PyOpenMLError): @@ -57,8 +51,7 @@ class OpenMLHashException(PyOpenMLError): class OpenMLPrivateDatasetError(PyOpenMLError): """Exception thrown when the user has no rights to access the dataset.""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLRunsExistError(PyOpenMLError): @@ -69,3 +62,9 @@ def __init__(self, run_ids: set, message: str): raise ValueError("Set of run ids must be non-empty.") self.run_ids = run_ids super().__init__(message) + + +class OpenMLNotAuthorizedError(OpenMLServerError): + """Indicates an authenticated user is not authorized to execute the requested action.""" + + pass diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py index 3642b9c56..f8d35c3f5 100644 --- a/openml/flows/__init__.py +++ b/openml/flows/__init__.py @@ -2,7 +2,14 @@ from .flow import OpenMLFlow -from .functions import get_flow, list_flows, flow_exists, get_flow_id, assert_flows_equal +from .functions import ( + get_flow, + list_flows, + flow_exists, + get_flow_id, + assert_flows_equal, + delete_flow, +) __all__ = [ "OpenMLFlow", @@ -11,4 +18,5 @@ "get_flow_id", "flow_exists", "assert_flows_equal", + "delete_flow", ] diff --git a/openml/flows/functions.py b/openml/flows/functions.py index 99525c3e4..aea5cae6d 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -544,3 +544,22 @@ def _create_flow_from_xml(flow_xml: str) -> OpenMLFlow: """ return OpenMLFlow._from_dict(xmltodict.parse(flow_xml)) + + +def delete_flow(flow_id: int) -> bool: + """Delete flow with id `flow_id` from the OpenML server. + + You can only delete flows which you uploaded and which + which are not linked to runs. + + Parameters + ---------- + flow_id : int + OpenML id of the flow + + Returns + ------- + bool + True if the deletion was successful. False otherwise. 
+ """ + return openml.utils._delete_entity("flow", flow_id) diff --git a/openml/runs/__init__.py b/openml/runs/__init__.py index e917a57a5..2abbd8f29 100644 --- a/openml/runs/__init__.py +++ b/openml/runs/__init__.py @@ -12,6 +12,7 @@ run_exists, initialize_model_from_run, initialize_model_from_trace, + delete_run, ) __all__ = [ @@ -27,4 +28,5 @@ "run_exists", "initialize_model_from_run", "initialize_model_from_trace", + "delete_run", ] diff --git a/openml/runs/functions.py b/openml/runs/functions.py index ff1f07c06..d52b43add 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -1209,3 +1209,21 @@ def format_prediction( return [repeat, fold, index, prediction, truth] else: raise NotImplementedError(f"Formatting for {type(task)} is not supported.") + + +def delete_run(run_id: int) -> bool: + """Delete run with id `run_id` from the OpenML server. + + You can only delete runs which you uploaded. + + Parameters + ---------- + run_id : int + OpenML id of the run + + Returns + ------- + bool + True if the deletion was successful. False otherwise. + """ + return openml.utils._delete_entity("run", run_id) diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py index cba0aa14f..a5d578d2d 100644 --- a/openml/tasks/__init__.py +++ b/openml/tasks/__init__.py @@ -15,6 +15,7 @@ get_task, get_tasks, list_tasks, + delete_task, ) __all__ = [ @@ -30,4 +31,5 @@ "list_tasks", "OpenMLSplit", "TaskType", + "delete_task", ] diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index c44d55ea7..964277760 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -545,3 +545,22 @@ def create_task( evaluation_measure=evaluation_measure, **kwargs, ) + + +def delete_task(task_id: int) -> bool: + """Delete task with id `task_id` from the OpenML server. + + You can only delete tasks which you created and have + no runs associated with them. + + Parameters + ---------- + task_id : int + OpenML id of the task + + Returns + ------- + bool + True if the deletion was successful. False otherwise. 
+ """ + return openml.utils._delete_entity("task", task_id) diff --git a/openml/testing.py b/openml/testing.py index 56445a253..4e2f0c006 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -3,12 +3,14 @@ import hashlib import inspect import os +import pathlib import shutil import sys import time from typing import Dict, Union, cast import unittest import pandas as pd +import requests import openml from openml.tasks import TaskType @@ -306,4 +308,22 @@ class CustomImputer(SimpleImputer): pass -__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "check_task_existence"] +def create_request_response( + *, status_code: int, content_filepath: pathlib.Path +) -> requests.Response: + with open(content_filepath, "r") as xml_response: + response_body = xml_response.read() + + response = requests.Response() + response.status_code = status_code + response._content = response_body.encode() + return response + + +__all__ = [ + "TestBase", + "SimpleImputer", + "CustomImputer", + "check_task_existence", + "create_request_response", +] diff --git a/openml/utils.py b/openml/utils.py index 0f60f2bb8..3c2fa876f 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -172,9 +172,42 @@ def _delete_entity(entity_type, entity_id): raise ValueError("Can't delete a %s" % entity_type) url_suffix = "%s/%d" % (entity_type, entity_id) - result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") - result = xmltodict.parse(result_xml) - return "oml:%s_delete" % entity_type in result + try: + result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") + result = xmltodict.parse(result_xml) + return f"oml:{entity_type}_delete" in result + except openml.exceptions.OpenMLServerException as e: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if e.code in by_other_user: + raise openml.exceptions.OpenMLNotAuthorizedError( + message=( + f"The {entity_type} can not be deleted because it was not uploaded by you." 
+ ), + ) from e + if e.code in has_dependent_entities: + raise openml.exceptions.OpenMLNotAuthorizedError( + message=( + f"The {entity_type} can not be deleted because " + f"it still has associated entities: {e.message}" + ) + ) from e + if e.code in unknown_reason: + raise openml.exceptions.OpenMLServerError( + message=( + f"The {entity_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from e + raise def _list_all(listing_call, output_format="dict", *args, **filters): diff --git a/tests/conftest.py b/tests/conftest.py index d727bb537..43e2cc3ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -185,3 +185,13 @@ def pytest_addoption(parser): @pytest.fixture(scope="class") def long_version(request): request.cls.long_version = request.config.getoption("--long") + + +@pytest.fixture +def test_files_directory() -> pathlib.Path: + return pathlib.Path(__file__).parent / "files" + + +@pytest.fixture() +def test_api_key() -> str: + return "c0c42819af31e706efe1f4b88c23c6c1" diff --git a/tests/files/mock_responses/datasets/data_delete_has_tasks.xml b/tests/files/mock_responses/datasets/data_delete_has_tasks.xml new file mode 100644 index 000000000..fc866047c --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_has_tasks.xml @@ -0,0 +1,4 @@ + + 354 + Dataset is in use by other content. Can not be deleted + diff --git a/tests/files/mock_responses/datasets/data_delete_not_exist.xml b/tests/files/mock_responses/datasets/data_delete_not_exist.xml new file mode 100644 index 000000000..b3b212fbe --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 352 + Dataset does not exist + diff --git a/tests/files/mock_responses/datasets/data_delete_not_owned.xml b/tests/files/mock_responses/datasets/data_delete_not_owned.xml new file mode 100644 index 000000000..7d412d48e --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 353 + Dataset is not owned by you + \ No newline at end of file diff --git a/tests/files/mock_responses/datasets/data_delete_successful.xml b/tests/files/mock_responses/datasets/data_delete_successful.xml new file mode 100644 index 000000000..9df47c1a2 --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_successful.xml @@ -0,0 +1,3 @@ + + 40000 + diff --git a/tests/files/mock_responses/flows/flow_delete_has_runs.xml b/tests/files/mock_responses/flows/flow_delete_has_runs.xml new file mode 100644 index 000000000..5c8530e75 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_has_runs.xml @@ -0,0 +1,5 @@ + + 324 + flow is in use by other content (runs). Can not be deleted + {10716, 10707} () + diff --git a/tests/files/mock_responses/flows/flow_delete_is_subflow.xml b/tests/files/mock_responses/flows/flow_delete_is_subflow.xml new file mode 100644 index 000000000..ddc314ae4 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_is_subflow.xml @@ -0,0 +1,5 @@ + + 328 + flow is in use by other content (it is a subflow). 
Can not be deleted + {37661} + diff --git a/tests/files/mock_responses/flows/flow_delete_not_exist.xml b/tests/files/mock_responses/flows/flow_delete_not_exist.xml new file mode 100644 index 000000000..4df49149f --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 322 + flow does not exist + diff --git a/tests/files/mock_responses/flows/flow_delete_not_owned.xml b/tests/files/mock_responses/flows/flow_delete_not_owned.xml new file mode 100644 index 000000000..3aa9a9ef2 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 323 + flow is not owned by you + diff --git a/tests/files/mock_responses/flows/flow_delete_successful.xml b/tests/files/mock_responses/flows/flow_delete_successful.xml new file mode 100644 index 000000000..7638e942d --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_successful.xml @@ -0,0 +1,3 @@ + + 33364 + diff --git a/tests/files/mock_responses/runs/run_delete_not_exist.xml b/tests/files/mock_responses/runs/run_delete_not_exist.xml new file mode 100644 index 000000000..855c223fa --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 392 + Run does not exist + diff --git a/tests/files/mock_responses/runs/run_delete_not_owned.xml b/tests/files/mock_responses/runs/run_delete_not_owned.xml new file mode 100644 index 000000000..551252e22 --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 393 + Run is not owned by you + diff --git a/tests/files/mock_responses/runs/run_delete_successful.xml b/tests/files/mock_responses/runs/run_delete_successful.xml new file mode 100644 index 000000000..fe4233afa --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_successful.xml @@ -0,0 +1,3 @@ + + 10591880 + diff --git a/tests/files/mock_responses/tasks/task_delete_has_runs.xml b/tests/files/mock_responses/tasks/task_delete_has_runs.xml new file mode 100644 index 000000000..87a92540d --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_has_runs.xml @@ -0,0 +1,4 @@ + + 454 + Task is executed in some runs. 
Delete these first + diff --git a/tests/files/mock_responses/tasks/task_delete_not_exist.xml b/tests/files/mock_responses/tasks/task_delete_not_exist.xml new file mode 100644 index 000000000..8a262af29 --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 452 + Task does not exist + diff --git a/tests/files/mock_responses/tasks/task_delete_not_owned.xml b/tests/files/mock_responses/tasks/task_delete_not_owned.xml new file mode 100644 index 000000000..3d504772b --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 453 + Task is not owned by you + diff --git a/tests/files/mock_responses/tasks/task_delete_successful.xml b/tests/files/mock_responses/tasks/task_delete_successful.xml new file mode 100644 index 000000000..594b6e992 --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_successful.xml @@ -0,0 +1,3 @@ + + 361323 + diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index e6c4fe3ec..45a64ab8a 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -13,6 +13,7 @@ import pytest import numpy as np import pandas as pd +import requests import scipy.sparse from oslo_concurrency import lockutils @@ -23,8 +24,9 @@ OpenMLHashException, OpenMLPrivateDatasetError, OpenMLServerException, + OpenMLNotAuthorizedError, ) -from openml.testing import TestBase +from openml.testing import TestBase, create_request_response from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, @@ -1672,3 +1674,138 @@ def test_valid_attribute_validations(default_target_attribute, row_id_attribute, original_data_url=original_data_url, paper_url=paper_url, ) + + def test_delete_dataset(self): + data = [ + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], + ] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] + df = pd.DataFrame(data, columns=column_names) + # enforce the type of each column + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") + # meta-information + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" + dataset = openml.datasets.functions.create_dataset( + name=name, + description=description, + creator=creator, + contributor=None, + collection_date=collection_date, + language=language, + licence=licence, + default_target_attribute="play", + row_id_attribute=None, + ignore_attribute=None, + citation=citation, + attributes="auto", + data=df, + version_label="test", + original_data_url=original_data_url, + paper_url=paper_url, + ) + dataset.publish() + _dataset_id = dataset.id + self.assertTrue(openml.datasets.delete_dataset(_dataset_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + 
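# Serve a canned 412 error response instead of contacting a live server. +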
content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The data can not be deleted because it was not uploaded by you.", + ): + openml.datasets.delete_dataset(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The data can not be deleted because it still has associated entities:", + ): + openml.datasets.delete_dataset(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_dataset_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml" + ) + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.datasets.delete_dataset(40000) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Dataset does not exist", + ): + openml.datasets.delete_dataset(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index 532fb1d1b..f2520cb36 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -4,16 +4,20 @@ import copy import functools import unittest +from unittest import mock from unittest.mock import patch from distutils.version import LooseVersion + +import requests import sklearn from sklearn import ensemble import pandas as pd import pytest import openml -from openml.testing import TestBase +from openml.exceptions import OpenMLNotAuthorizedError, OpenMLServerException +from openml.testing import TestBase, create_request_response import openml.extensions.sklearn @@ -410,3 +414,126 @@ def test_get_flow_id(self): ) self.assertEqual(flow_ids_exact_version_True, 
flow_ids_exact_version_False) self.assertIn(flow.flow_id, flow_ids_exact_version_True) + + def test_delete_flow(self): + flow = openml.OpenMLFlow( + name="sklearn.dummy.DummyClassifier", + class_name="sklearn.dummy.DummyClassifier", + description="test description", + model=sklearn.dummy.DummyClassifier(), + components=OrderedDict(), + parameters=OrderedDict(), + parameters_meta_info=OrderedDict(), + external_version="1", + tags=[], + language="English", + dependencies=None, + ) + + flow, _ = self._add_sentinel_to_flow_name(flow, None) + + flow.publish() + _flow_id = flow.flow_id + self.assertTrue(openml.flows.delete_flow(_flow_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it was not uploaded by you.", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it still has associated entities:", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_subflow(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_is_subflow.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it still has associated entities:", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.flows.delete_flow(33364) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/33364",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + 
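+ +# A minimal usage sketch of the public API exercised by these tests; the id
+# 33364 matches the mocked success response above, and on a live server any
+# flow you uploaded that has no associated runs would behave the same:
+#
+#     import openml
+#     from openml.exceptions import OpenMLNotAuthorizedError, OpenMLServerException
+#
+#     openml.config.start_using_configuration_for_example()
+#     try:
+#         deleted = openml.flows.delete_flow(33364)
+#     except OpenMLNotAuthorizedError:
+#         deleted = False  # not uploaded by you, or still referenced by runs
+#     except OpenMLServerException:
+#         deleted = False  # e.g. the flow does not exist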
+@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="flow does not exist", + ): + openml.flows.delete_flow(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 520b7c0bc..91dd4ce5e 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1,5 +1,4 @@ # License: BSD 3-Clause - import arff from distutils.version import LooseVersion import os @@ -7,10 +6,11 @@ import time import sys import ast -import unittest.mock +from unittest import mock import numpy as np import joblib +import requests from joblib import parallel_backend import openml @@ -23,13 +23,21 @@ import pytest import openml.extensions.sklearn -from openml.testing import TestBase, SimpleImputer, CustomImputer +from openml.testing import TestBase, SimpleImputer, CustomImputer, create_request_response from openml.extensions.sklearn import cat, cont -from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction +from openml.runs.functions import ( + _run_task_get_arffcontent, + run_exists, + format_prediction, + delete_run, +) from openml.runs.trace import OpenMLRunTrace from openml.tasks import TaskType from openml.testing import check_task_existence -from openml.exceptions import OpenMLServerException +from openml.exceptions import ( + OpenMLServerException, + OpenMLNotAuthorizedError, +) from sklearn.naive_bayes import GaussianNB from sklearn.model_selection._search import BaseSearchCV @@ -708,7 +716,7 @@ def get_ct_cf(nominal_indices, numeric_indices): LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", ) - @unittest.mock.patch("warnings.warn") + @mock.patch("warnings.warn") def test_run_and_upload_knn_pipeline(self, warnings_mock): cat_imp = make_pipeline( @@ -1672,7 +1680,7 @@ def test_format_prediction_task_regression(self): LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", ) - @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test__run_task_get_arffcontent_2(self, parallel_mock): """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp @@ -1726,7 +1734,7 @@ def test__run_task_get_arffcontent_2(self, parallel_mock): LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", ) - @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test_joblib_backends(self, parallel_mock): """Tests evaluation of a run using various joblib backends and n_jobs.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp @@ -1777,3 +1785,82 @@ def 
test_joblib_backends(self, parallel_mock): self.assertEqual(len(res[2]["predictive_accuracy"][0]), 10) self.assertEqual(len(res[3]["predictive_accuracy"][0]), 10) self.assertEqual(parallel_mock.call_count, call_count) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) + def test_delete_run(self): + rs = 1 + clf = sklearn.pipeline.Pipeline( + steps=[("imputer", SimpleImputer()), ("estimator", DecisionTreeClassifier())] + ) + task = openml.tasks.get_task(32) # diabetes; crossvalidation + + run = openml.runs.run_model_on_task(model=clf, task=task, seed=rs) + run.publish() + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) + + _run_id = run.run_id + self.assertTrue(delete_run(_run_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The run can not be deleted because it was not uploaded by you.", + ): + openml.runs.delete_run(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_run_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.runs.delete_run(10591880) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/10591880",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Run does not exist", + ): + openml.runs.delete_run(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index be5b0c9bd..dde3561f4 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -3,10 +3,13 @@ import os from unittest import mock +import pytest +import requests + from openml.tasks import TaskType -from openml.testing import TestBase +from openml.testing import TestBase, create_request_response from openml import OpenMLSplit, OpenMLTask -from openml.exceptions import OpenMLCacheException +from openml.exceptions import OpenMLCacheException, 
OpenMLNotAuthorizedError, OpenMLServerException import openml import unittest import pandas as pd @@ -253,3 +256,84 @@ def test_deletion_of_cache_dir(self): self.assertTrue(os.path.exists(tid_cache_dir)) openml.utils._remove_cache_dir_for_id("tasks", tid_cache_dir) self.assertFalse(os.path.exists(tid_cache_dir)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The task can not be deleted because it was not uploaded by you.", + ): + openml.tasks.delete_task(1) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/1",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The task can not be deleted because it still has associated entities:", + ): + openml.tasks.delete_task(3496) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/3496",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.tasks.delete_task(361323) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/361323",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Task does not exist", + ): + openml.tasks.delete_task(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) From 7127e9cd4312e422a8267fcd5410625579f6f39b Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 22 Mar 2023 10:02:24 +0100 Subject: [PATCH 53/53] Update changelog and version number for new release (#1230) --- doc/progress.rst | 34 +++++++++++++++++++++------------- openml/__version__.py | 2 +- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/doc/progress.rst b/doc/progress.rst index 
index d981c09c0..6b58213e5 100644
--- a/doc/progress.rst
+++ b/doc/progress.rst
@@ -9,25 +9,33 @@ Changelog
 
 0.13.1
 ~~~~~~
 
- * Add new contributions here.
- * ADD#1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
- * ADD#1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
+ * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
+ * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
+ * ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
+ * ADD #1201: Make ``OpenMLTraceIteration`` a dataclass.
+ * DOC #1069: Add argument documentation for the ``OpenMLRun`` class.
  * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``.
  * FIX #1198: Support numpy 1.24 and higher.
+ * FIX #1216: Allow unknown task types on the server. This is only relevant when new task types are added to the test server.
+ * MAINT #1155: Add dependabot github action to automatically update other github actions.
+ * MAINT #1199: Obtain pre-commit's flake8 from github.com instead of gitlab.com.
+ * MAINT #1215: Support latest numpy version.
+ * MAINT #1218: Test Python3.6 on Ubuntu 20.04 instead of the latest Ubuntu (which is 22.04).
+ * MAINT #1221 #1212 #1206 #1211: Update github actions to the latest versions.
 
 0.13.0
 ~~~~~~
 
- * FIX#1030: ``pre-commit`` hooks now no longer should issue a warning.
- * FIX#1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute.
- * FIX#1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
- * FIX#1147: ``openml.flow.flow_exists`` no longer requires an API key.
- * FIX#1184: Automatically resolve proxies when downloading from minio. Turn this off by setting environment variable ``no_proxy="*"``.
- * MAIN#1088: Do CI for Windows on Github Actions instead of Appveyor.
- * MAINT#1104: Fix outdated docstring for ``list_task``.
- * MAIN#1146: Update the pre-commit dependencies.
- * ADD#1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.
- * ADD#1188: EXPERIMENTAL. Allow downloading all files from a minio bucket with ``download_all_files=True`` for ``get_dataset``.
+ * FIX #1030: ``pre-commit`` hooks now no longer should issue a warning.
+ * FIX #1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute.
+ * FIX #1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional.
+ * FIX #1147: ``openml.flow.flow_exists`` no longer requires an API key.
+ * FIX #1184: Automatically resolve proxies when downloading from minio. Turn this off by setting environment variable ``no_proxy="*"``.
+ * MAINT #1088: Do CI for Windows on Github Actions instead of Appveyor.
+ * MAINT #1104: Fix outdated docstring for ``list_task``.
+ * MAINT #1146: Update the pre-commit dependencies.
+ * ADD #1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data.
+ * ADD #1188: EXPERIMENTAL. Allow downloading all files from a minio bucket with ``download_all_files=True`` for ``get_dataset``.
 
 0.12.2
diff --git a/openml/__version__.py b/openml/__version__.py
index c27a62daa..9c98e03c5 100644
--- a/openml/__version__.py
+++ b/openml/__version__.py
@@ -3,4 +3,4 @@
 # License: BSD 3-Clause
 
 # The following line *must* be the last in the module, exactly as formatted:
-__version__ = "0.13.1.dev"
+__version__ = "0.13.1"
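A hedged sketch, not part of the patch, of how the additions called out in the
0.13.x changelog entries above fit together; the dataset and run IDs are
illustrative only:

import openml

# ADD #1188 (0.13.0, experimental): also fetch a dataset's companion files
# from the MinIO bucket, not only the primary data file.
dataset = openml.datasets.get_dataset(61, download_all_files=True)

# ADD #1103 (0.13.0): predictions of a run are exposed directly as a property.
run = openml.runs.get_run(10591880)
print(run.predictions)

# ADD #1028 (0.13.1): entities you uploaded yourself can be deleted again.
openml.runs.delete_run(run.run_id)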