diff --git a/.flake8 b/.flake8 index 211234f22..2d17eec10 100644 --- a/.flake8 +++ b/.flake8 @@ -5,7 +5,7 @@ select = C,E,F,W,B,T ignore = E203, E402, W503 per-file-ignores = *__init__.py:F401 - *cli.py:T001 + *cli.py:T201 exclude = venv examples diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..e5e5092a2 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +version: 2 + +updates: + # This will check for updates to github actions every day + # https://docs.github.com/en/enterprise-server@3.4/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/dist.yaml b/.github/workflows/dist.yaml index 51ffe03d5..63641ae72 100644 --- a/.github/workflows/dist.yaml +++ b/.github/workflows/dist.yaml @@ -6,9 +6,9 @@ jobs: dist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Build dist diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index c14bd07d0..e601176b3 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -5,14 +5,17 @@ jobs: build-and-deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Install dependencies run: | pip install -e .[docs,examples,examples_unix] + # dependency "fanova" does not work with numpy 1.24 or later + # https://github.com/automl/fanova/issues/108 + pip install numpy==1.23.5 - name: Make docs run: | cd doc diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index 6132b2de2..45e4f1bd0 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -6,9 +6,9 @@ jobs: run-all-files: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup Python 3.7 - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.7 - name: Install pre-commit diff --git a/.github/workflows/release_docker.yaml b/.github/workflows/release_docker.yaml index c4522c0be..6ceb1d060 100644 --- a/.github/workflows/release_docker.yaml +++ b/.github/workflows/release_docker.yaml @@ -3,29 +3,46 @@ name: release-docker on: push: branches: + - 'main' - 'develop' - 'docker' jobs: + docker: + runs-on: ubuntu-latest + steps: - name: Set up QEMU - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 + - name: Login to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - uses: actions/checkout@v2 + + - name: Check out the repo + uses: actions/checkout@v3 + + - name: Extract metadata (tags, labels) for Docker Hub + id: meta_dockerhub + uses: docker/metadata-action@v4 + with: + images: "openml/openml-python" + - name: Build and push id: docker_build - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v4 with: context: ./docker/ push: true - tags: openml/openml-python:latest + tags: ${{ steps.meta_dockerhub.outputs.tags }} + labels: ${{ 
steps.meta_dockerhub.outputs.labels }} + - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..cc38aebb2 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,117 @@ +name: Tests + +on: [push, pull_request] + +jobs: + test: + name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }}) + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.7, 3.8] + scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] + os: [ubuntu-latest] + sklearn-only: ['true'] + exclude: # no scikit-learn 0.21.2 release for Python 3.8 + - python-version: 3.8 + scikit-learn: 0.21.2 + include: + - python-version: 3.6 + scikit-learn: 0.18.2 + scipy: 1.2.0 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.19.2 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.20.2 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.21.2 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.22.2 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.23.1 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.6 + scikit-learn: 0.24 + os: ubuntu-20.04 + sklearn-only: 'true' + - python-version: 3.8 + scikit-learn: 0.23.1 + code-cov: true + sklearn-only: 'false' + os: ubuntu-latest + - os: windows-latest + sklearn-only: 'false' + scikit-learn: 0.24.* + fail-fast: false + max-parallel: 4 + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 2 + - name: Setup Python ${{ matrix.python-version }} + if: matrix.os != 'windows-latest' # windows-latest only uses preinstalled Python (3.7.9) + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[test] + - name: Install scikit-learn ${{ matrix.scikit-learn }} + run: | + pip install scikit-learn==${{ matrix.scikit-learn }} + - name: Install numpy for Python 3.8 + # Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5 + if: ${{ matrix.python-version == '3.8' && contains(fromJSON('["0.23.1", "0.22.2", "0.21.2"]'), matrix.scikit-learn) }} + run: | + pip install numpy==1.23.5 + - name: Install scipy ${{ matrix.scipy }} + if: ${{ matrix.scipy }} + run: | + pip install scipy==${{ matrix.scipy }} + - name: Store repository status + id: status-before + run: | + echo "::set-output name=BEFORE::$(git status --porcelain -b)" + - name: Run tests on Ubuntu + if: matrix.os == 'ubuntu-latest' + run: | + if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi + # Most of the time, running only the scikit-learn tests is sufficient + if [ ${{ matrix.sklearn-only }} = 'true' ]; then sklearn='-m sklearn'; fi + echo pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 -o log_cli=true + pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov $sklearn --reruns 5 --reruns-delay 1 -o log_cli=true + - name: Run tests on Windows + if: matrix.os == 'windows-latest' + run: | # we need a separate step because of the bash-specific if-statement in the previous one. 
+ pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv --reruns 5 --reruns-delay 1 + - name: Check for files left behind by test + if: matrix.os != 'windows-latest' && always() + run: | + before="${{ steps.status-before.outputs.BEFORE }}" + after="$(git status --porcelain -b)" + if [[ "$before" != "$after" ]]; then + echo "git status from before: $before" + echo "git status from after: $after" + echo "Not all generated files have been deleted!" + exit 1 + fi + - name: Upload coverage + if: matrix.code-cov && always() + uses: codecov/codecov-action@v3 + with: + files: coverage.xml + fail_ci_if_error: true + verbose: true diff --git a/.github/workflows/ubuntu-test.yml b/.github/workflows/ubuntu-test.yml deleted file mode 100644 index 41cc155ac..000000000 --- a/.github/workflows/ubuntu-test.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Tests - -on: [push, pull_request] - -jobs: - ubuntu: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.6, 3.7, 3.8] - scikit-learn: [0.21.2, 0.22.2, 0.23.1, 0.24] - exclude: # no scikit-learn 0.21.2 release for Python 3.8 - - python-version: 3.8 - scikit-learn: 0.21.2 - include: - - python-version: 3.6 - scikit-learn: 0.18.2 - scipy: 1.2.0 - - python-version: 3.6 - scikit-learn: 0.19.2 - - python-version: 3.6 - scikit-learn: 0.20.2 - - python-version: 3.8 - scikit-learn: 0.23.1 - code-cov: true - fail-fast: false - max-parallel: 4 - - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 2 - - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install test dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[test] - - name: Install scikit-learn ${{ matrix.scikit-learn }} - run: | - pip install scikit-learn==${{ matrix.scikit-learn }} - - name: Install scipy ${{ matrix.scipy }} - if: ${{ matrix.scipy }} - run: | - pip install scipy==${{ matrix.scipy }} - - name: Store repository status - id: status-before - run: | - echo "::set-output name=BEFORE::$(git status --porcelain -b)" - - name: Run tests - run: | - if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi - pytest -n 4 --durations=20 --timeout=600 --timeout-method=thread --dist load -sv $codecov --reruns 5 --reruns-delay 1 - - name: Check for files left behind by test - if: ${{ always() }} - run: | - before="${{ steps.status-before.outputs.BEFORE }}" - after="$(git status --porcelain -b)" - if [[ "$before" != "$after" ]]; then - echo "git status from before: $before" - echo "git status from after: $after" - echo "Not all generated files have been deleted!" 
- exit 1 - fi - - name: Upload coverage - if: matrix.code-cov && always() - uses: codecov/codecov-action@v1 - with: - files: coverage.xml - fail_ci_if_error: true - verbose: true \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3e5102233..060db33be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ *~ doc/generated examples/.ipynb_checkpoints +venv + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -75,6 +77,7 @@ target/ # IDE .idea *.swp +.vscode # MYPY .mypy_cache diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b3a1d2aba..05bac7967 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,28 +1,34 @@ repos: - repo: https://github.com/psf/black - rev: 19.10b0 + rev: 22.6.0 hooks: - id: black args: [--line-length=100] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.761 + rev: v0.961 hooks: - id: mypy name: mypy openml - files: openml/* + files: openml/.* + additional_dependencies: + - types-requests + - types-python-dateutil - id: mypy name: mypy tests - files: tests/* - - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.3 + files: tests/.* + additional_dependencies: + - types-requests + - types-python-dateutil + - repo: https://github.com/pycqa/flake8 + rev: 4.0.1 hooks: - id: flake8 name: flake8 openml - files: openml/* + files: openml/.* additional_dependencies: - - flake8-print==3.1.4 + - flake8-print==5.0.0 - id: flake8 name: flake8 tests - files: tests/* + files: tests/.* additional_dependencies: - - flake8-print==3.1.4 + - flake8-print==5.0.0 diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..c5454ef6f --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,40 @@ +cff-version: 1.2.0 +message: "If you use this software in a publication, please cite the metadata from preferred-citation." +preferred-citation: + type: article + authors: + - family-names: "Feurer" + given-names: "Matthias" + orcid: "https://orcid.org/0000-0001-9611-8588" + - family-names: "van Rijn" + given-names: "Jan N." + orcid: "https://orcid.org/0000-0003-2898-2168" + - family-names: "Kadra" + given-names: "Arlind" + - family-names: "Gijsbers" + given-names: "Pieter" + orcid: "https://orcid.org/0000-0001-7346-8075" + - family-names: "Mallik" + given-names: "Neeratyoy" + orcid: "https://orcid.org/0000-0002-0598-1608" + - family-names: "Ravi" + given-names: "Sahithya" + - family-names: "Müller" + given-names: "Andreas" + orcid: "https://orcid.org/0000-0002-2349-9428" + - family-names: "Vanschoren" + given-names: "Joaquin" + orcid: "https://orcid.org/0000-0001-7044-9805" + - family-names: "Hutter" + given-names: "Frank" + orcid: "https://orcid.org/0000-0002-2037-3694" + journal: "Journal of Machine Learning Research" + title: "OpenML-Python: an extensible Python API for OpenML" + abstract: "OpenML is an online platform for open science collaboration in machine learning, used to share datasets and results of machine learning experiments. In this paper, we introduce OpenML-Python, a client API for Python, which opens up the OpenML platform for a wide range of Python-based machine learning tools. It provides easy access to all datasets, tasks and experiments on OpenML from within Python. It also provides functionality to conduct machine learning experiments, upload the results to OpenML, and reproduce results which are stored on OpenML. Furthermore, it comes with a scikit-learn extension and an extension mechanism to easily integrate other machine learning libraries written in Python into the OpenML ecosystem. 
Source code and documentation are available at https://github.com/openml/openml-python/." + volume: 22 + year: 2021 + start: 1 + end: 5 + pages: 5 + number: 100 + url: https://jmlr.org/papers/v22/19-920.html diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 688dbd7a9..87c8ae3c6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -153,7 +153,8 @@ following rules before you submit a pull request: - Add [unit tests](https://github.com/openml/openml-python/tree/develop/tests) and [examples](https://github.com/openml/openml-python/tree/develop/examples) for any new functionality being introduced. - If an unit test contains an upload to the test server, please ensure that it is followed by a file collection for deletion, to prevent the test server from bulking up. For example, `TestBase._mark_entity_for_removal('data', dataset.dataset_id)`, `TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))`. - - Please ensure that the example is run on the test server by beginning with the call to `openml.config.start_using_configuration_for_example()`. + - Please ensure that the example is run on the test server by beginning with the call to `openml.config.start_using_configuration_for_example()`. + - Add the `@pytest.mark.sklearn` marker to your unit tests if they have a dependency on scikit-learn. - All tests pass when running `pytest`. On Unix-like systems, check with (from the toplevel source folder): diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index e3fa74aaf..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,48 +0,0 @@ -clone_folder: C:\\projects\\openml-python - -environment: -# global: -# CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\scikit-learn-contrib\\run_with_env.cmd" - - matrix: - - PYTHON: "C:\\Python3-x64" - PYTHON_VERSION: "3.6" - PYTHON_ARCH: "64" - MINICONDA: "C:\\Miniconda36-x64" - -matrix: - fast_finish: true - - -install: - # Miniconda is pre-installed in the worker build - - "SET PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" - - "python -m pip install -U pip" - - # Check that we have the expected version and architecture for Python - - "python --version" - - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" - - "pip --version" - - # Remove cygwin because it clashes with conda - # see http://help.appveyor.com/discussions/problems/3712-git-remote-https-seems-to-be-broken - - rmdir C:\\cygwin /s /q - - # Update previous packages and install the build and runtime dependencies of the project. - - conda update conda --yes - - conda update --all --yes - - # Install the build and runtime dependencies of the project. 
- - "cd C:\\projects\\openml-python" - - "pip install .[examples,test]" - - "pip install scikit-learn==0.21" - # Uninstall coverage, as it leads to an error on appveyor - - "pip uninstall -y pytest-cov" - - -# Not a .NET project, we build scikit-learn in the install step instead -build: false - -test_script: - - "cd C:\\projects\\openml-python" - - "%CMD_IN_ENV% pytest -n 4 --timeout=600 --timeout-method=thread --dist load -sv" diff --git a/appveyor/run_with_env.cmd b/appveyor/run_with_env.cmd deleted file mode 100644 index 5da547c49..000000000 --- a/appveyor/run_with_env.cmd +++ /dev/null @@ -1,88 +0,0 @@ -:: To build extensions for 64 bit Python 3, we need to configure environment -:: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) -:: -:: To build extensions for 64 bit Python 2, we need to configure environment -:: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: -:: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) -:: -:: 32 bit builds, and 64-bit builds for 3.5 and beyond, do not require specific -:: environment configurations. -:: -:: Note: this script needs to be run with the /E:ON and /V:ON flags for the -:: cmd interpreter, at least for (SDK v7.0) -:: -:: More details at: -:: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows -:: http://stackoverflow.com/a/13751649/163740 -:: -:: Author: Olivier Grisel -:: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ -:: -:: Notes about batch files for Python people: -:: -:: Quotes in values are literally part of the values: -:: SET FOO="bar" -:: FOO is now five characters long: " b a r " -:: If you don't want quotes, don't include them on the right-hand side. -:: -:: The CALL lines at the end of this file look redundant, but if you move them -:: outside of the IF clauses, they do not run properly in the SET_SDK_64==Y -:: case, I don't know why. -@ECHO OFF - -SET COMMAND_TO_RUN=%* -SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows -SET WIN_WDK=c:\Program Files (x86)\Windows Kits\10\Include\wdf - -:: Extract the major and minor versions, and allow for the minor version to be -:: more than 9. This requires the version number to have two dots in it. -SET MAJOR_PYTHON_VERSION=%PYTHON_VERSION:~0,1% -IF "%PYTHON_VERSION:~3,1%" == "." ( - SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,1% -) ELSE ( - SET MINOR_PYTHON_VERSION=%PYTHON_VERSION:~2,2% -) - -:: Based on the Python version, determine what SDK version to use, and whether -:: to set the SDK for 64-bit. 
-IF %MAJOR_PYTHON_VERSION% == 2 ( - SET WINDOWS_SDK_VERSION="v7.0" - SET SET_SDK_64=Y -) ELSE ( - IF %MAJOR_PYTHON_VERSION% == 3 ( - SET WINDOWS_SDK_VERSION="v7.1" - IF %MINOR_PYTHON_VERSION% LEQ 4 ( - SET SET_SDK_64=Y - ) ELSE ( - SET SET_SDK_64=N - IF EXIST "%WIN_WDK%" ( - :: See: https://connect.microsoft.com/VisualStudio/feedback/details/1610302/ - REN "%WIN_WDK%" 0wdf - ) - ) - ) ELSE ( - ECHO Unsupported Python version: "%MAJOR_PYTHON_VERSION%" - EXIT 1 - ) -) - -IF %PYTHON_ARCH% == 64 ( - IF %SET_SDK_64% == Y ( - ECHO Configuring Windows SDK %WINDOWS_SDK_VERSION% for Python %MAJOR_PYTHON_VERSION% on a 64 bit architecture - SET DISTUTILS_USE_SDK=1 - SET MSSdk=1 - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% - "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 - ) ELSE ( - ECHO Using default MSVC build environment for 64 bit architecture - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 - ) -) ELSE ( - ECHO Using default MSVC build environment for 32 bit architecture - ECHO Executing: %COMMAND_TO_RUN% - call %COMMAND_TO_RUN% || EXIT 1 -) diff --git a/doc/api.rst b/doc/api.rst index 86bfd121e..288bf66fb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -38,6 +38,7 @@ Dataset Functions attributes_arff_from_df check_datasets_active create_dataset + delete_dataset get_dataset get_datasets list_datasets @@ -103,6 +104,7 @@ Flow Functions :template: function.rst assert_flows_equal + delete_flow flow_exists get_flow list_flows @@ -133,6 +135,7 @@ Run Functions :toctree: generated/ :template: function.rst + delete_run get_run get_runs get_run_trace @@ -251,6 +254,7 @@ Task Functions :template: function.rst create_task + delete_task get_task get_tasks list_tasks diff --git a/doc/contributing.rst b/doc/contributing.rst index f710f8a71..e8d537338 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -23,6 +23,6 @@ In particular, a few ways to contribute to openml-python are: * `Cite OpenML `_ if you use it in a scientific publication. - * Visit one of our `hackathons `_. + * Visit one of our `hackathons `_. * Contribute to another OpenML project, such as `the main OpenML project `_. diff --git a/doc/index.rst b/doc/index.rst index b0140c1d0..b8856e83b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -40,7 +40,7 @@ Example run.publish() print(f'View the run online: {run.openml_url}') -You can find more examples in our :ref:`sphx_glr_examples`. +You can find more examples in our :ref:`examples-index`. ---------------------------- How to get OpenML for python @@ -60,7 +60,7 @@ Content * :ref:`usage` * :ref:`api` -* :ref:`sphx_glr_examples` +* :ref:`examples-index` * :ref:`extensions` * :ref:`contributing` * :ref:`progress` diff --git a/doc/progress.rst b/doc/progress.rst index b0c182e05..6b58213e5 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -6,6 +6,38 @@ Changelog ========= +0.13.1 +~~~~~~ + + * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``). + * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server. + * ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API. + * ADD #1201: Make ``OpenMLTraceIteration`` a dataclass. + * DOC #1069: Add argument documentation for the ``OpenMLRun`` class. 
+ * FIX #1197 #559 #1131: Fix the order of ground truth and predictions in the ``OpenMLRun`` object and in ``format_prediction``. + * FIX #1198: Support numpy 1.24 and higher. + * FIX #1216: Allow unknown task types on the server. This is only relevant when new task types are added to the test server. + * MAINT #1155: Add dependabot github action to automatically update other github actions. + * MAINT #1199: Obtain pre-commit's flake8 from github.com instead of gitlab.com. + * MAINT #1215: Support latest numpy version. + * MAINT #1218: Test Python3.6 on Ubuntu 20.04 instead of the latest Ubuntu (which is 22.04). + * MAINT #1221 #1212 #1206 #1211: Update github actions to the latest versions. + +0.13.0 +~~~~~~ + + * FIX #1030: ``pre-commit`` hooks now no longer should issue a warning. + * FIX #1058, #1100: Avoid ``NoneType`` error when printing task without ``class_labels`` attribute. + * FIX #1110: Make arguments to ``create_study`` and ``create_suite`` that are defined as optional by the OpenML XSD actually optional. + * FIX #1147: ``openml.flow.flow_exists`` no longer requires an API key. + * FIX #1184: Automatically resolve proxies when downloading from minio. Turn this off by setting environment variable ``no_proxy="*"``. + * MAINT #1088: Do CI for Windows on Github Actions instead of Appveyor. + * MAINT #1104: Fix outdated docstring for ``list_task``. + * MAINT #1146: Update the pre-commit dependencies. + * ADD #1103: Add a ``predictions`` property to OpenMLRun for easy accessibility of prediction data. + * ADD #1188: EXPERIMENTAL. Allow downloading all files from a minio bucket with ``download_all_files=True`` for ``get_dataset``. + + 0.12.2 ~~~~~~ diff --git a/doc/usage.rst b/doc/usage.rst index dd85d989c..8c713b586 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -29,15 +29,18 @@ machine learning algorithms on them and then share the results online. The following tutorial gives a short introduction on how to install and set up the OpenML Python connector, followed up by a simple example. -* `:ref:`sphx_glr_examples_20_basic_introduction_tutorial.py` +* :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py` ~~~~~~~~~~~~~ Configuration ~~~~~~~~~~~~~ -The configuration file resides in a directory ``.openml`` in the home -directory of the user and is called config. It consists of ``key = value`` pairs -which are separated by newlines. The following keys are defined: +The configuration file resides in a directory ``.config/openml`` in the home +directory of the user and is called config (More specifically, it resides in the +`configuration directory specified by the XDG Base Directory Specification +`_). +It consists of ``key = value`` pairs which are separated by newlines. +The following keys are defined: * apikey: * required to access the server. The :ref:`sphx_glr_examples_20_basic_introduction_tutorial.py` diff --git a/docker/Dockerfile b/docker/Dockerfile index 5fcc16e34..c27abba40 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,6 +1,6 @@ # Dockerfile to build an image with preinstalled dependencies # Useful building docs or running unix tests from a Windows host. -FROM python:3 +FROM python:3.10 RUN git clone https://github.com/openml/openml-python.git omlp WORKDIR omlp diff --git a/examples/30_extended/custom_flow_.py b/examples/30_extended/custom_flow_.py index ae5f37631..513d445ba 100644 --- a/examples/30_extended/custom_flow_.py +++ b/examples/30_extended/custom_flow_.py @@ -85,7 +85,9 @@ # but that does not matter for this demonstration.
autosklearn_flow = openml.flows.get_flow(9313) # auto-sklearn 0.5.1 -subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow),) +subflow = dict( + components=OrderedDict(automl_tool=autosklearn_flow), +) #################################################################################################### # With all parameters of the flow defined, we can now initialize the OpenMLFlow and publish. @@ -98,7 +100,10 @@ # the model of the flow to `None`. autosklearn_amlb_flow = openml.flows.OpenMLFlow( - **general, **flow_hyperparameters, **subflow, model=None, + **general, + **flow_hyperparameters, + **subflow, + model=None, ) autosklearn_amlb_flow.publish() print(f"autosklearn flow created: {autosklearn_amlb_flow.flow_id}") diff --git a/examples/30_extended/fetch_evaluations_tutorial.py b/examples/30_extended/fetch_evaluations_tutorial.py index 2823eabf3..86302e2d1 100644 --- a/examples/30_extended/fetch_evaluations_tutorial.py +++ b/examples/30_extended/fetch_evaluations_tutorial.py @@ -90,9 +90,9 @@ def plot_cdf(values, metric="predictive_accuracy"): plt.title("CDF") plt.xlabel(metric) plt.ylabel("Likelihood") - plt.grid(b=True, which="major", linestyle="-") + plt.grid(visible=True, which="major", linestyle="-") plt.minorticks_on() - plt.grid(b=True, which="minor", linestyle="--") + plt.grid(visible=True, which="minor", linestyle="--") plt.axvline(max_val, linestyle="--", color="gray") plt.text(max_val, 0, "%.3f" % max_val, fontsize=9) plt.show() diff --git a/examples/30_extended/fetch_runtimes_tutorial.py b/examples/30_extended/fetch_runtimes_tutorial.py index 3d5183613..1a6e5117f 100644 --- a/examples/30_extended/fetch_runtimes_tutorial.py +++ b/examples/30_extended/fetch_runtimes_tutorial.py @@ -72,7 +72,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -97,7 +100,10 @@ def print_compare_runtimes(measures): clf = RandomForestClassifier(n_estimators=10) run1 = openml.runs.run_model_on_task( - model=clf, task=task, upload_flow=False, avoid_duplicate_runs=False, + model=clf, + task=task, + upload_flow=False, + avoid_duplicate_runs=False, ) measures = run1.fold_evaluations @@ -402,7 +408,7 @@ def get_incumbent_trace(trace): ################################################################################ # Running a Neural Network from scikit-learn that uses scikit-learn independent # parallelism using libraries such as `MKL, OpenBLAS or BLIS -# `_. +# `_. 
mlp = MLPClassifier(max_iter=10) diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 714ce7b55..05b8c8cce 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -176,7 +176,11 @@ # The following lines can then be executed offline: run = openml.runs.run_model_on_task( - pipe, task, avoid_duplicate_runs=False, upload_flow=False, dataset_format="array", + pipe, + task, + avoid_duplicate_runs=False, + upload_flow=False, + dataset_format="array", ) # The run may be stored offline, and the flow will be stored along with it: diff --git a/examples/30_extended/run_setup_tutorial.py b/examples/30_extended/run_setup_tutorial.py index 1bb123aad..a2bc3a4df 100644 --- a/examples/30_extended/run_setup_tutorial.py +++ b/examples/30_extended/run_setup_tutorial.py @@ -57,10 +57,18 @@ # easy as you want it to be -cat_imp = make_pipeline(OneHotEncoder(handle_unknown="ignore", sparse=False), TruncatedSVD(),) +cat_imp = make_pipeline( + OneHotEncoder(handle_unknown="ignore", sparse=False), + TruncatedSVD(), +) cont_imp = SimpleImputer(strategy="median") ct = ColumnTransformer([("cat", cat_imp, cat), ("cont", cont_imp, cont)]) -model_original = Pipeline(steps=[("transform", ct), ("estimator", RandomForestClassifier()),]) +model_original = Pipeline( + steps=[ + ("transform", ct), + ("estimator", RandomForestClassifier()), + ] +) # Let's change some hyperparameters. Of course, in any good application we # would tune them using, e.g., Random Search or Bayesian Optimization, but for diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index b66c49096..d5bfcd88a 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -51,7 +51,9 @@ # And we can use the evaluation listing functionality to learn more about # the evaluations available for the conducted runs: evaluations = openml.evaluations.list_evaluations( - function="predictive_accuracy", output_format="dataframe", study=study.study_id, + function="predictive_accuracy", + output_format="dataframe", + study=study.study_id, ) print(evaluations.head()) diff --git a/examples/30_extended/task_manual_iteration_tutorial.py b/examples/30_extended/task_manual_iteration_tutorial.py index c30ff66a3..676a742a1 100644 --- a/examples/30_extended/task_manual_iteration_tutorial.py +++ b/examples/30_extended/task_manual_iteration_tutorial.py @@ -44,7 +44,10 @@ print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -53,7 +56,11 @@ # samples (indexing is zero-based). Usually, one would loop over all repeats, folds and sample # sizes, but we can neglect this here as there is only a single repetition. 
-train_indices, test_indices = task.get_train_test_split_indices(repeat=0, fold=0, sample=0,) +train_indices, test_indices = task.get_train_test_split_indices( + repeat=0, + fold=0, + sample=0, +) print(train_indices.shape, train_indices.dtype) print(test_indices.shape, test_indices.dtype) @@ -69,7 +76,10 @@ print( "X_train.shape: {}, y_train.shape: {}, X_test.shape: {}, y_test.shape: {}".format( - X_train.shape, y_train.shape, X_test.shape, y_test.shape, + X_train.shape, + y_train.shape, + X_test.shape, + y_test.shape, ) ) @@ -82,7 +92,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -92,7 +105,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] @@ -121,7 +136,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -131,7 +149,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] @@ -160,7 +180,10 @@ n_repeats, n_folds, n_samples = task.get_split_dimensions() print( "Task {}: number of repeats: {}, number of folds: {}, number of samples {}.".format( - task_id, n_repeats, n_folds, n_samples, + task_id, + n_repeats, + n_folds, + n_samples, ) ) @@ -170,7 +193,9 @@ for fold_idx in range(n_folds): for sample_idx in range(n_samples): train_indices, test_indices = task.get_train_test_split_indices( - repeat=repeat_idx, fold=fold_idx, sample=sample_idx, + repeat=repeat_idx, + fold=fold_idx, + sample=sample_idx, ) X_train = X.iloc[train_indices] y_train = y.iloc[train_indices] diff --git a/examples/README.txt b/examples/README.txt index 332a5b990..d10746bcb 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -1,3 +1,5 @@ +.. _examples-index: + ================ Examples Gallery ================ diff --git a/openml/__version__.py b/openml/__version__.py index 0f368c426..9c98e03c5 100644 --- a/openml/__version__.py +++ b/openml/__version__.py @@ -3,4 +3,4 @@ # License: BSD 3-Clause # The following line *must* be the last in the module, exactly as formatted: -__version__ = "0.12.2" +__version__ = "0.13.1" diff --git a/openml/_api_calls.py b/openml/_api_calls.py index b5ed976bc..f7b2a34c5 100644 --- a/openml/_api_calls.py +++ b/openml/_api_calls.py @@ -10,7 +10,9 @@ import urllib.parse import xml import xmltodict +from urllib3 import ProxyManager from typing import Dict, Optional, Union +import zipfile import minio @@ -23,6 +25,35 @@ ) + +def resolve_env_proxies(url: str) -> Optional[str]: + """Attempt to find a suitable proxy for this url. + + Relies on ``requests`` internals to remain consistent. To disable this from the + environment, please set the environment variable ``no_proxy="*"``.
+ + Parameters + ---------- + url : str + The url endpoint + + Returns + ------- + Optional[str] + The proxy url if found, else None + """ + resolved_proxies = requests.utils.get_environ_proxies(url) + selected_proxy = requests.utils.select_proxy(url, resolved_proxies) + return selected_proxy + + +def _create_url_from_endpoint(endpoint: str) -> str: + url = config.server + if not url.endswith("/"): + url += "/" + url += endpoint + return url.replace("=", "%3d") + + def _perform_api_call(call, request_method, data=None, file_elements=None): """ Perform an API call at the OpenML server. @@ -50,12 +81,7 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): return_value : str Return value of the OpenML server """ - url = config.server - if not url.endswith("/"): - url += "/" - url += call - - url = url.replace("=", "%3d") + url = _create_url_from_endpoint(call) logging.info("Starting [%s] request for the URL %s", request_method, url) start = time.time() @@ -69,15 +95,21 @@ def _perform_api_call(call, request_method, data=None, file_elements=None): __check_response(response, url, file_elements) logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, request_method, url, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + request_method, + url, ) return response.text def _download_minio_file( - source: str, destination: Union[str, pathlib.Path], exists_ok: bool = True, + source: str, + destination: Union[str, pathlib.Path], + exists_ok: bool = True, + proxy: Optional[str] = "auto", ) -> None: - """ Download file ``source`` from a MinIO Bucket and store it at ``destination``. + """Download file ``source`` from a MinIO Bucket and store it at ``destination``. Parameters ---------- @@ -87,7 +119,10 @@ def _download_minio_file( Path to store the file to, if a directory is provided the original filename is used. exists_ok : bool, optional (default=True) If False, raise FileExists if a file already exists in ``destination``. - + proxy: str, optional (default = "auto") + The proxy server to use. By default it's "auto" which uses ``requests`` to + automatically find the proxy to use. Pass None or the environment variable + ``no_proxy="*"`` to disable proxies. 
""" destination = pathlib.Path(destination) parsed_url = urllib.parse.urlparse(source) @@ -99,12 +134,23 @@ def _download_minio_file( if destination.is_file() and not exists_ok: raise FileExistsError(f"File already exists in {destination}.") - client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + if proxy == "auto": + proxy = resolve_env_proxies(parsed_url.geturl()) + + proxy_client = ProxyManager(proxy) if proxy else None + + client = minio.Minio(endpoint=parsed_url.netloc, secure=False, http_client=proxy_client) try: client.fget_object( - bucket_name=bucket, object_name=object_name, file_path=str(destination), + bucket_name=bucket, + object_name=object_name, + file_path=str(destination), ) + if destination.is_file() and destination.suffix == ".zip": + with zipfile.ZipFile(destination, "r") as zip_ref: + zip_ref.extractall(destination.parent) + except minio.error.S3Error as e: if e.message.startswith("Object does not exist"): raise FileNotFoundError(f"Object at '{source}' does not exist.") from e @@ -113,6 +159,39 @@ def _download_minio_file( raise FileNotFoundError("Bucket does not exist or is private.") from e +def _download_minio_bucket( + source: str, + destination: Union[str, pathlib.Path], + exists_ok: bool = True, +) -> None: + """Download file ``source`` from a MinIO Bucket and store it at ``destination``. + + Parameters + ---------- + source : Union[str, pathlib.Path] + URL to a MinIO bucket. + destination : str + Path to a directory to store the bucket content in. + exists_ok : bool, optional (default=True) + If False, raise FileExists if a file already exists in ``destination``. + """ + + destination = pathlib.Path(destination) + parsed_url = urllib.parse.urlparse(source) + + # expect path format: /BUCKET/path/to/file.ext + bucket = parsed_url.path[1:] + + client = minio.Minio(endpoint=parsed_url.netloc, secure=False) + + for file_object in client.list_objects(bucket, recursive=True): + _download_minio_file( + source=source + "/" + file_object.object_name, + destination=pathlib.Path(destination, file_object.object_name), + exists_ok=True, + ) + + def _download_text_file( source: str, output_path: Optional[str] = None, @@ -120,7 +199,7 @@ def _download_text_file( exists_ok: bool = True, encoding: str = "utf8", ) -> Optional[str]: - """ Download the text file at `source` and store it in `output_path`. + """Download the text file at `source` and store it in `output_path`. By default, do nothing if a file already exists in `output_path`. The downloaded file can be checked against an expected md5 checksum. 
@@ -156,7 +235,10 @@ if output_path is None: logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + "get", + source, ) return downloaded_file @@ -165,7 +247,10 @@ fh.write(downloaded_file) logging.info( - "%.7fs taken for [%s] request for the URL %s", time.time() - start, "get", source, + "%.7fs taken for [%s] request for the URL %s", + time.time() - start, + "get", + source, ) del downloaded_file @@ -174,8 +259,8 @@ def _file_id_to_url(file_id, filename=None): """ - Presents the URL how to download a given file id - filename is optional + Presents the URL how to download a given file id + filename is optional """ openml_url = config.server.split("/api/") url = openml_url[0] + "/data/download/%s" % file_id @@ -194,7 +279,12 @@ def _read_url_files(url, data=None, file_elements=None): file_elements = {} # Using requests.post sets header 'Accept-encoding' automatically to # 'gzip,deflate' - response = _send_request(request_method="post", url=url, data=data, files=file_elements,) + response = _send_request( + request_method="post", + url=url, + data=data, + files=file_elements, + ) return response @@ -207,15 +297,13 @@ def __read_url(url, request_method, data=None, md5_checksum=None): ) -def __is_checksum_equal(downloaded_file, md5_checksum=None): +def __is_checksum_equal(downloaded_file_binary: bytes, md5_checksum: Optional[str] = None) -> bool: if md5_checksum is None: return True md5 = hashlib.md5() - md5.update(downloaded_file.encode("utf-8")) + md5.update(downloaded_file_binary) md5_checksum_download = md5.hexdigest() - if md5_checksum == md5_checksum_download: - return True - return False + return md5_checksum == md5_checksum_download def _send_request(request_method, url, data, files=None, md5_checksum=None): @@ -235,29 +323,48 @@ def _send_request(request_method, url, data, files=None, md5_checksum=None): else: raise NotImplementedError() __check_response(response=response, url=url, file_elements=files) - if request_method == "get" and not __is_checksum_equal(response.text, md5_checksum): + if request_method == "get" and not __is_checksum_equal( + response.text.encode("utf-8"), md5_checksum + ): + + # -- Check if encoding is not UTF-8 perhaps + if __is_checksum_equal(response.content, md5_checksum): + raise OpenMLHashException( + "Checksum of downloaded file is unequal to the expected checksum {} " + "because the text encoding is not UTF-8 when downloading {}. " + "There might be a server-side issue with the file, " + "see: https://github.com/openml/openml-python/issues/1180.".format( + md5_checksum, url + ) + ) + raise OpenMLHashException( "Checksum of downloaded file is unequal to the expected checksum {} " "when downloading {}.".format(md5_checksum, url) ) break except ( + requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, requests.exceptions.SSLError, OpenMLServerException, xml.parsers.expat.ExpatError, OpenMLHashException, ) as e: - if isinstance(e, OpenMLServerException): - if e.code not in [107]: - # 107: database connection error - raise + if isinstance(e, OpenMLServerException) and e.code != 107: + # Propagate all server errors to the calling functions, except + # for 107 which represents a database connection error. + # These are typically caused by high server load, + # which means trying again might resolve the issue.
+ raise elif isinstance(e, xml.parsers.expat.ExpatError): if request_method != "get" or retry_counter >= n_retries: raise OpenMLServerError( "Unexpected server error when calling {}. Please contact the " "developers!\nStatus code: {}\n{}".format( - url, response.status_code, response.text, + url, + response.status_code, + response.text, ) ) if retry_counter >= n_retries: @@ -289,9 +396,10 @@ def __check_response(response, url, file_elements): def __parse_server_exception( - response: requests.Response, url: str, file_elements: Dict, + response: requests.Response, + url: str, + file_elements: Dict, ) -> OpenMLServerError: - if response.status_code == 414: raise OpenMLServerError("URI too long! ({})".format(url)) try: @@ -318,12 +426,17 @@ def __parse_server_exception( # 512 for runs, 372 for datasets, 500 for flows # 482 for tasks, 542 for evaluations, 674 for setups - return OpenMLServerNoResult(code=code, message=full_message,) + return OpenMLServerNoResult( + code=code, + message=full_message, + ) # 163: failure to validate flow XML (https://www.openml.org/api_docs#!/flow/post_flow) if code in [163] and file_elements is not None and "description" in file_elements: # file_elements['description'] is the XML file description of the flow full_message = "\n{}\n{} - {}".format( - file_elements["description"], message, additional_information, + file_elements["description"], + message, + additional_information, ) else: full_message = "{} - {}".format(message, additional_information) diff --git a/openml/base.py b/openml/base.py index 1b6e5ccc7..35a9ce58f 100644 --- a/openml/base.py +++ b/openml/base.py @@ -13,7 +13,7 @@ class OpenMLBase(ABC): - """ Base object for functionality that is shared across entities. """ + """Base object for functionality that is shared across entities.""" def __repr__(self): body_fields = self._get_repr_body_fields() @@ -22,32 +22,32 @@ def __repr__(self): @property @abstractmethod def id(self) -> Optional[int]: - """ The id of the entity, it is unique for its entity type. """ + """The id of the entity, it is unique for its entity type.""" pass @property def openml_url(self) -> Optional[str]: - """ The URL of the object on the server, if it was uploaded, else None. """ + """The URL of the object on the server, if it was uploaded, else None.""" if self.id is None: return None return self.__class__.url_for_id(self.id) @classmethod def url_for_id(cls, id_: int) -> str: - """ Return the OpenML URL for the object of the class entity with the given id. """ + """Return the OpenML URL for the object of the class entity with the given id.""" # Sample url for a flow: openml.org/f/123 return "{}/{}/{}".format(openml.config.get_server_base_url(), cls._entity_letter(), id_) @classmethod def _entity_letter(cls) -> str: - """ Return the letter which represents the entity type in urls, e.g. 'f' for flow.""" + """Return the letter which represents the entity type in urls, e.g. 'f' for flow.""" # We take advantage of the class naming convention (OpenMLX), # which holds for all entities except studies and tasks, which overwrite this method. return cls.__name__.lower()[len("OpenML") :][0] @abstractmethod def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. + """Collect all information to display in the __repr__ body. 
Returns ------ @@ -60,13 +60,13 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: pass def _apply_repr_template(self, body_fields: List[Tuple[str, str]]) -> str: - """ Generates the header and formats the body for string representation of the object. + """Generates the header and formats the body for string representation of the object. - Parameters - ---------- - body_fields: List[Tuple[str, str]] - A list of (name, value) pairs to display in the body of the __repr__. - """ + Parameters + ---------- + body_fields: List[Tuple[str, str]] + A list of (name, value) pairs to display in the body of the __repr__. + """ # We add spaces between capitals, e.g. ClassificationTask -> Classification Task name_with_spaces = re.sub( r"(\w)([A-Z])", r"\1 \2", self.__class__.__name__[len("OpenML") :] @@ -81,7 +81,7 @@ def _apply_repr_template(self, body_fields: List[Tuple[str, str]]) -> str: @abstractmethod def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. + """Creates a dictionary representation of self. Uses OrderedDict to ensure consistent ordering when converting to xml. The return value (OrderedDict) will be used to create the upload xml file. @@ -98,7 +98,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": pass def _to_xml(self) -> str: - """ Generate xml representation of self for upload to server. """ + """Generate xml representation of self for upload to server.""" dict_representation = self._to_dict() xml_representation = xmltodict.unparse(dict_representation, pretty=True) @@ -108,7 +108,7 @@ def _to_xml(self) -> str: return xml_body def _get_file_elements(self) -> Dict: - """ Get file_elements to upload to the server, called during Publish. + """Get file_elements to upload to the server, called during Publish. Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. @@ -117,7 +117,7 @@ def _get_file_elements(self) -> Dict: @abstractmethod def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" pass def publish(self) -> "OpenMLBase": @@ -136,7 +136,7 @@ def publish(self) -> "OpenMLBase": return self def open_in_browser(self): - """ Opens the OpenML web page corresponding to this object in your default browser. """ + """Opens the OpenML web page corresponding to this object in your default browser.""" webbrowser.open(self.openml_url) def push_tag(self, tag: str): diff --git a/openml/cli.py b/openml/cli.py index cfd453e9f..039ac227c 100644 --- a/openml/cli.py +++ b/openml/cli.py @@ -26,7 +26,7 @@ def looks_like_url(url: str) -> bool: def wait_until_valid_input( prompt: str, check: Callable[[str], str], sanitize: Union[Callable[[str], str], None] ) -> str: - """ Asks `prompt` until an input is received which returns True for `check`. + """Asks `prompt` until an input is received which returns True for `check`. Parameters ---------- @@ -252,7 +252,7 @@ def configure_field( input_message: str, sanitize: Union[Callable[[str], str], None] = None, ) -> None: - """ Configure `field` with `value`. If `value` is None ask the user for input. + """Configure `field` with `value`. If `value` is None ask the user for input. `value` and user input are first corrected/auto-completed with `convert_value` if provided, then validated with `check_with_message` function. 
@@ -288,13 +288,15 @@ def configure_field( else: print(intro_message) value = wait_until_valid_input( - prompt=input_message, check=check_with_message, sanitize=sanitize, + prompt=input_message, + check=check_with_message, + sanitize=sanitize, ) verbose_set(field, value) def configure(args: argparse.Namespace): - """ Calls the right submenu(s) to edit `args.field` in the configuration file. """ + """Calls the right submenu(s) to edit `args.field` in the configuration file.""" set_functions = { "apikey": configure_apikey, "server": configure_server, @@ -348,7 +350,11 @@ def main() -> None: ) parser_configure.add_argument( - "value", type=str, default=None, nargs="?", help="The value to set the FIELD to.", + "value", + type=str, + default=None, + nargs="?", + help="The value to set the FIELD to.", ) args = parser.parse_args() diff --git a/openml/config.py b/openml/config.py index 8593ad484..09359d33d 100644 --- a/openml/config.py +++ b/openml/config.py @@ -23,7 +23,7 @@ def _create_log_handlers(create_file_handler=True): - """ Creates but does not attach the log handlers. """ + """Creates but does not attach the log handlers.""" global console_handler, file_handler if console_handler is not None or file_handler is not None: logger.debug("Requested to create log handlers, but they are already created.") @@ -36,7 +36,7 @@ def _create_log_handlers(create_file_handler=True): console_handler.setFormatter(output_formatter) if create_file_handler: - one_mb = 2 ** 20 + one_mb = 2**20 log_path = os.path.join(cache_directory, "openml_python.log") file_handler = logging.handlers.RotatingFileHandler( log_path, maxBytes=one_mb, backupCount=1, delay=True @@ -45,7 +45,7 @@ def _create_log_handlers(create_file_handler=True): def _convert_log_levels(log_level: int) -> Tuple[int, int]: - """ Converts a log level that's either defined by OpenML/Python to both specifications. """ + """Converts a log level that's either defined by OpenML/Python to both specifications.""" # OpenML verbosity level don't match Python values directly: openml_to_python = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG} python_to_openml = { @@ -62,7 +62,7 @@ def _convert_log_levels(log_level: int) -> Tuple[int, int]: def _set_level_register_and_store(handler: logging.Handler, log_level: int): - """ Set handler log level, register it if needed, save setting to config file if specified. """ + """Set handler log level, register it if needed, save setting to config file if specified.""" oml_level, py_level = _convert_log_levels(log_level) handler.setLevel(py_level) @@ -74,13 +74,13 @@ def _set_level_register_and_store(handler: logging.Handler, log_level: int): def set_console_log_level(console_output_level: int): - """ Set console output to the desired level and register it with openml logger if needed. """ + """Set console output to the desired level and register it with openml logger if needed.""" global console_handler _set_level_register_and_store(cast(logging.Handler, console_handler), console_output_level) def set_file_log_level(file_output_level: int): - """ Set file output to the desired level and register it with openml logger if needed. 
""" + """Set file output to the desired level and register it with openml logger if needed.""" global file_handler _set_level_register_and_store(cast(logging.Handler, file_handler), file_output_level) @@ -90,7 +90,14 @@ def set_file_log_level(file_output_level: int): "apikey": "", "server": "https://www.openml.org/api/v1/xml", "cachedir": ( - os.environ.get("XDG_CACHE_HOME", os.path.join("~", ".cache", "openml",)) + os.environ.get( + "XDG_CACHE_HOME", + os.path.join( + "~", + ".cache", + "openml", + ), + ) if platform.system() == "Linux" else os.path.join("~", ".openml") ), @@ -144,7 +151,7 @@ def set_retry_policy(value: str, n_retries: Optional[int] = None) -> None: class ConfigurationForExamples: - """ Allows easy switching to and from a test configuration, used for examples. """ + """Allows easy switching to and from a test configuration, used for examples.""" _last_used_server = None _last_used_key = None @@ -154,7 +161,7 @@ class ConfigurationForExamples: @classmethod def start_using_configuration_for_example(cls): - """ Sets the configuration to connect to the test server with valid apikey. + """Sets the configuration to connect to the test server with valid apikey. To configuration as was before this call is stored, and can be recovered by using the `stop_use_example_configuration` method. @@ -181,7 +188,7 @@ def start_using_configuration_for_example(cls): @classmethod def stop_using_configuration_for_example(cls): - """ Return to configuration as it was before `start_use_example_configuration`. """ + """Return to configuration as it was before `start_use_example_configuration`.""" if not cls._start_last_called: # We don't want to allow this because it will (likely) result in the `server` and # `apikey` variables being set to None. @@ -281,7 +288,7 @@ def _get(config, key): def set_field_in_config_file(field: str, value: Any): - """ Overwrites the `field` in the configuration file with the new `value`. """ + """Overwrites the `field` in the configuration file with the new `value`.""" if field not in _defaults: return ValueError(f"Field '{field}' is not valid and must be one of '{_defaults.keys()}'.") @@ -302,7 +309,7 @@ def set_field_in_config_file(field: str, value: Any): def _parse_config(config_file: str): - """ Parse the config file, set up defaults. """ + """Parse the config file, set up defaults.""" config = configparser.RawConfigParser(defaults=_defaults) # The ConfigParser requires a [SECTION_HEADER], which we do not expect in our config file. diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py index abde85c06..efa5a5d5b 100644 --- a/openml/datasets/__init__.py +++ b/openml/datasets/__init__.py @@ -11,6 +11,7 @@ list_qualities, edit_dataset, fork_dataset, + delete_dataset, ) from .dataset import OpenMLDataset from .data_feature import OpenMLDataFeature @@ -28,4 +29,5 @@ "list_qualities", "edit_dataset", "fork_dataset", + "delete_dataset", ] diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 122e2e697..1644ff177 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -239,7 +239,7 @@ def id(self) -> Optional[int]: return self.dataset_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. 
""" + """Collect all information to display in the __repr__ body.""" fields = { "Name": self.name, "Version": self.version, @@ -275,7 +275,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: def __eq__(self, other): - if type(other) != OpenMLDataset: + if not isinstance(other, OpenMLDataset): return False server_fields = { @@ -287,17 +287,15 @@ def __eq__(self, other): "data_file", } - # check that the keys are identical + # check that common keys and values are identical self_keys = set(self.__dict__.keys()) - server_fields other_keys = set(other.__dict__.keys()) - server_fields - if self_keys != other_keys: - return False - - # check that values of the common keys are identical - return all(self.__dict__[key] == other.__dict__[key] for key in self_keys) + return self_keys == other_keys and all( + self.__dict__[key] == other.__dict__[key] for key in self_keys + ) def _download_data(self) -> None: - """ Download ARFF data file to standard cache directory. Set `self.data_file`. """ + """Download ARFF data file to standard cache directory. Set `self.data_file`.""" # import required here to avoid circular import. from .functions import _get_dataset_arff, _get_dataset_parquet @@ -354,8 +352,8 @@ def decode_arff(fh): return decoder.decode(fh, encode_nominal=True, return_type=return_type) if filename[-3:] == ".gz": - with gzip.open(filename) as fh: - return decode_arff(fh) + with gzip.open(filename) as zipfile: + return decode_arff(zipfile) else: with open(filename, encoding="utf8") as fh: return decode_arff(fh) @@ -363,7 +361,7 @@ def decode_arff(fh): def _parse_data_from_arff( self, arff_file_path: str ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: - """ Parse all required data from arff file. + """Parse all required data from arff file. Parameters ---------- @@ -473,7 +471,7 @@ def _compressed_cache_file_paths(self, data_file: str) -> Tuple[str, str, str]: def _cache_compressed_file_from_file( self, data_file: str ) -> Tuple[Union[pd.DataFrame, scipy.sparse.csr_matrix], List[bool], List[str]]: - """ Store data from the local file in compressed format. + """Store data from the local file in compressed format. If a local parquet file is present it will be used instead of the arff file. Sets cache_format to 'pickle' if data is sparse. @@ -519,7 +517,7 @@ def _cache_compressed_file_from_file( return data, categorical, attribute_names def _load_data(self): - """ Load data from compressed format or arff. Download data if not present on disk. """ + """Load data from compressed format or arff. 
Download data if not present on disk.""" need_to_create_pickle = self.cache_format == "pickle" and self.data_pickle_file is None need_to_create_feather = self.cache_format == "feather" and self.data_feather_file is None @@ -544,15 +542,23 @@ def _load_data(self): data, categorical, attribute_names = pickle.load(fh) except FileNotFoundError: raise ValueError(f"Cannot find file for dataset {self.name} at location '{fpath}'.") - except (EOFError, ModuleNotFoundError, ValueError) as e: + except (EOFError, ModuleNotFoundError, ValueError, AttributeError) as e: error_message = e.message if hasattr(e, "message") else e.args[0] hint = "" if isinstance(e, EOFError): readable_error = "Detected a corrupt cache file" - elif isinstance(e, ModuleNotFoundError): + elif isinstance(e, (ModuleNotFoundError, AttributeError)): readable_error = "Detected likely dependency issues" - hint = "This is most likely due to https://github.com/openml/openml-python/issues/918. " # noqa: 501 + hint = ( + "This can happen if the cache was constructed with a different pandas version " + "than the one that is used to load the data. See also " + ) + if isinstance(e, ModuleNotFoundError): + hint += "https://github.com/openml/openml-python/issues/918. " + elif isinstance(e, AttributeError): + hint += "https://github.com/openml/openml-python/pull/1121. " + elif isinstance(e, ValueError) and "unsupported pickle protocol" in e.args[0]: readable_error = "Encountered unsupported pickle protocol" else: @@ -667,7 +673,7 @@ def get_data( List[bool], List[str], ]: - """ Returns dataset content as dataframes or sparse matrices. + """Returns dataset content as dataframes or sparse matrices. Parameters ---------- @@ -855,7 +861,7 @@ def get_features_by_type( return result def _get_file_elements(self) -> Dict: - """ Adds the 'dataset' to file elements. """ + """Adds the 'dataset' to file elements.""" file_elements = {} path = None if self.data_file is None else os.path.abspath(self.data_file) @@ -874,11 +880,11 @@ def _get_file_elements(self) -> Dict: return file_elements def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.dataset_id = int(xml_response["oml:upload_data_set"]["oml:id"]) def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" props = [ "id", "name", diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 34156eff7..4307c8008 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,7 +3,9 @@ import io import logging import os +from pyexpat import ExpatError from typing import List, Dict, Union, Optional, cast +import warnings import numpy as np import arff @@ -19,6 +21,7 @@ from .dataset import OpenMLDataset from ..exceptions import ( OpenMLHashException, + OpenMLServerError, OpenMLServerException, OpenMLPrivateDatasetError, ) @@ -36,12 +39,12 @@ def _get_cache_directory(dataset: OpenMLDataset) -> str: - """ Return the cache directory of the OpenMLDataset """ + """Return the cache directory of the OpenMLDataset""" return _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset.dataset_id) def list_qualities() -> List[str]: - """ Return list of data qualities available. + """Return list of data qualities available. 
The function performs an API call to retrieve the entire list of data qualities that are computed on the datasets uploaded. @@ -236,7 +239,8 @@ def _validated_data_attributes( def check_datasets_active( - dataset_ids: List[int], raise_error_if_not_exist: bool = True, + dataset_ids: List[int], + raise_error_if_not_exist: bool = True, ) -> Dict[int, bool]: """ Check if the dataset ids provided are active. @@ -274,7 +278,7 @@ def check_datasets_active( def _name_to_id( dataset_name: str, version: Optional[int] = None, error_if_multiple: bool = False ) -> int: - """ Attempt to find the dataset id of the dataset with the given name. + """Attempt to find the dataset id of the dataset with the given name. If multiple datasets with the name exist, and ``error_if_multiple`` is ``False``, then return the least recent still active dataset. @@ -353,8 +357,9 @@ def get_dataset( error_if_multiple: bool = False, cache_format: str = "pickle", download_qualities: bool = True, + download_all_files: bool = False, ) -> OpenMLDataset: - """ Download the OpenML dataset representation, optionally also download actual data file. + """Download the OpenML dataset representation, optionally also download actual data file. This function is thread/multiprocessing safe. This function uses caching. A check will be performed to determine if the information has @@ -386,11 +391,20 @@ def get_dataset( no.of.rows is very high. download_qualities : bool (default=True) Option to download 'qualities' meta-data in addition to the minimal dataset description. + download_all_files: bool (default=False) + EXPERIMENTAL. Download all files related to the dataset that reside on the server. + Useful for datasets which refer to auxiliary files (e.g., meta-album). + Returns ------- dataset : :class:`openml.OpenMLDataset` The downloaded dataset. """ + if download_all_files: + warnings.warn( + "``download_all_files`` is experimental and is likely to break with new releases." + ) + if cache_format not in ["feather", "pickle"]: raise ValueError( "cache_format must be one of 'feather' or 'pickle. " @@ -407,7 +421,10 @@ def get_dataset( "`dataset_id` must be one of `str` or `int`, not {}.".format(type(dataset_id)) ) - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, dataset_id,) + did_cache_dir = _create_cache_directory_for_id( + DATASETS_CACHE_DIR_NAME, + dataset_id, + ) remove_dataset_cache = True try: @@ -429,14 +446,16 @@ def get_dataset( arff_file = _get_dataset_arff(description) if download_data else None if "oml:minio_url" in description and download_data: try: - parquet_file = _get_dataset_parquet(description) + parquet_file = _get_dataset_parquet( + description, download_all_files=download_all_files + ) except urllib3.exceptions.MaxRetryError: parquet_file = None else: parquet_file = None remove_dataset_cache = False except OpenMLServerException as e: - # if there was an exception, + # if there was an exception # check if the user had access to the dataset if e.code == 112: raise OpenMLPrivateDatasetError(e.message) from None @@ -453,7 +472,7 @@ def get_dataset( def attributes_arff_from_df(df): - """ Describe attributes of the dataframe according to ARFF specification. + """Describe attributes of the dataframe according to ARFF specification. Parameters ---------- @@ -749,7 +768,7 @@ def edit_dataset( original_data_url=None, paper_url=None, ) -> int: - """ Edits an OpenMLDataset. + """Edits an OpenMLDataset. 
In addition to providing the dataset id of the dataset to edit (through data_id), you must specify a value for at least one of the optional function arguments, @@ -889,7 +908,7 @@ def _topic_add_dataset(data_id: int, topic: str): id of the dataset for which the topic needs to be added topic : str Topic to be added for the dataset - """ + """ if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) form_data = {"data_id": data_id, "topic": topic} @@ -910,7 +929,7 @@ def _topic_delete_dataset(data_id: int, topic: str): topic : str Topic to be deleted - """ + """ if not isinstance(data_id, int): raise TypeError("`data_id` must be of type `int`, not {}.".format(type(data_id))) form_data = {"data_id": data_id, "topic": topic} @@ -948,21 +967,27 @@ def _get_dataset_description(did_cache_dir, dataset_id): try: with io.open(description_file, encoding="utf8") as fh: dataset_xml = fh.read() + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] except Exception: url_extension = "data/{}".format(dataset_id) dataset_xml = openml._api_calls._perform_api_call(url_extension, "get") + try: + description = xmltodict.parse(dataset_xml)["oml:data_set_description"] + except ExpatError as e: + url = openml._api_calls._create_url_from_endpoint(url_extension) + raise OpenMLServerError(f"Dataset description XML at '{url}' is malformed.") from e with io.open(description_file, "w", encoding="utf8") as fh: fh.write(dataset_xml) - description = xmltodict.parse(dataset_xml)["oml:data_set_description"] - return description def _get_dataset_parquet( - description: Union[Dict, OpenMLDataset], cache_directory: str = None + description: Union[Dict, OpenMLDataset], + cache_directory: str = None, + download_all_files: bool = False, ) -> Optional[str]: - """ Return the path to the local parquet file of the dataset. If is not cached, it is downloaded. + """Return the path to the local parquet file of the dataset. If is not cached, it is downloaded. Checks if the file is in the cache, if yes, return the path to the file. If not, downloads the file and caches it, then returns the file path. @@ -980,36 +1005,54 @@ def _get_dataset_parquet( Folder to store the parquet file in. If None, use the default cache directory for the dataset. + download_all_files: bool, optional (default=False) + If `True`, download all data found in the bucket to which the description's + ``minio_url`` points, only download the parquet file otherwise. + Returns ------- output_filename : string, optional Location of the Parquet file if successfully downloaded, None otherwise. 
""" if isinstance(description, dict): - url = description.get("oml:minio_url") + url = cast(str, description.get("oml:minio_url")) did = description.get("oml:id") elif isinstance(description, OpenMLDataset): - url = description._minio_url + url = cast(str, description._minio_url) did = description.dataset_id else: raise TypeError("`description` should be either OpenMLDataset or Dict.") if cache_directory is None: cache_directory = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, did) - output_file_path = os.path.join(cache_directory, "dataset.pq") + output_file_path = os.path.join(cache_directory, f"dataset_{did}.pq") + + old_file_path = os.path.join(cache_directory, "dataset.pq") + if os.path.isfile(old_file_path): + os.rename(old_file_path, output_file_path) + + # For this release, we want to be able to force a new download even if the + # parquet file is already present when ``download_all_files`` is set. + # For now, it would be the only way for the user to fetch the additional + # files in the bucket (no function exists on an OpenMLDataset to do this). + if download_all_files: + if url.endswith(".pq"): + url, _ = url.rsplit("/", maxsplit=1) + openml._api_calls._download_minio_bucket(source=cast(str, url), destination=cache_directory) if not os.path.isfile(output_file_path): try: openml._api_calls._download_minio_file( source=cast(str, url), destination=output_file_path ) - except FileNotFoundError: + except (FileNotFoundError, urllib3.exceptions.MaxRetryError) as e: + logger.warning("Could not download file from %s: %s" % (cast(str, url), e)) return None return output_file_path def _get_dataset_arff(description: Union[Dict, OpenMLDataset], cache_directory: str = None) -> str: - """ Return the path to the local arff file of the dataset. If is not cached, it is downloaded. + """Return the path to the local arff file of the dataset. If is not cached, it is downloaded. Checks if the file is in the cache, if yes, return the path to the file. If not, downloads the file and caches it, then returns the file path. @@ -1228,3 +1271,22 @@ def _get_online_dataset_format(dataset_id): dataset_xml = openml._api_calls._perform_api_call("data/%d" % dataset_id, "get") # build a dict from the xml and get the format from the dataset description return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() + + +def delete_dataset(dataset_id: int) -> bool: + """Delete dataset with id `dataset_id` from the OpenML server. + + This can only be done if you are the owner of the dataset and + no tasks are attached to the dataset. + + Parameters + ---------- + dataset_id : int + OpenML id of the dataset + + Returns + ------- + bool + True if the deletion was successful. False otherwise. + """ + return openml.utils._delete_entity("data", dataset_id) diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index b3fdd0aa0..693ec06cf 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -253,7 +253,7 @@ def __list_evaluations(api_call, output_format="object"): def list_evaluation_measures() -> List[str]: - """ Return list of evaluation measures available. + """Return list of evaluation measures available. The function performs an API call to retrieve the entire list of evaluation measures that are available. @@ -275,6 +275,39 @@ def list_evaluation_measures() -> List[str]: return qualities +def list_estimation_procedures() -> List[str]: + """Return list of evaluation procedures available. 
+ + The function performs an API call to retrieve the entire list of + evaluation procedures' names that are available. + + Returns + ------- + list + """ + + api_call = "estimationprocedure/list" + xml_string = openml._api_calls._perform_api_call(api_call, "get") + api_results = xmltodict.parse(xml_string) + + # Minimalistic check if the XML is useful + if "oml:estimationprocedures" not in api_results: + raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedures"') + if "oml:estimationprocedure" not in api_results["oml:estimationprocedures"]: + raise ValueError("Error in return XML, does not contain " '"oml:estimationprocedure"') + + if not isinstance(api_results["oml:estimationprocedures"]["oml:estimationprocedure"], list): + raise TypeError( + "Error in return XML, does not contain " '"oml:estimationprocedure" as a list' + ) + + prods = [ + prod["oml:name"] + for prod in api_results["oml:estimationprocedures"]["oml:estimationprocedure"] + ] + return prods + + def list_evaluations_setups( function: str, offset: Optional[int] = None, diff --git a/openml/exceptions.py b/openml/exceptions.py index 781784ee2..fe2138e76 100644 --- a/openml/exceptions.py +++ b/openml/exceptions.py @@ -9,17 +9,16 @@ def __init__(self, message: str): class OpenMLServerError(PyOpenMLError): """class for when something is really wrong on the server - (result did not parse to dict), contains unparsed error.""" + (result did not parse to dict), contains unparsed error.""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLServerException(OpenMLServerError): """exception for when the result of the server was - not 200 (e.g., listing call w/o results). """ + not 200 (e.g., listing call w/o results).""" - # Code needs to be optional to allow the exceptino to be picklable: + # Code needs to be optional to allow the exception to be picklable: # https://stackoverflow.com/questions/16244923/how-to-make-a-custom-exception-class-with-multiple-init-args-pickleable # noqa: E501 def __init__(self, message: str, code: int = None, url: str = None): self.message = message @@ -28,11 +27,11 @@ def __init__(self, message: str, code: int = None, url: str = None): super().__init__(message) def __str__(self): - return "%s returned code %s: %s" % (self.url, self.code, self.message,) + return f"{self.url} returned code {self.code}: {self.message}" class OpenMLServerNoResult(OpenMLServerException): - """exception for when the result of the server is empty. """ + """Exception for when the result of the server is empty.""" pass @@ -40,8 +39,7 @@ class OpenMLServerNoResult(OpenMLServerException): class OpenMLCacheException(PyOpenMLError): """Dataset / task etc not found in cache""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLHashException(PyOpenMLError): @@ -51,17 +49,22 @@ class OpenMLHashException(PyOpenMLError): class OpenMLPrivateDatasetError(PyOpenMLError): - """ Exception thrown when the user has no rights to access the dataset. """ + """Exception thrown when the user has no rights to access the dataset.""" - def __init__(self, message: str): - super().__init__(message) + pass class OpenMLRunsExistError(PyOpenMLError): - """ Indicates run(s) already exists on the server when they should not be duplicated. 
""" + """Indicates run(s) already exists on the server when they should not be duplicated.""" def __init__(self, run_ids: set, message: str): if len(run_ids) < 1: raise ValueError("Set of run ids must be non-empty.") self.run_ids = run_ids super().__init__(message) + + +class OpenMLNotAuthorizedError(OpenMLServerError): + """Indicates an authenticated user is not authorized to execute the requested action.""" + + pass diff --git a/openml/extensions/extension_interface.py b/openml/extensions/extension_interface.py index 4529ad163..f33ef7543 100644 --- a/openml/extensions/extension_interface.py +++ b/openml/extensions/extension_interface.py @@ -204,7 +204,9 @@ def _run_model_on_fold( @abstractmethod def obtain_parameter_values( - self, flow: "OpenMLFlow", model: Any = None, + self, + flow: "OpenMLFlow", + model: Any = None, ) -> List[Dict[str, Any]]: """Extracts all parameter settings required for the flow from the model. @@ -247,7 +249,9 @@ def check_if_model_fitted(self, model: Any) -> bool: @abstractmethod def instantiate_model_from_hpo_class( - self, model: Any, trace_iteration: "OpenMLTraceIteration", + self, + model: Any, + trace_iteration: "OpenMLTraceIteration", ) -> Any: """Instantiate a base model which can be searched over by the hyperparameter optimization model. diff --git a/openml/extensions/functions.py b/openml/extensions/functions.py index 52bb03961..a080e1004 100644 --- a/openml/extensions/functions.py +++ b/openml/extensions/functions.py @@ -30,7 +30,8 @@ def register_extension(extension: Type[Extension]) -> None: def get_extension_by_flow( - flow: "OpenMLFlow", raise_if_no_extension: bool = False, + flow: "OpenMLFlow", + raise_if_no_extension: bool = False, ) -> Optional[Extension]: """Get an extension which can handle the given flow. @@ -66,7 +67,10 @@ def get_extension_by_flow( ) -def get_extension_by_model(model: Any, raise_if_no_extension: bool = False,) -> Optional[Extension]: +def get_extension_by_model( + model: Any, + raise_if_no_extension: bool = False, +) -> Optional[Extension]: """Get an extension which can handle the given flow. Iterates all registered extensions and checks whether they can handle the presented model. diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index d49a9a9c5..997a9b8ea 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -11,7 +11,7 @@ from re import IGNORECASE import sys import time -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, cast +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, cast, Sized import warnings import numpy as np @@ -38,19 +38,16 @@ logger = logging.getLogger(__name__) - if sys.version_info >= (3, 5): from json.decoder import JSONDecodeError else: JSONDecodeError = ValueError - DEPENDENCIES_PATTERN = re.compile( r"^(?P[\w\-]+)((?P==|>=|>)" r"(?P(\d+\.)?(\d+\.)?(\d+)?(dev)?[0-9]*))?$" ) - SIMPLE_NUMPY_TYPES = [ nptype for type_cat, nptypes in np.sctypes.items() @@ -66,8 +63,8 @@ class SklearnExtension(Extension): """Connect scikit-learn to OpenML-Python. - The estimators which use this extension must be scikit-learn compatible, - i.e needs to be a subclass of sklearn.base.BaseEstimator". + The estimators which use this extension must be scikit-learn compatible, + i.e needs to be a subclass of sklearn.base.BaseEstimator". 
""" ################################################################################################ @@ -107,7 +104,7 @@ def can_handle_model(cls, model: Any) -> bool: def trim_flow_name( cls, long_name: str, extra_trim_length: int = 100, _outer: bool = True ) -> str: - """ Shorten generated sklearn flow name to at most ``max_length`` characters. + """Shorten generated sklearn flow name to at most ``max_length`` characters. Flows are assumed to have the following naming structure: ``(model_selection)? (pipeline)? (steps)+`` @@ -223,7 +220,7 @@ def remove_all_in_parentheses(string: str) -> str: @classmethod def _min_dependency_str(cls, sklearn_version: str) -> str: - """ Returns a string containing the minimum dependencies for the sklearn version passed. + """Returns a string containing the minimum dependencies for the sklearn version passed. Parameters ---------- @@ -499,7 +496,7 @@ def _serialize_sklearn(self, o: Any, parent_model: Optional[Any] = None) -> Any: rval = tuple(rval) elif isinstance(o, SIMPLE_TYPES) or o is None: if isinstance(o, tuple(SIMPLE_NUMPY_TYPES)): - o = o.item() + o = o.item() # type: ignore # base parameter values rval = o elif isinstance(o, dict): @@ -580,15 +577,11 @@ def _is_cross_validator(self, o: Any) -> bool: @classmethod def _is_sklearn_flow(cls, flow: OpenMLFlow) -> bool: - if getattr(flow, "dependencies", None) is not None and "sklearn" in flow.dependencies: - return True - if flow.external_version is None: - return False - else: - return ( - flow.external_version.startswith("sklearn==") - or ",sklearn==" in flow.external_version - ) + sklearn_dependency = isinstance(flow.dependencies, str) and "sklearn" in flow.dependencies + sklearn_as_external = isinstance(flow.external_version, str) and ( + flow.external_version.startswith("sklearn==") or ",sklearn==" in flow.external_version + ) + return sklearn_dependency or sklearn_as_external def _get_sklearn_description(self, model: Any, char_lim: int = 1024) -> str: """Fetches the sklearn function docstring for the flow description @@ -858,7 +851,9 @@ def _get_tags(self) -> List[str]: ] def _get_external_version_string( - self, model: Any, sub_components: Dict[str, OpenMLFlow], + self, + model: Any, + sub_components: Dict[str, OpenMLFlow], ) -> str: # Create external version string for a flow, given the model and the # already parsed dictionary of sub_components. 
Retrieves the external @@ -874,7 +869,8 @@ def _get_external_version_string( module = importlib.import_module(model_package_name) model_package_version_number = module.__version__ # type: ignore external_version = self._format_external_version( - model_package_name, model_package_version_number, + model_package_name, + model_package_version_number, ) external_versions.add(external_version) @@ -890,7 +886,9 @@ def _get_external_version_string( return ",".join(list(sorted(external_versions))) def _check_multiple_occurence_of_component_in_flow( - self, model: Any, sub_components: Dict[str, OpenMLFlow], + self, + model: Any, + sub_components: Dict[str, OpenMLFlow], ) -> None: to_visit_stack = [] # type: List[OpenMLFlow] to_visit_stack.extend(sub_components.values()) @@ -910,7 +908,8 @@ def _check_multiple_occurence_of_component_in_flow( to_visit_stack.extend(visitee.components.values()) def _extract_information_from_model( - self, model: Any, + self, + model: Any, ) -> Tuple[ "OrderedDict[str, Optional[str]]", "OrderedDict[str, Optional[Dict]]", @@ -936,7 +935,7 @@ def _extract_information_from_model( rval = self._serialize_sklearn(v, model) def flatten_all(list_): - """ Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]). """ + """Flattens arbitrary depth lists of lists (e.g. [[1,2],[3,[1]]] -> [1,2,3,1]).""" for el in list_: if isinstance(el, (list, tuple)) and len(el) > 0: yield from flatten_all(el) @@ -1246,14 +1245,16 @@ def _check_dependencies(self, dependencies: str, strict_version: bool = True) -> def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": mapping = { float: "float", - np.float: "np.float", # type: ignore np.float32: "np.float32", np.float64: "np.float64", int: "int", - np.int: "np.int", # type: ignore np.int32: "np.int32", np.int64: "np.int64", } + if LooseVersion(np.__version__) < "1.24": + mapping[np.float] = "np.float" + mapping[np.int] = "np.int" + ret = OrderedDict() # type: 'OrderedDict[str, str]' ret["oml-python:serialized_object"] = "type" ret["value"] = mapping[o] @@ -1262,14 +1263,16 @@ def _serialize_type(self, o: Any) -> "OrderedDict[str, str]": def _deserialize_type(self, o: str) -> Any: mapping = { "float": float, - "np.float": np.float, # type: ignore "np.float32": np.float32, "np.float64": np.float64, "int": int, - "np.int": np.int, # type: ignore "np.int32": np.int32, "np.int64": np.int64, } + if LooseVersion(np.__version__) < "1.24": + mapping["np.float"] = np.float + mapping["np.int"] = np.int + return mapping[o] def _serialize_rv_frozen(self, o: Any) -> "OrderedDict[str, Union[str, Dict]]": @@ -1351,7 +1354,7 @@ def _serialize_cross_validator(self, o: Any) -> "OrderedDict[str, Union[str, Dic # if the parameter is deprecated, don't show it continue - if not (hasattr(value, "__len__") and len(value) == 0): + if not (isinstance(value, Sized) and len(value) == 0): value = json.dumps(value) parameters[key] = value else: @@ -1381,7 +1384,9 @@ def _deserialize_cross_validator( return model_class(**parameters) def _format_external_version( - self, model_package_name: str, model_package_version_number: str, + self, + model_package_name: str, + model_package_version_number: str, ) -> str: return "%s==%s" % (model_package_name, model_package_version_number) @@ -1530,7 +1535,7 @@ def _seed_current_object(current_value): # statement) this way we guarantee that if a different set of # subflows is seeded, the same number of the random generator is # used - new_value = rs.randint(0, 2 ** 16) + new_value = rs.randint(0, 2**16) if 
_seed_current_object(current_value): random_states[param_name] = new_value @@ -1540,7 +1545,7 @@ def _seed_current_object(current_value): continue current_value = model_params[param_name].random_state - new_value = rs.randint(0, 2 ** 16) + new_value = rs.randint(0, 2**16) if _seed_current_object(current_value): model_params[param_name].random_state = new_value @@ -1777,7 +1782,8 @@ def _prediction_to_probabilities( # for class 3 because the rest of the library expects that the # probabilities are ordered the same way as the classes are ordered). message = "Estimator only predicted for {}/{} classes!".format( - proba_y.shape[1], len(task.class_labels), + proba_y.shape[1], + len(task.class_labels), ) warnings.warn(message) openml.config.logger.warning(message) @@ -1815,7 +1821,9 @@ def _prediction_to_probabilities( return pred_y, proba_y, user_defined_measures, trace def obtain_parameter_values( - self, flow: "OpenMLFlow", model: Any = None, + self, + flow: "OpenMLFlow", + model: Any = None, ) -> List[Dict[str, Any]]: """Extracts all parameter settings required for the flow from the model. @@ -1852,24 +1860,22 @@ def is_subcomponent_specification(values): # checks whether the current value can be a specification of # subcomponents, as for example the value for steps parameter # (in Pipeline) or transformers parameter (in - # ColumnTransformer). These are always lists/tuples of lists/ - # tuples, size bigger than 2 and an OpenMLFlow item involved. - if not isinstance(values, (tuple, list)): - return False - for item in values: - if not isinstance(item, (tuple, list)): - return False - if len(item) < 2: - return False - if not isinstance(item[1], (openml.flows.OpenMLFlow, str)): - if ( + # ColumnTransformer). + return ( + # Specification requires list/tuple of list/tuple with + # at least length 2. + isinstance(values, (tuple, list)) + and all(isinstance(item, (tuple, list)) and len(item) > 1 for item in values) + # And each component needs to be a flow or interpretable string + and all( + isinstance(item[1], openml.flows.OpenMLFlow) + or ( isinstance(item[1], str) and item[1] in SKLEARN_PIPELINE_STRING_COMPONENTS - ): - pass - else: - return False - return True + ) + for item in values + ) + ) # _flow is openml flow object, _param dict maps from flow name to flow # id for the main call, the param dict can be overridden (useful for @@ -2019,7 +2025,9 @@ def is_subcomponent_specification(values): return parameters def _openml_param_name_to_sklearn( - self, openml_parameter: openml.setups.OpenMLParameter, flow: OpenMLFlow, + self, + openml_parameter: openml.setups.OpenMLParameter, + flow: OpenMLFlow, ) -> str: """ Converts the name of an OpenMLParameter into the sklean name, given a flow. @@ -2068,7 +2076,9 @@ def _is_hpo_class(self, model: Any) -> bool: return isinstance(model, sklearn.model_selection._search.BaseSearchCV) def instantiate_model_from_hpo_class( - self, model: Any, trace_iteration: OpenMLTraceIteration, + self, + model: Any, + trace_iteration: OpenMLTraceIteration, ) -> Any: """Instantiate a ``base_estimator`` which can be searched over by the hyperparameter optimization model. 
@@ -2114,7 +2124,11 @@ def _extract_trace_data(self, model, rep_no, fold_no): arff_tracecontent.append(arff_line) return arff_tracecontent - def _obtain_arff_trace(self, model: Any, trace_content: List,) -> "OpenMLRunTrace": + def _obtain_arff_trace( + self, + model: Any, + trace_content: List, + ) -> "OpenMLRunTrace": """Create arff trace object from a fitted model and the trace content obtained by repeatedly calling ``run_model_on_task``. @@ -2176,4 +2190,7 @@ def _obtain_arff_trace(self, model: Any, trace_content: List,) -> "OpenMLRunTrac attribute = (PREFIX + key[6:], type) trace_attributes.append(attribute) - return OpenMLRunTrace.generate(trace_attributes, trace_content,) + return OpenMLRunTrace.generate( + trace_attributes, + trace_content, + ) diff --git a/openml/flows/__init__.py b/openml/flows/__init__.py index 3642b9c56..f8d35c3f5 100644 --- a/openml/flows/__init__.py +++ b/openml/flows/__init__.py @@ -2,7 +2,14 @@ from .flow import OpenMLFlow -from .functions import get_flow, list_flows, flow_exists, get_flow_id, assert_flows_equal +from .functions import ( + get_flow, + list_flows, + flow_exists, + get_flow_id, + assert_flows_equal, + delete_flow, +) __all__ = [ "OpenMLFlow", @@ -11,4 +18,5 @@ "get_flow_id", "flow_exists", "assert_flows_equal", + "delete_flow", ] diff --git a/openml/flows/flow.py b/openml/flows/flow.py index 2a340e625..b9752e77c 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -174,7 +174,7 @@ def extension(self): ) def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Flow Name": self.name, "Flow Description": self.description, @@ -203,7 +203,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: return [(key, fields[key]) for key in order if key in fields] def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" flow_container = OrderedDict() # type: 'OrderedDict[str, OrderedDict]' flow_dict = OrderedDict( [("@xmlns:oml", "http://openml.org/openml")] @@ -297,7 +297,7 @@ def _from_dict(cls, xml_dict): Calls itself recursively to create :class:`OpenMLFlow` objects of subflows (components). - + XML definition of a flow is available at https://github.com/openml/OpenML/blob/master/openml_OS/views/pages/api_new/v1/xsd/openml.implementation.upload.xsd @@ -400,11 +400,11 @@ def from_filesystem(cls, input_directory) -> "OpenMLFlow": return OpenMLFlow._from_dict(xmltodict.parse(xml_string)) def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.flow_id = int(xml_response["oml:upload_flow"]["oml:id"]) def publish(self, raise_error_if_exists: bool = False) -> "OpenMLFlow": - """ Publish this flow to OpenML server. + """Publish this flow to OpenML server. Raises a PyOpenMLError if the flow exists on the server, but `self.flow_id` does not match the server known flow id. 
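Note: this changeset also introduces symmetric deletion helpers across the entity modules (openml.datasets.delete_dataset, openml.flows.delete_flow, openml.runs.delete_run, each a thin wrapper around openml.utils._delete_entity). A minimal usage sketch, assuming you uploaded the entities yourself and have an API key configured; the ids below are placeholders:

    import openml

    # Deletion only succeeds for entities you own that have nothing depending
    # on them (e.g., a dataset with tasks attached or a flow linked to runs
    # cannot be deleted). Each helper returns True on success, False otherwise.
    run_deleted = openml.runs.delete_run(run_id=10000000)
    flow_deleted = openml.flows.delete_flow(flow_id=10000000)
    data_deleted = openml.datasets.delete_dataset(dataset_id=10000000)
    print(run_deleted, flow_deleted, data_deleted)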
diff --git a/openml/flows/functions.py index 048fa92a4..aea5cae6d 100644 --- a/openml/flows/functions.py +++ b/openml/flows/functions.py @@ -122,7 +122,8 @@ def _get_flow_description(flow_id: int) -> OpenMLFlow: except OpenMLCacheException: xml_file = os.path.join( - openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id), "flow.xml", + openml.utils._create_cache_directory_for_id(FLOWS_CACHE_DIR_NAME, flow_id), + "flow.xml", ) flow_xml = openml._api_calls._perform_api_call("flow/%d" % flow_id, request_method="get") @@ -253,19 +254,20 @@ def flow_exists(name: str, external_version: str) -> Union[int, bool]: raise ValueError("Argument 'version' should be a non-empty string") xml_response = openml._api_calls._perform_api_call( - "flow/exists", "post", data={"name": name, "external_version": external_version}, + "flow/exists", + "post", + data={"name": name, "external_version": external_version}, ) result_dict = xmltodict.parse(xml_response) flow_id = int(result_dict["oml:flow_exists"]["oml:id"]) - if flow_id > 0: - return flow_id - else: - return False + return flow_id if flow_id > 0 else False def get_flow_id( - model: Optional[Any] = None, name: Optional[str] = None, exact_version=True, + model: Optional[Any] = None, + name: Optional[str] = None, + exact_version=True, ) -> Union[int, bool, List[int]]: """Retrieves the flow id for a model or a flow name. @@ -357,7 +359,7 @@ def __list_flows(api_call: str, output_format: str = "dict") -> Union[Dict, pd.D def _check_flow_for_server_id(flow: OpenMLFlow) -> None: - """ Raises a ValueError if the flow or any of its subflows has no flow id. """ + """Raises a ValueError if the flow or any of its subflows has no flow id.""" # Depth-first search to check if all components were uploaded to the # server before parsing the parameters @@ -429,6 +431,9 @@ def assert_flows_equal( attr1 = getattr(flow1, key, None) attr2 = getattr(flow2, key, None) if key == "components": + if not (isinstance(attr1, Dict) and isinstance(attr2, Dict)): + raise TypeError("Cannot compare components because they are not dictionaries.") + for name in set(attr1.keys()).union(attr2.keys()): if name not in attr1: raise ValueError( @@ -490,8 +495,8 @@ def assert_flows_equal( # dictionary with keys specifying the parameter's 'description' and 'data_type' # checking parameter descriptions can be ignored since that might change # data type check can also be ignored if one of them is not defined, i.e., None - params1 = set(flow1.parameters_meta_info.keys()) - params2 = set(flow2.parameters_meta_info.keys()) + params1 = set(flow1.parameters_meta_info) + params2 = set(flow2.parameters_meta_info) if params1 != params2: raise ValueError( "Parameter list in meta info for parameters differ " "in the two flows." ) @@ -539,3 +544,22 @@ def _create_flow_from_xml(flow_xml: str) -> OpenMLFlow: """ return OpenMLFlow._from_dict(xmltodict.parse(flow_xml)) + + +def delete_flow(flow_id: int) -> bool: + """Delete flow with id `flow_id` from the OpenML server. + + You can only delete flows which you uploaded and which + are not linked to runs. + + Parameters + ---------- + flow_id : int + OpenML id of the flow + + Returns + ------- + bool + True if the deletion was successful. False otherwise.
+ """ + return openml.utils._delete_entity("flow", flow_id) diff --git a/openml/runs/__init__.py b/openml/runs/__init__.py index e917a57a5..2abbd8f29 100644 --- a/openml/runs/__init__.py +++ b/openml/runs/__init__.py @@ -12,6 +12,7 @@ run_exists, initialize_model_from_run, initialize_model_from_trace, + delete_run, ) __all__ = [ @@ -27,4 +28,5 @@ "run_exists", "initialize_model_from_run", "initialize_model_from_trace", + "delete_run", ] diff --git a/openml/runs/functions.py b/openml/runs/functions.py index 8bbe3b956..d52b43add 100644 --- a/openml/runs/functions.py +++ b/openml/runs/functions.py @@ -155,7 +155,6 @@ def run_flow_on_task( dataset_format: str = "dataframe", n_jobs: Optional[int] = None, ) -> OpenMLRun: - """Run the model provided by the flow on the dataset defined by task. Takes the flow and repeat information into account. @@ -353,7 +352,10 @@ def initialize_model_from_run(run_id: int) -> Any: def initialize_model_from_trace( - run_id: int, repeat: int, fold: int, iteration: Optional[int] = None, + run_id: int, + repeat: int, + fold: int, + iteration: Optional[int] = None, ) -> Any: """ Initialize a model based on the parameters that were set @@ -461,7 +463,12 @@ def _run_task_get_arffcontent( jobs = [] for n_fit, (rep_no, fold_no, sample_no) in enumerate( - itertools.product(range(num_reps), range(num_folds), range(num_samples),), start=1 + itertools.product( + range(num_reps), + range(num_folds), + range(num_samples), + ), + start=1, ): jobs.append((n_fit, rep_no, fold_no, sample_no)) @@ -507,13 +514,13 @@ def _calculate_local_measure(sklearn_fn, openml_name): else pred_y[i] ) if isinstance(test_y, pd.Series): - test_prediction = ( + truth = ( task.class_labels[test_y.iloc[i]] if isinstance(test_y.iloc[i], int) else test_y.iloc[i] ) else: - test_prediction = ( + truth = ( task.class_labels[test_y[i]] if isinstance(test_y[i], (int, np.integer)) else test_y[i] @@ -527,7 +534,7 @@ def _calculate_local_measure(sklearn_fn, openml_name): sample=sample_no, index=tst_idx, prediction=prediction, - truth=test_prediction, + truth=truth, proba=dict(zip(task.class_labels, pred_prob)), ) else: @@ -537,27 +544,29 @@ def _calculate_local_measure(sklearn_fn, openml_name): if add_local_measures: _calculate_local_measure( - sklearn.metrics.accuracy_score, "predictive_accuracy", + sklearn.metrics.accuracy_score, + "predictive_accuracy", ) elif isinstance(task, OpenMLRegressionTask): for i, _ in enumerate(test_indices): - test_prediction = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i] + truth = test_y.iloc[i] if isinstance(test_y, pd.Series) else test_y[i] arff_line = format_prediction( task=task, repeat=rep_no, fold=fold_no, index=test_indices[i], prediction=pred_y[i], - truth=test_prediction, + truth=truth, ) arff_datacontent.append(arff_line) if add_local_measures: _calculate_local_measure( - sklearn.metrics.mean_absolute_error, "mean_absolute_error", + sklearn.metrics.mean_absolute_error, + "mean_absolute_error", ) elif isinstance(task, OpenMLClusteringTask): @@ -910,9 +919,10 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): parameter_settings=parameters, dataset_id=dataset_id, output_files=files, - evaluations=evaluations, - fold_evaluations=fold_evaluations, - sample_evaluations=sample_evaluations, + # Make sure default values are used where needed to keep run objects identical + evaluations=evaluations or None, + fold_evaluations=fold_evaluations or None, + sample_evaluations=sample_evaluations or None, tags=tags, predictions_url=predictions_url, 
run_details=run_details, @@ -921,7 +931,10 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None): def _get_cached_run(run_id): """Load a run from the cache.""" - run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id,) + run_cache_dir = openml.utils._create_cache_directory_for_id( + RUNS_CACHE_DIR_NAME, + run_id, + ) try: run_file = os.path.join(run_cache_dir, "description.xml") with io.open(run_file, encoding="utf8") as fh: @@ -1144,7 +1157,7 @@ def format_prediction( sample: Optional[int] = None, proba: Optional[Dict[str, float]] = None, ) -> List[Union[str, int, float]]: - """ Format the predictions in the specific order as required for the run results. + """Format the predictions in the specific order as required for the run results. Parameters ---------- @@ -1173,6 +1186,10 @@ def format_prediction( ------- A list with elements for the prediction results of a run. + The returned order of the elements is (if available): + [repeat, fold, sample, index, prediction, truth, *probabilities] + + This order follows the R Client API. """ if isinstance(task, OpenMLClassificationTask): if proba is None: @@ -1187,8 +1204,26 @@ def format_prediction( else: sample = 0 probabilities = [proba[c] for c in task.class_labels] - return [repeat, fold, sample, index, *probabilities, truth, prediction] + return [repeat, fold, sample, index, prediction, truth, *probabilities] elif isinstance(task, OpenMLRegressionTask): - return [repeat, fold, index, truth, prediction] + return [repeat, fold, index, prediction, truth] else: raise NotImplementedError(f"Formatting for {type(task)} is not supported.") + + +def delete_run(run_id: int) -> bool: + """Delete run with id `run_id` from the OpenML server. + + You can only delete runs which you uploaded. + + Parameters + ---------- + run_id : int + OpenML id of the run + + Returns + ------- + bool + True if the deletion was successful. False otherwise. + """ + return openml.utils._delete_entity("run", run_id) diff --git a/openml/runs/run.py b/openml/runs/run.py index 4c1c9907d..5528c8a67 100644 --- a/openml/runs/run.py +++ b/openml/runs/run.py @@ -8,6 +8,7 @@ import arff import numpy as np +import pandas as pd import openml import openml._api_calls @@ -25,39 +26,61 @@ class OpenMLRun(OpenMLBase): - """OpenML Run: result of running a model on an openml dataset. + """OpenML Run: result of running a model on an OpenML dataset. Parameters ---------- task_id: int + The ID of the OpenML task associated with the run. flow_id: int + The ID of the OpenML flow associated with the run. dataset_id: int + The ID of the OpenML dataset used for the run. setup_string: str + The setup string of the run. output_files: Dict[str, str] - A dictionary that specifies where each related file can be found. + Specifies where each related file can be found. setup_id: int + An integer representing the ID of the setup used for the run. tags: List[str] + Representing the tags associated with the run. uploader: int User ID of the uploader. uploader_name: str + The name of the person who uploaded the run. evaluations: Dict + Representing the evaluations of the run. fold_evaluations: Dict + The evaluations of the run for each fold. sample_evaluations: Dict + The evaluations of the run for each sample. data_content: List[List] The predictions generated from executing this run. trace: OpenMLRunTrace + The trace containing information on internal model evaluations of this run. model: object + The untrained model that was evaluated in the run. 
task_type: str + The type of the OpenML task associated with the run. task_evaluation_measure: str + The evaluation measure used for the task. flow_name: str + The name of the OpenML flow associated with the run. parameter_settings: List[OrderedDict] + Representing the parameter settings used for the run. predictions_url: str + The URL of the predictions file. task: OpenMLTask + An instance of the OpenMLTask class, representing the OpenML task associated + with the run. flow: OpenMLFlow + An instance of the OpenMLFlow class, representing the OpenML flow associated + with the run. run_id: int + The ID of the run. description_text: str, optional - Description text to add to the predictions file. - If left None, is set to the time the arff file is generated. + Description text to add to the predictions file. If left None, is set to the + time the arff file is generated. run_details: str, optional (default=None) Description of the run stored in the run meta-data. """ @@ -116,13 +139,59 @@ def __init__( self.predictions_url = predictions_url self.description_text = description_text self.run_details = run_details + self._predictions = None + + @property + def predictions(self) -> pd.DataFrame: + """Return a DataFrame with predictions for this run""" + if self._predictions is None: + if self.data_content: + arff_dict = self._generate_arff_dict() + elif self.predictions_url: + arff_text = openml._api_calls._download_text_file(self.predictions_url) + arff_dict = arff.loads(arff_text) + else: + raise RuntimeError("Run has no predictions.") + self._predictions = pd.DataFrame( + arff_dict["data"], columns=[name for name, _ in arff_dict["attributes"]] + ) + return self._predictions @property def id(self) -> Optional[int]: return self.run_id + def _evaluation_summary(self, metric: str) -> str: + """Summarizes the evaluation of a metric over all folds. + + The fold scores for the metric must exist already. During run creation, + by default, the MAE for OpenMLRegressionTask and the accuracy for + OpenMLClassificationTask/OpenMLLearningCurveTask tasks are computed. + + If repetitions exist, we take the mean over all repetitions. + + Parameters + ---------- + metric: str + Name of an evaluation metric that was used to compute fold scores. + + Returns + ------- + metric_summary: str + A formatted string that displays the metric's evaluation summary. + The summary consists of the mean and std. + """ + fold_score_lists = self.fold_evaluations[metric].values() + + # Get the mean and std over all repetitions + rep_means = [np.mean(list(x.values())) for x in fold_score_lists] + rep_stds = [np.std(list(x.values())) for x in fold_score_lists] + + return "{:.4f} +- {:.4f}".format(np.mean(rep_means), np.mean(rep_stds)) + def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body.
""" + """Collect all information to display in the __repr__ body.""" + # Set up fields fields = { "Uploader Name": self.uploader_name, "Metric": self.task_evaluation_measure, @@ -138,6 +207,10 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: "Dataset ID": self.dataset_id, "Dataset URL": openml.datasets.OpenMLDataset.url_for_id(self.dataset_id), } + + # determines the order of the initial fields in which the information will be printed + order = ["Uploader Name", "Uploader Profile", "Metric", "Result"] + if self.uploader is not None: fields["Uploader Profile"] = "{}/u/{}".format( openml.config.get_server_base_url(), self.uploader @@ -146,13 +219,29 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["Run URL"] = self.openml_url if self.evaluations is not None and self.task_evaluation_measure in self.evaluations: fields["Result"] = self.evaluations[self.task_evaluation_measure] - - # determines the order in which the information will be printed - order = [ - "Uploader Name", - "Uploader Profile", - "Metric", - "Result", + elif self.fold_evaluations is not None: + # -- Add locally computed summary values if possible + if "predictive_accuracy" in self.fold_evaluations: + # OpenMLClassificationTask; OpenMLLearningCurveTask + # default: predictive_accuracy + result_field = "Local Result - Accuracy (+- STD)" + fields[result_field] = self._evaluation_summary("predictive_accuracy") + order.append(result_field) + elif "mean_absolute_error" in self.fold_evaluations: + # OpenMLRegressionTask + # default: mean_absolute_error + result_field = "Local Result - MAE (+- STD)" + fields[result_field] = self._evaluation_summary("mean_absolute_error") + order.append(result_field) + + if "usercpu_time_millis" in self.fold_evaluations: + # Runtime should be available for most tasks types + rt_field = "Local Runtime - ms (+- STD)" + fields[rt_field] = self._evaluation_summary("usercpu_time_millis") + order.append(rt_field) + + # determines the remaining order + order += [ "Run ID", "Run URL", "Task ID", @@ -233,7 +322,11 @@ def from_filesystem(cls, directory: str, expect_model: bool = True) -> "OpenMLRu return run - def to_filesystem(self, directory: str, store_model: bool = True,) -> None: + def to_filesystem( + self, + directory: str, + store_model: bool = True, + ) -> None: """ The inverse of the from_filesystem method. Serializes a run on the filesystem, to be uploaded later. @@ -282,6 +375,8 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": Assumes that the run has been executed. + The order of the attributes follows the order defined by the Client API for R. + Returns ------- arf_dict : dict @@ -315,11 +410,11 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": if class_labels is not None: arff_dict["attributes"] = ( arff_dict["attributes"] + + [("prediction", class_labels), ("correct", class_labels)] + [ ("confidence." 
+ class_labels[i], "NUMERIC") for i in range(len(class_labels)) ] - + [("prediction", class_labels), ("correct", class_labels)] ) else: raise ValueError("The task has no class labels") @@ -340,7 +435,7 @@ def _generate_arff_dict(self) -> "OrderedDict[str, Any]": ] prediction_and_true = [("prediction", class_labels), ("correct", class_labels)] arff_dict["attributes"] = ( - arff_dict["attributes"] + prediction_confidences + prediction_and_true + arff_dict["attributes"] + prediction_and_true + prediction_confidences ) else: raise ValueError("The task has no class labels") @@ -390,7 +485,8 @@ def get_metric_fn(self, sklearn_fn, kwargs=None): predictions_arff = self._generate_arff_dict() elif "predictions" in self.output_files: predictions_file_url = openml._api_calls._file_id_to_url( - self.output_files["predictions"], "predictions.arff", + self.output_files["predictions"], + "predictions.arff", ) response = openml._api_calls._download_text_file(predictions_file_url) predictions_arff = arff.loads(response) @@ -498,11 +594,11 @@ def _attribute_list_to_dict(attribute_list): return np.array(scores) def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.run_id = int(xml_response["oml:upload_run"]["oml:run_id"]) def _get_file_elements(self) -> Dict: - """ Get file_elements to upload to the server. + """Get file_elements to upload to the server. Derived child classes should overwrite this method as necessary. The description field will be populated automatically if not provided. @@ -526,7 +622,8 @@ def _get_file_elements(self) -> Dict: if self.flow is None: self.flow = openml.flows.get_flow(self.flow_id) self.parameter_settings = self.flow.extension.obtain_parameter_values( - self.flow, self.model, + self.flow, + self.model, ) file_elements = {"description": ("description.xml", self._to_xml())} @@ -541,7 +638,7 @@ def _get_file_elements(self) -> Dict: return file_elements def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. 
""" + """Creates a dictionary representation of self.""" description = OrderedDict() # type: 'OrderedDict' description["oml:run"] = OrderedDict() description["oml:run"]["@xmlns:oml"] = "http://openml.org/openml" diff --git a/openml/runs/trace.py b/openml/runs/trace.py index 0c05b9dc8..0b8571fe5 100644 --- a/openml/runs/trace.py +++ b/openml/runs/trace.py @@ -1,6 +1,7 @@ # License: BSD 3-Clause from collections import OrderedDict +from dataclasses import dataclass import json import os from typing import List, Tuple, Optional # noqa F401 @@ -331,7 +332,12 @@ def trace_from_xml(cls, xml): ) current = OpenMLTraceIteration( - repeat, fold, iteration, setup_string, evaluation, selected, + repeat=repeat, + fold=fold, + iteration=iteration, + setup_string=setup_string, + evaluation=evaluation, + selected=selected, ) trace[(repeat, fold, iteration)] = current @@ -372,7 +378,8 @@ def merge_traces(cls, traces: List["OpenMLRunTrace"]) -> "OpenMLRunTrace": def __repr__(self): return "[Run id: {}, {} trace iterations]".format( - -1 if self.run_id is None else self.run_id, len(self.trace_iterations), + -1 if self.run_id is None else self.run_id, + len(self.trace_iterations), ) def __iter__(self): @@ -380,8 +387,11 @@ def __iter__(self): yield val -class OpenMLTraceIteration(object): - """OpenML Trace Iteration: parsed output from Run Trace call +@dataclass +class OpenMLTraceIteration: + """ + OpenML Trace Iteration: parsed output from Run Trace call + Exactly one of `setup_string` or `parameters` must be provided. Parameters ---------- @@ -394,8 +404,9 @@ class OpenMLTraceIteration(object): iteration : int iteration number of optimization procedure - setup_string : str + setup_string : str, optional json string representing the parameters + If not provided, ``parameters`` should be set. evaluation : double The evaluation that was awarded to this trace iteration. @@ -406,35 +417,37 @@ class OpenMLTraceIteration(object): selected for making predictions. Per fold/repeat there should be only one iteration selected - parameters : OrderedDict + parameters : OrderedDict, optional + Dictionary specifying parameter names and their values. + If not provided, ``setup_string`` should be set. """ - def __init__( - self, repeat, fold, iteration, setup_string, evaluation, selected, parameters=None, - ): + repeat: int + fold: int + iteration: int + + evaluation: float + selected: bool + + setup_string: Optional[str] = None + parameters: Optional[OrderedDict] = None - if not isinstance(selected, bool): - raise TypeError(type(selected)) - if setup_string and parameters: + def __post_init__(self): + # TODO: refactor into one argument of type + if self.setup_string and self.parameters: raise ValueError( - "Can only be instantiated with either " "setup_string or parameters argument." + "Can only be instantiated with either `setup_string` or `parameters` argument." ) - elif not setup_string and not parameters: - raise ValueError("Either setup_string or parameters needs to be passed as " "argument.") - if parameters is not None and not isinstance(parameters, OrderedDict): + elif not (self.setup_string or self.parameters): + raise ValueError( + "Either `setup_string` or `parameters` needs to be passed as argument." 
+ ) + if self.parameters is not None and not isinstance(self.parameters, OrderedDict): raise TypeError( "argument parameters is not an instance of OrderedDict, but %s" - % str(type(parameters)) + % str(type(self.parameters)) ) - self.repeat = repeat - self.fold = fold - self.iteration = iteration - self.setup_string = setup_string - self.evaluation = evaluation - self.selected = selected - self.parameters = parameters - def get_parameters(self): result = {} # parameters have prefix 'parameter_' @@ -448,15 +461,3 @@ def get_parameters(self): for param, value in self.parameters.items(): result[param[len(PREFIX) :]] = value return result - - def __repr__(self): - """ - tmp string representation, will be changed in the near future - """ - return "[(%d,%d,%d): %f (%r)]" % ( - self.repeat, - self.fold, - self.iteration, - self.evaluation, - self.selected, - ) diff --git a/openml/setups/functions.py b/openml/setups/functions.py index b418a6106..f4fab3219 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -42,7 +42,10 @@ def setup_exists(flow) -> int: # checks whether the flow exists on the server and flow ids align exists = flow_exists(flow.name, flow.external_version) if exists != flow.flow_id: - raise ValueError("This should not happen!") + raise ValueError( + f"Local flow id ({flow.id}) differs from server id ({exists}). " + "If this issue persists, please contact the developers." + ) openml_param_settings = flow.extension.obtain_parameter_values(flow) description = xmltodict.unparse(_to_dict(flow.flow_id, openml_param_settings), pretty=True) @@ -52,10 +55,7 @@ def setup_exists(flow) -> int: ) result_dict = xmltodict.parse(result) setup_id = int(result_dict["oml:setup_exists"]["oml:id"]) - if setup_id > 0: - return setup_id - else: - return False + return setup_id if setup_id > 0 else False def _get_cached_setup(setup_id): @@ -175,7 +175,7 @@ def _list_setups(setup=None, output_format="object", **kwargs): Returns ------- dict or dataframe - """ + """ api_call = "setup/list" if setup is not None: diff --git a/openml/study/functions.py b/openml/study/functions.py index ee877ddf2..ae257dd9c 100644 --- a/openml/study/functions.py +++ b/openml/study/functions.py @@ -3,7 +3,6 @@ from typing import cast, Dict, List, Optional, Union import warnings -import dateutil.parser import xmltodict import pandas as pd @@ -31,7 +30,8 @@ def get_suite(suite_id: Union[int, str]) -> OpenMLBenchmarkSuite: def get_study( - study_id: Union[int, str], arg_for_backwards_compat: Optional[str] = None, + study_id: Union[int, str], + arg_for_backwards_compat: Optional[str] = None, ) -> OpenMLStudy: # noqa F401 """ Retrieves all relevant information of an OpenML study from the server. @@ -84,7 +84,8 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: if entity_type != main_entity_type: raise ValueError( "Unexpected entity type '{}' reported by the server, expected '{}'".format( - main_entity_type, entity_type, + main_entity_type, + entity_type, ) ) benchmark_suite = ( @@ -94,7 +95,6 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: description = result_dict["oml:description"] status = result_dict["oml:status"] creation_date = result_dict["oml:creation_date"] - creation_date_as_date = dateutil.parser.parse(creation_date) creator = result_dict["oml:creator"] # tags is legacy. remove once no longer needed. 
@@ -106,35 +106,18 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: current_tag["window_start"] = tag["oml:window_start"] tags.append(current_tag) - if "oml:data" in result_dict: - datasets = [int(x) for x in result_dict["oml:data"]["oml:data_id"]] - else: - raise ValueError("No datasets attached to study {}!".format(id_)) - if "oml:tasks" in result_dict: - tasks = [int(x) for x in result_dict["oml:tasks"]["oml:task_id"]] - else: - raise ValueError("No tasks attached to study {}!".format(id_)) + def get_nested_ids_from_result_dict(key: str, subkey: str) -> Optional[List]: + if result_dict.get(key) is not None: + return [int(oml_id) for oml_id in result_dict[key][subkey]] + return None - if main_entity_type in ["runs", "run"]: + datasets = get_nested_ids_from_result_dict("oml:data", "oml:data_id") + tasks = get_nested_ids_from_result_dict("oml:tasks", "oml:task_id") - if "oml:flows" in result_dict: - flows = [int(x) for x in result_dict["oml:flows"]["oml:flow_id"]] - else: - raise ValueError("No flows attached to study {}!".format(id_)) - if "oml:setups" in result_dict: - setups = [int(x) for x in result_dict["oml:setups"]["oml:setup_id"]] - else: - raise ValueError("No setups attached to study {}!".format(id_)) - if "oml:runs" in result_dict: - runs = [ - int(x) for x in result_dict["oml:runs"]["oml:run_id"] - ] # type: Optional[List[int]] - else: - if creation_date_as_date < dateutil.parser.parse("2019-01-01"): - # Legacy studies did not require runs - runs = None - else: - raise ValueError("No runs attached to study {}!".format(id_)) + if main_entity_type in ["runs", "run"]: + flows = get_nested_ids_from_result_dict("oml:flows", "oml:flow_id") + setups = get_nested_ids_from_result_dict("oml:setups", "oml:setup_id") + runs = get_nested_ids_from_result_dict("oml:runs", "oml:run_id") study = OpenMLStudy( study_id=study_id, @@ -154,7 +137,7 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: ) # type: BaseStudy elif main_entity_type in ["tasks", "task"]: - + tasks = cast("List[int]", tasks) study = OpenMLBenchmarkSuite( suite_id=study_id, alias=alias, @@ -177,9 +160,9 @@ def _get_study(id_: Union[int, str], entity_type) -> BaseStudy: def create_study( name: str, description: str, - run_ids: List[int], - alias: Optional[str], - benchmark_suite: Optional[int], + run_ids: Optional[List[int]] = None, + alias: Optional[str] = None, + benchmark_suite: Optional[int] = None, ) -> OpenMLStudy: """ Creates an OpenML study (collection of data, tasks, flows, setups and runs), @@ -188,16 +171,19 @@ def create_study( Parameters ---------- - alias : str (optional) - a string ID, unique on server (url-friendly) benchmark_suite : int (optional) the benchmark suite (another study) upon which this study is run. name : str the name of the study (meta-info) description : str brief description (meta-info) - run_ids : list - a list of run ids associated with this study + run_ids : list, optional + a list of run ids associated with this study, + these can also be added later with ``attach_to_study``.
+ alias : str (optional) + a string ID, unique on server (url-friendly) + benchmark_suite: int (optional) + the ID of the suite for which this study contains run results Returns ------- @@ -217,13 +203,16 @@ def create_study( data=None, tasks=None, flows=None, - runs=run_ids, + runs=run_ids if run_ids != [] else None, setups=None, ) def create_benchmark_suite( - name: str, description: str, task_ids: List[int], alias: Optional[str], + name: str, + description: str, + task_ids: List[int], + alias: Optional[str] = None, ) -> OpenMLBenchmarkSuite: """ Creates an OpenML benchmark suite (collection of entity types, where @@ -231,14 +220,15 @@ def create_benchmark_suite( Parameters ---------- - alias : str (optional) - a string ID, unique on server (url-friendly) name : str the name of the study (meta-info) description : str brief description (meta-info) task_ids : list a list of task ids associated with this study + more can be added later with ``attach_to_suite``. + alias : str (optional) + a string ID, unique on server (url-friendly) Returns ------- diff --git a/openml/study/study.py b/openml/study/study.py index dbbef6e89..0cdc913f9 100644 --- a/openml/study/study.py +++ b/openml/study/study.py @@ -99,7 +99,7 @@ def id(self) -> Optional[int]: return self.study_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Name": self.name, "Status": self.status, @@ -138,11 +138,11 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: return [(key, fields[key]) for key in order if key in fields] def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.study_id = int(xml_response["oml:study_upload"]["oml:id"]) def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. 
""" + """Creates a dictionary representation of self.""" # some can not be uploaded, e.g., id, creator, creation_date simple_props = ["alias", "main_entity_type", "name", "description"] # maps from attribute name (which is used as outer tag name) to immer diff --git a/openml/tasks/__init__.py b/openml/tasks/__init__.py index cba0aa14f..a5d578d2d 100644 --- a/openml/tasks/__init__.py +++ b/openml/tasks/__init__.py @@ -15,6 +15,7 @@ get_task, get_tasks, list_tasks, + delete_task, ) __all__ = [ @@ -30,4 +31,5 @@ "list_tasks", "OpenMLSplit", "TaskType", + "delete_task", ] diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index 2c5a56ad7..964277760 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -90,7 +90,7 @@ def _get_estimation_procedure_list(): procs_dict = xmltodict.parse(xml_string) # Minimalistic check if the XML is useful if "oml:estimationprocedures" not in procs_dict: - raise ValueError("Error in return XML, does not contain tag " "oml:estimationprocedures.") + raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") elif "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: raise ValueError( "Error in return XML, does not contain tag " @@ -106,10 +106,19 @@ def _get_estimation_procedure_list(): procs = [] for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: + task_type_int = int(proc_["oml:ttid"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + ) + continue procs.append( { "id": int(proc_["oml:id"]), - "task_type_id": TaskType(int(proc_["oml:ttid"])), + "task_type_id": task_type_id, "name": proc_["oml:name"], "type": proc_["oml:type"], } @@ -124,7 +133,7 @@ def list_tasks( size: Optional[int] = None, tag: Optional[str] = None, output_format: str = "dict", - **kwargs + **kwargs, ) -> Union[Dict, pd.DataFrame]: """ Return a number of tasks having the given tag and task_type @@ -135,15 +144,7 @@ def list_tasks( it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. task_type : TaskType, optional - ID of the task type as detailed `here `_. - - Supervised classification: 1 - - Supervised regression: 2 - - Learning curve: 3 - - Supervised data stream classification: 4 - - Clustering: 5 - - Machine Learning Challenge: 6 - - Survival Analysis: 7 - - Subgroup Discovery: 8 + Refers to the type of task. offset : int, optional the number of tasks to skip, starting from the first size : int, optional @@ -183,7 +184,7 @@ def list_tasks( offset=offset, size=size, tag=tag, - **kwargs + **kwargs, ) @@ -196,16 +197,7 @@ def _list_tasks(task_type=None, output_format="dict", **kwargs): it is used as task_type in the task description, but it is named type when used as a filter in list tasks call. task_type : TaskType, optional - ID of the task type as detailed - `here `_. - - Supervised classification: 1 - - Supervised regression: 2 - - Learning curve: 3 - - Supervised data stream classification: 4 - - Clustering: 5 - - Machine Learning Challenge: 6 - - Survival Analysis: 7 - - Subgroup Discovery: 8 + Refers to the type of task. output_format: str, optional (default='dict') The parameter decides the format of the output. 
- If 'dict' the output is a dict of dict @@ -257,9 +249,18 @@ def __list_tasks(api_call, output_format="dict"): tid = None try: tid = int(task_["oml:task_id"]) + task_type_int = int(task_["oml:task_type_id"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + ) + continue task = { "tid": tid, - "ttid": TaskType(int(task_["oml:task_type_id"])), + "ttid": task_type_id, "did": int(task_["oml:did"]), "name": task_["oml:name"], "task_type": task_["oml:task_type"], @@ -347,14 +348,20 @@ def get_task( task """ if not isinstance(task_id, int): - warnings.warn("Task id must be specified as `int` from 0.14.0 onwards.", DeprecationWarning) + warnings.warn( + "Task id must be specified as `int` from 0.14.0 onwards.", + DeprecationWarning, + ) try: task_id = int(task_id) except (ValueError, TypeError): raise ValueError("Dataset ID is neither an Integer nor can be cast to an Integer.") - tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,) + tid_cache_dir = openml.utils._create_cache_directory_for_id( + TASKS_CACHE_DIR_NAME, + task_id, + ) try: task = _get_task_description(task_id) @@ -371,7 +378,8 @@ def get_task( task.download_split() except Exception as e: openml.utils._remove_cache_dir_for_id( - TASKS_CACHE_DIR_NAME, tid_cache_dir, + TASKS_CACHE_DIR_NAME, + tid_cache_dir, ) raise e @@ -384,7 +392,11 @@ def _get_task_description(task_id): return _get_cached_task(task_id) except OpenMLCacheException: xml_file = os.path.join( - openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id,), "task.xml", + openml.utils._create_cache_directory_for_id( + TASKS_CACHE_DIR_NAME, + task_id, + ), + "task.xml", ) task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") @@ -475,9 +487,12 @@ def create_task( estimation_procedure_id: int, target_name: Optional[str] = None, evaluation_measure: Optional[str] = None, - **kwargs + **kwargs, ) -> Union[ - OpenMLClassificationTask, OpenMLRegressionTask, OpenMLLearningCurveTask, OpenMLClusteringTask + OpenMLClassificationTask, + OpenMLRegressionTask, + OpenMLLearningCurveTask, + OpenMLClusteringTask, ]: """Create a task based on different given attributes. @@ -528,5 +543,24 @@ def create_task( target_name=target_name, estimation_procedure_id=estimation_procedure_id, evaluation_measure=evaluation_measure, - **kwargs + **kwargs, ) + + +def delete_task(task_id: int) -> bool: + """Delete task with id `task_id` from the OpenML server. + + You can only delete tasks which you created and have + no runs associated with them. + + Parameters + ---------- + task_id : int + OpenML id of the task + + Returns + ------- + bool + True if the deletion was successful. False otherwise. + """ + return openml.utils._delete_entity("task", task_id) diff --git a/openml/tasks/split.py b/openml/tasks/split.py index 515be895a..dc496ef7d 100644 --- a/openml/tasks/split.py +++ b/openml/tasks/split.py @@ -14,11 +14,11 @@ class OpenMLSplit(object): """OpenML Split object. 
- Parameters - ---------- - name : int or str - description : str - split : dict + Parameters + ---------- + name : int or str + description : str + split : dict """ def __init__(self, name, description, split): @@ -47,12 +47,10 @@ def __eq__(self, other): or self.name != other.name or self.description != other.description or self.split.keys() != other.split.keys() - ): - return False - - if any( - self.split[repetition].keys() != other.split[repetition].keys() - for repetition in self.split + or any( + self.split[repetition].keys() != other.split[repetition].keys() + for repetition in self.split + ) ): return False diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 6a1f2a4c5..14a85357b 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -34,16 +34,16 @@ class TaskType(Enum): class OpenMLTask(OpenMLBase): """OpenML Task object. - Parameters - ---------- - task_type_id : TaskType - Refers to the type of task. - task_type : str - Refers to the task. - data_set_id: int - Refers to the data. - estimation_procedure_id: int - Refers to the type of estimates used. + Parameters + ---------- + task_type_id : TaskType + Refers to the type of task. + task_type : str + Refers to the task. + data_set_id: int + Refers to the data. + estimation_procedure_id: int + Refers to the type of estimates used. """ def __init__( @@ -82,7 +82,7 @@ def id(self) -> Optional[int]: return self.task_id def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: - """ Collect all information to display in the __repr__ body. """ + """Collect all information to display in the __repr__ body.""" fields = { "Task Type Description": "{}/tt/{}".format( openml.config.get_server_base_url(), self.task_type_id @@ -97,7 +97,7 @@ def _get_repr_body_fields(self) -> List[Tuple[str, Union[str, int, List[str]]]]: fields["Estimation Procedure"] = self.estimation_procedure["type"] if getattr(self, "target_name", None) is not None: fields["Target Feature"] = getattr(self, "target_name") - if hasattr(self, "class_labels"): + if hasattr(self, "class_labels") and getattr(self, "class_labels") is not None: fields["# of Classes"] = len(getattr(self, "class_labels")) if hasattr(self, "cost_matrix"): fields["Cost Matrix"] = "Available" @@ -120,14 +120,21 @@ def get_dataset(self) -> datasets.OpenMLDataset: return datasets.get_dataset(self.dataset_id) def get_train_test_split_indices( - self, fold: int = 0, repeat: int = 0, sample: int = 0, + self, + fold: int = 0, + repeat: int = 0, + sample: int = 0, ) -> Tuple[np.ndarray, np.ndarray]: # Replace with retrieve from cache if self.split is None: self.split = self.download_split() - train_indices, test_indices = self.split.get(repeat=repeat, fold=fold, sample=sample,) + train_indices, test_indices = self.split.get( + repeat=repeat, + fold=fold, + sample=sample, + ) return train_indices, test_indices def _download_split(self, cache_file: str): @@ -137,14 +144,15 @@ def _download_split(self, cache_file: str): except (OSError, IOError): split_url = self.estimation_procedure["data_splits_url"] openml._api_calls._download_text_file( - source=str(split_url), output_path=cache_file, + source=str(split_url), + output_path=cache_file, ) def download_split(self) -> OpenMLSplit: - """Download the OpenML split for a given task. 
- """ + """Download the OpenML split for a given task.""" cached_split_file = os.path.join( - _create_cache_directory_for_id("tasks", self.task_id), "datasplits.arff", + _create_cache_directory_for_id("tasks", self.task_id), + "datasplits.arff", ) try: @@ -164,11 +172,11 @@ def get_split_dimensions(self) -> Tuple[int, int, int]: return self.split.repeats, self.split.folds, self.split.samples def _to_dict(self) -> "OrderedDict[str, OrderedDict]": - """ Creates a dictionary representation of self. """ + """Creates a dictionary representation of self.""" task_container = OrderedDict() # type: OrderedDict[str, OrderedDict] task_dict = OrderedDict( [("@xmlns:oml", "http://openml.org/openml")] - ) # type: OrderedDict[str, Union[List, str, TaskType]] + ) # type: OrderedDict[str, Union[List, str, int]] task_container["oml:task_inputs"] = task_dict task_dict["oml:task_type_id"] = self.task_type_id.value @@ -192,17 +200,17 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": return task_container def _parse_publish_response(self, xml_response: Dict): - """ Parse the id from the xml_response and assign it to self. """ + """Parse the id from the xml_response and assign it to self.""" self.task_id = int(xml_response["oml:upload_task"]["oml:id"]) class OpenMLSupervisedTask(OpenMLTask, ABC): """OpenML Supervised Classification object. - Parameters - ---------- - target_name : str - Name of the target feature (the class variable). + Parameters + ---------- + target_name : str + Name of the target feature (the class variable). """ def __init__( @@ -233,7 +241,8 @@ def __init__( self.target_name = target_name def get_X_and_y( - self, dataset_format: str = "array", + self, + dataset_format: str = "array", ) -> Tuple[ Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix], Union[np.ndarray, pd.Series] ]: @@ -257,7 +266,10 @@ def get_X_and_y( TaskType.LEARNING_CURVE, ): raise NotImplementedError(self.task_type) - X, y, _, _ = dataset.get_data(dataset_format=dataset_format, target=self.target_name,) + X, y, _, _ = dataset.get_data( + dataset_format=dataset_format, + target=self.target_name, + ) return X, y def _to_dict(self) -> "OrderedDict[str, OrderedDict]": @@ -291,10 +303,10 @@ def estimation_parameters(self, est_parameters): class OpenMLClassificationTask(OpenMLSupervisedTask): """OpenML Classification object. - Parameters - ---------- - class_labels : List of str (optional) - cost_matrix: array (optional) + Parameters + ---------- + class_labels : List of str (optional) + cost_matrix: array (optional) """ def __init__( @@ -333,8 +345,7 @@ def __init__( class OpenMLRegressionTask(OpenMLSupervisedTask): - """OpenML Regression object. - """ + """OpenML Regression object.""" def __init__( self, @@ -366,11 +377,11 @@ def __init__( class OpenMLClusteringTask(OpenMLTask): """OpenML Clustering object. - Parameters - ---------- - target_name : str (optional) - Name of the target feature (class) that is not part of the - feature set for the clustering task. + Parameters + ---------- + target_name : str (optional) + Name of the target feature (class) that is not part of the + feature set for the clustering task. """ def __init__( @@ -401,7 +412,8 @@ def __init__( self.target_name = target_name def get_X( - self, dataset_format: str = "array", + self, + dataset_format: str = "array", ) -> Union[np.ndarray, pd.DataFrame, scipy.sparse.spmatrix]: """Get data associated with the current task. 
@@ -417,7 +429,10 @@ def get_X( """ dataset = self.get_dataset() - data, *_ = dataset.get_data(dataset_format=dataset_format, target=None,) + data, *_ = dataset.get_data( + dataset_format=dataset_format, + target=None, + ) return data def _to_dict(self) -> "OrderedDict[str, OrderedDict]": @@ -442,8 +457,7 @@ def _to_dict(self) -> "OrderedDict[str, OrderedDict]": class OpenMLLearningCurveTask(OpenMLClassificationTask): - """OpenML Learning Curve object. - """ + """OpenML Learning Curve object.""" def __init__( self, diff --git a/openml/testing.py b/openml/testing.py index 922d373b2..4e2f0c006 100644 --- a/openml/testing.py +++ b/openml/testing.py @@ -3,12 +3,14 @@ import hashlib import inspect import os +import pathlib import shutil import sys import time from typing import Dict, Union, cast import unittest import pandas as pd +import requests import openml from openml.tasks import TaskType @@ -114,7 +116,7 @@ def tearDown(self): @classmethod def _mark_entity_for_removal(self, entity_type, entity_id): - """ Static record of entities uploaded to test server + """Static record of entities uploaded to test server Dictionary of lists where the keys are 'entity_type'. Each such dictionary is a list of integer IDs. @@ -128,7 +130,7 @@ def _mark_entity_for_removal(self, entity_type, entity_id): @classmethod def _delete_entity_from_tracker(self, entity_type, entity): - """ Deletes entity records from the static file_tracker + """Deletes entity records from the static file_tracker Given an entity type and corresponding ID, deletes all entries, including duplicate entries of the ID for the entity type. @@ -306,4 +308,22 @@ class CustomImputer(SimpleImputer): pass -__all__ = ["TestBase", "SimpleImputer", "CustomImputer", "check_task_existence"] +def create_request_response( + *, status_code: int, content_filepath: pathlib.Path +) -> requests.Response: + with open(content_filepath, "r") as xml_response: + response_body = xml_response.read() + + response = requests.Response() + response.status_code = status_code + response._content = response_body.encode() + return response + + +__all__ = [ + "TestBase", + "SimpleImputer", + "CustomImputer", + "check_task_existence", + "create_request_response", +] diff --git a/openml/utils.py b/openml/utils.py index a482bf0bc..3c2fa876f 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -71,7 +71,7 @@ def extract_xml_tags(xml_tag_name, node, allow_none=True): def _get_rest_api_type_alias(oml_object: "OpenMLBase") -> str: - """ Return the alias of the openml entity as it is defined for the REST API. 
""" + """Return the alias of the openml entity as it is defined for the REST API.""" rest_api_mapping = [ (openml.datasets.OpenMLDataset, "data"), (openml.flows.OpenMLFlow, "flow"), @@ -172,12 +172,42 @@ def _delete_entity(entity_type, entity_id): raise ValueError("Can't delete a %s" % entity_type) url_suffix = "%s/%d" % (entity_type, entity_id) - result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") - result = xmltodict.parse(result_xml) - if "oml:%s_delete" % entity_type in result: - return True - else: - return False + try: + result_xml = openml._api_calls._perform_api_call(url_suffix, "delete") + result = xmltodict.parse(result_xml) + return f"oml:{entity_type}_delete" in result + except openml.exceptions.OpenMLServerException as e: + # https://github.com/openml/OpenML/blob/21f6188d08ac24fcd2df06ab94cf421c946971b0/openml_OS/views/pages/api_new/v1/xml/pre.php + # Most exceptions are descriptive enough to be raised as their standard + # OpenMLServerException, however there are two cases where we add information: + # - a generic "failed" message, we direct them to the right issue board + # - when the user successfully authenticates with the server, + # but user is not allowed to take the requested action, + # in which case we specify a OpenMLNotAuthorizedError. + by_other_user = [323, 353, 393, 453, 594] + has_dependent_entities = [324, 326, 327, 328, 354, 454, 464, 595] + unknown_reason = [325, 355, 394, 455, 593] + if e.code in by_other_user: + raise openml.exceptions.OpenMLNotAuthorizedError( + message=( + f"The {entity_type} can not be deleted because it was not uploaded by you." + ), + ) from e + if e.code in has_dependent_entities: + raise openml.exceptions.OpenMLNotAuthorizedError( + message=( + f"The {entity_type} can not be deleted because " + f"it still has associated entities: {e.message}" + ) + ) from e + if e.code in unknown_reason: + raise openml.exceptions.OpenMLServerError( + message=( + f"The {entity_type} can not be deleted for unknown reason," + " please open an issue at: https://github.com/openml/openml/issues/new" + ), + ) from e + raise def _list_all(listing_call, output_format="dict", *args, **filters): diff --git a/setup.py b/setup.py index f5e70abb5..9f3cdd0e6 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ # Make sure to remove stale files such as the egg-info before updating this: # https://stackoverflow.com/a/26547314 packages=setuptools.find_packages( - include=["openml.*", "openml"], exclude=["*.tests", "*.tests.*", "tests.*", "tests"], + include=["openml.*", "openml"], + exclude=["*.tests", "*.tests.*", "tests.*", "tests"], ), package_data={"": ["*.txt", "*.md", "py.typed"]}, python_requires=">=3.6", @@ -84,7 +85,12 @@ "seaborn", ], "examples_unix": ["fanova"], - "docs": ["sphinx>=3", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc",], + "docs": [ + "sphinx>=3", + "sphinx-gallery", + "sphinx_bootstrap_theme", + "numpydoc", + ], }, test_suite="pytest", classifiers=[ diff --git a/tests/conftest.py b/tests/conftest.py index c1f728a72..43e2cc3ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,6 +24,7 @@ import os import logging +import pathlib from typing import List import pytest @@ -38,7 +39,7 @@ def worker_id() -> str: - """ Returns the name of the worker process owning this function call. + """Returns the name of the worker process owning this function call. 
:return: str Possible outputs from the set of {'master', 'gw0', 'gw1', ..., 'gw(n-1)'} @@ -51,26 +52,20 @@ def worker_id() -> str: return "master" -def read_file_list() -> List[str]: +def read_file_list() -> List[pathlib.Path]: """Returns a list of paths to all files that currently exist in 'openml/tests/files/' - :return: List[str] + :return: List[pathlib.Path] """ - this_dir = os.path.abspath(os.path.dirname(os.path.abspath(__file__))) - directory = os.path.join(this_dir, "..") - logger.info("Collecting file lists from: {}".format(directory)) - file_list = [] - for root, _, filenames in os.walk(directory): - for filename in filenames: - file_list.append(os.path.join(root, filename)) - return file_list + test_files_dir = pathlib.Path(__file__).parent / "files" + return [f for f in test_files_dir.rglob("*") if f.is_file()] -def compare_delete_files(old_list, new_list) -> None: +def compare_delete_files(old_list: List[pathlib.Path], new_list: List[pathlib.Path]) -> None: """Deletes files that are there in the new_list but not in the old_list - :param old_list: List[str] - :param new_list: List[str] + :param old_list: List[pathlib.Path] + :param new_list: List[pathlib.Path] :return: None """ file_list = list(set(new_list) - set(old_list)) @@ -174,6 +169,10 @@ def pytest_sessionfinish() -> None: logger.info("{} is killed".format(worker)) +def pytest_configure(config): + config.addinivalue_line("markers", "sklearn: marks tests that use scikit-learn") + + def pytest_addoption(parser): parser.addoption( "--long", @@ -186,3 +185,13 @@ def pytest_addoption(parser): @pytest.fixture(scope="class") def long_version(request): request.cls.long_version = request.config.getoption("--long") + + +@pytest.fixture +def test_files_directory() -> pathlib.Path: + return pathlib.Path(__file__).parent / "files" + + +@pytest.fixture() +def test_api_key() -> str: + return "c0c42819af31e706efe1f4b88c23c6c1" diff --git a/tests/files/mock_responses/datasets/data_delete_has_tasks.xml b/tests/files/mock_responses/datasets/data_delete_has_tasks.xml new file mode 100644 index 000000000..fc866047c --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_has_tasks.xml @@ -0,0 +1,4 @@ + + 354 + Dataset is in use by other content. 
Can not be deleted + diff --git a/tests/files/mock_responses/datasets/data_delete_not_exist.xml b/tests/files/mock_responses/datasets/data_delete_not_exist.xml new file mode 100644 index 000000000..b3b212fbe --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 352 + Dataset does not exist + diff --git a/tests/files/mock_responses/datasets/data_delete_not_owned.xml b/tests/files/mock_responses/datasets/data_delete_not_owned.xml new file mode 100644 index 000000000..7d412d48e --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 353 + Dataset is not owned by you + \ No newline at end of file diff --git a/tests/files/mock_responses/datasets/data_delete_successful.xml b/tests/files/mock_responses/datasets/data_delete_successful.xml new file mode 100644 index 000000000..9df47c1a2 --- /dev/null +++ b/tests/files/mock_responses/datasets/data_delete_successful.xml @@ -0,0 +1,3 @@ + + 40000 + diff --git a/tests/files/mock_responses/flows/flow_delete_has_runs.xml b/tests/files/mock_responses/flows/flow_delete_has_runs.xml new file mode 100644 index 000000000..5c8530e75 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_has_runs.xml @@ -0,0 +1,5 @@ + + 324 + flow is in use by other content (runs). Can not be deleted + {10716, 10707} () + diff --git a/tests/files/mock_responses/flows/flow_delete_is_subflow.xml b/tests/files/mock_responses/flows/flow_delete_is_subflow.xml new file mode 100644 index 000000000..ddc314ae4 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_is_subflow.xml @@ -0,0 +1,5 @@ + + 328 + flow is in use by other content (it is a subflow). Can not be deleted + {37661} + diff --git a/tests/files/mock_responses/flows/flow_delete_not_exist.xml b/tests/files/mock_responses/flows/flow_delete_not_exist.xml new file mode 100644 index 000000000..4df49149f --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 322 + flow does not exist + diff --git a/tests/files/mock_responses/flows/flow_delete_not_owned.xml b/tests/files/mock_responses/flows/flow_delete_not_owned.xml new file mode 100644 index 000000000..3aa9a9ef2 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 323 + flow is not owned by you + diff --git a/tests/files/mock_responses/flows/flow_delete_successful.xml b/tests/files/mock_responses/flows/flow_delete_successful.xml new file mode 100644 index 000000000..7638e942d --- /dev/null +++ b/tests/files/mock_responses/flows/flow_delete_successful.xml @@ -0,0 +1,3 @@ + + 33364 + diff --git a/tests/files/mock_responses/runs/run_delete_not_exist.xml b/tests/files/mock_responses/runs/run_delete_not_exist.xml new file mode 100644 index 000000000..855c223fa --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 392 + Run does not exist + diff --git a/tests/files/mock_responses/runs/run_delete_not_owned.xml b/tests/files/mock_responses/runs/run_delete_not_owned.xml new file mode 100644 index 000000000..551252e22 --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 393 + Run is not owned by you + diff --git a/tests/files/mock_responses/runs/run_delete_successful.xml b/tests/files/mock_responses/runs/run_delete_successful.xml new file mode 100644 index 000000000..fe4233afa --- /dev/null +++ b/tests/files/mock_responses/runs/run_delete_successful.xml @@ -0,0 +1,3 @@ + + 10591880 + diff --git 
a/tests/files/mock_responses/tasks/task_delete_has_runs.xml b/tests/files/mock_responses/tasks/task_delete_has_runs.xml new file mode 100644 index 000000000..87a92540d --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_has_runs.xml @@ -0,0 +1,4 @@ + + 454 + Task is executed in some runs. Delete these first + diff --git a/tests/files/mock_responses/tasks/task_delete_not_exist.xml b/tests/files/mock_responses/tasks/task_delete_not_exist.xml new file mode 100644 index 000000000..8a262af29 --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_not_exist.xml @@ -0,0 +1,4 @@ + + 452 + Task does not exist + diff --git a/tests/files/mock_responses/tasks/task_delete_not_owned.xml b/tests/files/mock_responses/tasks/task_delete_not_owned.xml new file mode 100644 index 000000000..3d504772b --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_not_owned.xml @@ -0,0 +1,4 @@ + + 453 + Task is not owned by you + diff --git a/tests/files/mock_responses/tasks/task_delete_successful.xml b/tests/files/mock_responses/tasks/task_delete_successful.xml new file mode 100644 index 000000000..594b6e992 --- /dev/null +++ b/tests/files/mock_responses/tasks/task_delete_successful.xml @@ -0,0 +1,3 @@ + + 361323 + diff --git a/tests/files/org/openml/test/datasets/30/dataset.pq b/tests/files/org/openml/test/datasets/30/dataset_30.pq similarity index 100% rename from tests/files/org/openml/test/datasets/30/dataset.pq rename to tests/files/org/openml/test/datasets/30/dataset_30.pq diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index e9cb86c50..15a801383 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -143,6 +143,7 @@ def test_get_data_pandas(self): self.assertTrue(X[col_name].dtype.name == col_dtype[col_name]) self.assertTrue(y.dtype.name == col_dtype["survived"]) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_boolean_pandas(self): # test to check that we are converting properly True and False even # with some inconsistency when dumping the data on openml @@ -170,6 +171,7 @@ def _check_expected_type(self, dtype, is_cat, col): self.assertEqual(dtype.name, expected_type) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_rowid(self): self.dataset.row_id_attribute = "condition" rval, _, categorical, _ = self.dataset.get_data(include_row_id=True) @@ -196,6 +198,7 @@ def test_get_data_with_target_array(self): self.assertEqual(len(attribute_names), 38) self.assertNotIn("class", attribute_names) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_target_pandas(self): X, y, categorical, attribute_names = self.dataset.get_data(target="class") self.assertIsInstance(X, pd.DataFrame) @@ -220,6 +223,7 @@ def test_get_data_rowid_and_ignore_and_target(self): self.assertListEqual(categorical, cats) self.assertEqual(y.shape, (898,)) + @pytest.mark.skip("https://github.com/openml/openml-python/issues/1157") def test_get_data_with_ignore_attributes(self): self.dataset.ignore_attribute = ["condition"] rval, _, categorical, _ = self.dataset.get_data(include_ignore_attribute=True) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 9d67ee177..45a64ab8a 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -13,6 +13,7 @@ import pytest import numpy as np import pandas as pd 
+import requests import scipy.sparse from oslo_concurrency import lockutils @@ -23,8 +24,9 @@ OpenMLHashException, OpenMLPrivateDatasetError, OpenMLServerException, + OpenMLNotAuthorizedError, ) -from openml.testing import TestBase +from openml.testing import TestBase, create_request_response from openml.utils import _tag_entity, _create_cache_directory_for_id from openml.datasets.functions import ( create_dataset, @@ -58,7 +60,8 @@ def _remove_pickle_files(self): self.lock_path = os.path.join(openml.config.get_cache_directory(), "locks") for did in ["-1", "2"]: with lockutils.external_lock( - name="datasets.functions.get_dataset:%s" % did, lock_path=self.lock_path, + name="datasets.functions.get_dataset:%s" % did, + lock_path=self.lock_path, ): pickle_path = os.path.join( openml.config.get_cache_directory(), "datasets", did, "dataset.pkl.py3" @@ -175,7 +178,10 @@ def test_list_datasets_empty(self): def test_check_datasets_active(self): # Have to test on live because there is no deactivated dataset on the test server. openml.config.server = self.production_server - active = openml.datasets.check_datasets_active([2, 17, 79], raise_error_if_not_exist=False,) + active = openml.datasets.check_datasets_active( + [2, 17, 79], + raise_error_if_not_exist=False, + ) self.assertTrue(active[2]) self.assertFalse(active[17]) self.assertIsNone(active.get(79)) @@ -188,7 +194,7 @@ def test_check_datasets_active(self): openml.config.server = self.test_server def _datasets_retrieved_successfully(self, dids, metadata_only=True): - """ Checks that all files for the given dids have been downloaded. + """Checks that all files for the given dids have been downloaded. This includes: - description @@ -229,24 +235,24 @@ def _datasets_retrieved_successfully(self, dids, metadata_only=True): ) def test__name_to_id_with_deactivated(self): - """ Check that an activated dataset is returned if an earlier deactivated one exists. """ + """Check that an activated dataset is returned if an earlier deactivated one exists.""" openml.config.server = self.production_server # /d/1 was deactivated self.assertEqual(openml.datasets.functions._name_to_id("anneal"), 2) openml.config.server = self.test_server def test__name_to_id_with_multiple_active(self): - """ With multiple active datasets, retrieve the least recent active. """ + """With multiple active datasets, retrieve the least recent active.""" openml.config.server = self.production_server self.assertEqual(openml.datasets.functions._name_to_id("iris"), 61) def test__name_to_id_with_version(self): - """ With multiple active datasets, retrieve the least recent active. """ + """With multiple active datasets, retrieve the least recent active.""" openml.config.server = self.production_server self.assertEqual(openml.datasets.functions._name_to_id("iris", version=3), 969) def test__name_to_id_with_multiple_active_error(self): - """ With multiple active datasets, retrieve the least recent active. """ + """With multiple active datasets, retrieve the least recent active.""" openml.config.server = self.production_server self.assertRaisesRegex( ValueError, @@ -257,7 +263,7 @@ def test__name_to_id_with_multiple_active_error(self): ) def test__name_to_id_name_does_not_exist(self): - """ With multiple active datasets, retrieve the least recent active. 
""" + """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( RuntimeError, "No active datasets exist with name does_not_exist", @@ -266,7 +272,7 @@ def test__name_to_id_name_does_not_exist(self): ) def test__name_to_id_version_does_not_exist(self): - """ With multiple active datasets, retrieve the least recent active. """ + """With multiple active datasets, retrieve the least recent active.""" self.assertRaisesRegex( RuntimeError, "No active datasets exist with name iris and version 100000", @@ -318,6 +324,15 @@ def test_get_dataset_by_name(self): openml.config.server = self.production_server self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45) + @pytest.mark.skip("Feature is experimental, can not test against stable server.") + def test_get_dataset_download_all_files(self): + # openml.datasets.get_dataset(id, download_all_files=True) + # check for expected files + # checking that no additional files are downloaded if + # the default (false) is used, seems covered by + # test_get_dataset_lazy + raise NotImplementedError + def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) self.assertEqual(type(dataset), OpenMLDataset) @@ -356,7 +371,7 @@ def test_get_dataset_lazy(self): self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, 45, False) def test_get_dataset_lazy_all_functions(self): - """ Test that all expected functionality is available without downloading the dataset. """ + """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1, download_data=False) # We only tests functions as general integrity is tested by test_get_dataset_lazy @@ -458,9 +473,9 @@ def test__download_minio_file_raises_FileExists_if_destination_in_use(self): ) def test__download_minio_file_works_with_bucket_subdirectory(self): - file_destination = pathlib.Path(self.workdir, "custom.csv") + file_destination = pathlib.Path(self.workdir, "custom.pq") _download_minio_file( - source="http://openml1.win.tue.nl/test/subdirectory/test.csv", + source="http://openml1.win.tue.nl/dataset61/dataset_61.pq", destination=file_destination, exists_ok=True, ) @@ -537,10 +552,14 @@ def test__get_dataset_skip_download(self): def test_deletion_of_cache_dir(self): # Simple removal - did_cache_dir = _create_cache_directory_for_id(DATASETS_CACHE_DIR_NAME, 1,) + did_cache_dir = _create_cache_directory_for_id( + DATASETS_CACHE_DIR_NAME, + 1, + ) self.assertTrue(os.path.exists(did_cache_dir)) openml.utils._remove_cache_dir_for_id( - DATASETS_CACHE_DIR_NAME, did_cache_dir, + DATASETS_CACHE_DIR_NAME, + did_cache_dir, ) self.assertFalse(os.path.exists(did_cache_dir)) @@ -1232,7 +1251,7 @@ def _wait_for_dataset_being_processed(self, dataset_id): try: downloaded_dataset = openml.datasets.get_dataset(dataset_id) break - except Exception as e: + except OpenMLServerException as e: # returned code 273: Dataset not processed yet # returned code 362: No qualities found TestBase.logger.error( @@ -1526,11 +1545,17 @@ def test_data_fork(self): self.assertNotEqual(did, result) # Check server exception when unknown dataset is provided self.assertRaisesRegex( - OpenMLServerException, "Unknown dataset", fork_dataset, data_id=999999, + OpenMLServerException, + "Unknown dataset", + fork_dataset, + data_id=999999, ) def test_get_dataset_parquet(self): - dataset = openml.datasets.get_dataset(20) + # Parquet functionality is disabled on the test server + # There is no parquet-copy of the 
test server yet. + openml.config.server = self.production_server + dataset = openml.datasets.get_dataset(61) self.assertIsNotNone(dataset._minio_url) self.assertIsNotNone(dataset.parquet_file) self.assertTrue(os.path.isfile(dataset.parquet_file)) @@ -1649,3 +1674,138 @@ def test_valid_attribute_validations(default_target_attribute, row_id_attribute, original_data_url=original_data_url, paper_url=paper_url, ) + + def test_delete_dataset(self): + data = [ + ["a", "sunny", 85.0, 85.0, "FALSE", "no"], + ["b", "sunny", 80.0, 90.0, "TRUE", "no"], + ["c", "overcast", 83.0, 86.0, "FALSE", "yes"], + ["d", "rainy", 70.0, 96.0, "FALSE", "yes"], + ["e", "rainy", 68.0, 80.0, "FALSE", "yes"], + ] + column_names = ["rnd_str", "outlook", "temperature", "humidity", "windy", "play"] + df = pd.DataFrame(data, columns=column_names) + # enforce the type of each column + df["outlook"] = df["outlook"].astype("category") + df["windy"] = df["windy"].astype("bool") + df["play"] = df["play"].astype("category") + # meta-information + name = "%s-pandas_testing_dataset" % self._get_sentinel() + description = "Synthetic dataset created from a Pandas DataFrame" + creator = "OpenML tester" + collection_date = "01-01-2018" + language = "English" + licence = "MIT" + citation = "None" + original_data_url = "http://openml.github.io/openml-python" + paper_url = "http://openml.github.io/openml-python" + dataset = openml.datasets.functions.create_dataset( + name=name, + description=description, + creator=creator, + contributor=None, + collection_date=collection_date, + language=language, + licence=licence, + default_target_attribute="play", + row_id_attribute=None, + ignore_attribute=None, + citation=citation, + attributes="auto", + data=df, + version_label="test", + original_data_url=original_data_url, + paper_url=paper_url, + ) + dataset.publish() + _dataset_id = dataset.id + self.assertTrue(openml.datasets.delete_dataset(_dataset_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_dataset_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_owned.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The data can not be deleted because it was not uploaded by you.", + ): + openml.datasets.delete_dataset(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_dataset_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_has_tasks.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The data can not be deleted because it still has associated entities:", + ): + openml.datasets.delete_dataset(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def 
test_delete_dataset_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_successful.xml" + ) + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.datasets.delete_dataset(40000) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_dataset(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = ( + test_files_directory / "mock_responses" / "datasets" / "data_delete_not_exist.xml" + ) + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Dataset does not exist", + ): + openml.datasets.delete_dataset(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/data/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_extensions/test_functions.py b/tests/test_extensions/test_functions.py index 85361cc02..36bb06061 100644 --- a/tests/test_extensions/test_functions.py +++ b/tests/test_extensions/test_functions.py @@ -9,6 +9,7 @@ class DummyFlow: external_version = "DummyFlow==0.1" + dependencies = None class DummyModel: @@ -18,15 +19,11 @@ class DummyModel: class DummyExtension1: @staticmethod def can_handle_flow(flow): - if not inspect.stack()[2].filename.endswith("test_functions.py"): - return False - return True + return inspect.stack()[2].filename.endswith("test_functions.py") @staticmethod def can_handle_model(model): - if not inspect.stack()[2].filename.endswith("test_functions.py"): - return False - return True + return inspect.stack()[2].filename.endswith("test_functions.py") class DummyExtension2: @@ -73,7 +70,8 @@ def test_get_extension_by_flow(self): self.assertIsInstance(get_extension_by_flow(DummyFlow()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, "Multiple extensions registered which can handle flow:", + ValueError, + "Multiple extensions registered which can handle flow:", ): get_extension_by_flow(DummyFlow()) @@ -87,6 +85,7 @@ def test_get_extension_by_model(self): self.assertIsInstance(get_extension_by_model(DummyModel()), DummyExtension1) register_extension(DummyExtension1) with self.assertRaisesRegex( - ValueError, "Multiple extensions registered which can handle model:", + ValueError, + "Multiple extensions registered which can handle model:", ): get_extension_by_model(DummyModel()) diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index e45eeea53..86ae419d2 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -5,6 +5,7 @@ import re import os import sys +from typing import Any import unittest from distutils.version import LooseVersion from collections import OrderedDict @@ -14,6 +15,7 @@ import numpy as np import pandas as pd +import pytest import scipy.optimize import scipy.stats import 
sklearn.base @@ -73,6 +75,45 @@ def setUp(self): self.extension = SklearnExtension() + def _get_expected_pipeline_description(self, model: Any) -> str: + if version.parse(sklearn.__version__) >= version.parse("1.0"): + expected_fixture = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement `fit` and `transform` methods.\nThe final " + "estimator only needs to implement `fit`.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ". For this, it\nenables setting parameters of the various steps" + " using their names and the\nparameter name separated by a `'__'`," + " as in the example below. A step's\nestimator may be replaced " + "entirely by setting the parameter with its name\nto another " + "estimator, or a transformer removed by setting it to\n" + "`'passthrough'` or `None`." + ) + elif version.parse(sklearn.__version__) >= version.parse("0.21.0"): + expected_fixture = ( + "Pipeline of transforms with a final estimator.\n\nSequentially" + " apply a list of transforms and a final estimator.\n" + "Intermediate steps of the pipeline must be 'transforms', that " + "is, they\nmust implement fit and transform methods.\nThe final " + "estimator only needs to implement fit.\nThe transformers in " + "the pipeline can be cached using ``memory`` argument.\n\nThe " + "purpose of the pipeline is to assemble several steps that can " + "be\ncross-validated together while setting different parameters" + ".\nFor this, it enables setting parameters of the various steps" + " using their\nnames and the parameter name separated by a '__'," + " as in the example below.\nA step's estimator may be replaced " + "entirely by setting the parameter\nwith its name to another " + "estimator, or a transformer removed by setting\nit to " + "'passthrough' or ``None``." 
+ ) + else: + expected_fixture = self.extension._get_sklearn_description(model) + return expected_fixture + def _serialization_test_helper( self, model, X, y, subcomponent_parameters, dependencies_mock_call_count=(1, 2) ): @@ -136,6 +177,7 @@ def _serialization_test_helper( return serialization, new_model + @pytest.mark.sklearn def test_serialize_model(self): model = sklearn.tree.DecisionTreeClassifier( criterion="entropy", max_features="auto", max_leaf_nodes=2000 @@ -168,7 +210,7 @@ def test_serialize_model(self): ("splitter", '"best"'), ) ) - else: + elif LooseVersion(sklearn.__version__) < "1.0": fixture_parameters = OrderedDict( ( ("class_weight", "null"), @@ -186,6 +228,24 @@ def test_serialize_model(self): ("splitter", '"best"'), ) ) + else: + fixture_parameters = OrderedDict( + ( + ("class_weight", "null"), + ("criterion", '"entropy"'), + ("max_depth", "null"), + ("max_features", '"auto"'), + ("max_leaf_nodes", "2000"), + ("min_impurity_decrease", "0.0"), + ("min_samples_leaf", "1"), + ("min_samples_split", "2"), + ("min_weight_fraction_leaf", "0.0"), + ("presort", presort_val), + ("random_state", "null"), + ("splitter", '"best"'), + ) + ) + if LooseVersion(sklearn.__version__) >= "0.22": fixture_parameters.update({"ccp_alpha": "0.0"}) fixture_parameters.move_to_end("ccp_alpha", last=False) @@ -207,6 +267,7 @@ def test_serialize_model(self): self.assertEqual(serialization.dependencies, version_fixture) self.assertDictEqual(structure, structure_fixture) + @pytest.mark.sklearn def test_can_handle_flow(self): openml.config.server = self.production_server @@ -217,6 +278,7 @@ def test_can_handle_flow(self): openml.config.server = self.test_server + @pytest.mark.sklearn def test_serialize_model_clustering(self): model = sklearn.cluster.KMeans() @@ -249,7 +311,7 @@ def test_serialize_model_clustering(self): ("verbose", "0"), ) ) - else: + elif LooseVersion(sklearn.__version__) < "1.0": fixture_parameters = OrderedDict( ( ("algorithm", '"auto"'), @@ -265,6 +327,35 @@ def test_serialize_model_clustering(self): ("verbose", "0"), ) ) + elif LooseVersion(sklearn.__version__) < "1.1": + fixture_parameters = OrderedDict( + ( + ("algorithm", '"auto"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", "10"), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) + else: + n_init = '"warn"' if LooseVersion(sklearn.__version__) >= "1.2" else "10" + fixture_parameters = OrderedDict( + ( + ("algorithm", '"lloyd"'), + ("copy_x", "true"), + ("init", '"k-means++"'), + ("max_iter", "300"), + ("n_clusters", "8"), + ("n_init", n_init), + ("random_state", "null"), + ("tol", "0.0001"), + ("verbose", "0"), + ) + ) fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} serialization, _ = self._serialization_test_helper( @@ -272,14 +363,15 @@ def test_serialize_model_clustering(self): ) structure = serialization.get_structure("name") - self.assertEqual(serialization.name, fixture_name) - self.assertEqual(serialization.class_name, fixture_name) - self.assertEqual(serialization.custom_name, fixture_short_name) - self.assertEqual(serialization.description, fixture_description) - self.assertEqual(serialization.parameters, fixture_parameters) - self.assertEqual(serialization.dependencies, version_fixture) - self.assertDictEqual(structure, fixture_structure) + assert serialization.name == fixture_name + assert serialization.class_name == fixture_name + assert serialization.custom_name == fixture_short_name + assert 
serialization.description == fixture_description + assert serialization.parameters == fixture_parameters + assert serialization.dependencies == version_fixture + assert structure == fixture_structure + @pytest.mark.sklearn def test_serialize_model_with_subcomponent(self): model = sklearn.ensemble.AdaBoostClassifier( n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() @@ -340,6 +432,7 @@ def test_serialize_model_with_subcomponent(self): ) self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn def test_serialize_pipeline(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) dummy = sklearn.dummy.DummyClassifier(strategy="prior") @@ -352,27 +445,7 @@ def test_serialize_pipeline(self): "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" - - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final " - "estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ".\nFor this, it enables setting parameters of the various steps" - " using their\nnames and the parameter name separated by a '__'," - " as in the example below.\nA step's estimator may be replaced " - "entirely by setting the parameter\nwith its name to another " - "estimator, or a transformer removed by setting\nit to " - "'passthrough' or ``None``." 
- ) - else: - fixture_description = self.extension._get_sklearn_description(model) - + fixture_description = self._get_expected_pipeline_description(model) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], @@ -429,6 +502,7 @@ def test_serialize_pipeline(self): self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) + @pytest.mark.sklearn def test_serialize_pipeline_clustering(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) km = sklearn.cluster.KMeans() @@ -442,26 +516,7 @@ def test_serialize_pipeline_clustering(self): "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" - - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final " - "estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different parameters" - ".\nFor this, it enables setting parameters of the various steps" - " using their\nnames and the parameter name separated by a '__'," - " as in the example below.\nA step's estimator may be replaced " - "entirely by setting the parameter\nwith its name to another " - "estimator, or a transformer removed by setting\nit to " - "'passthrough' or ``None``." 
- ) - else: - fixture_description = self.extension._get_sklearn_description(model) + fixture_description = self._get_expected_pipeline_description(model) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], @@ -516,6 +571,7 @@ def test_serialize_pipeline_clustering(self): self.assertIsNot(new_model.steps[0][1], model.steps[0][1]) self.assertIsNot(new_model.steps[1][1], model.steps[1][1]) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -574,6 +630,7 @@ def test_serialize_column_transformer(self): self.assertEqual(serialization.description, fixture_description) self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -619,27 +676,7 @@ def test_serialize_column_transformer_pipeline(self): fixture_name: [], } - if version.parse(sklearn.__version__) >= version.parse("0.21.0"): - # str obtained from self.extension._get_sklearn_description(model) - fixture_description = ( - "Pipeline of transforms with a final estimator.\n\nSequentially" - " apply a list of transforms and a final estimator.\n" - "Intermediate steps of the pipeline must be 'transforms', that " - "is, they\nmust implement fit and transform methods.\nThe final" - " estimator only needs to implement fit.\nThe transformers in " - "the pipeline can be cached using ``memory`` argument.\n\nThe " - "purpose of the pipeline is to assemble several steps that can " - "be\ncross-validated together while setting different " - "parameters.\nFor this, it enables setting parameters of the " - "various steps using their\nnames and the parameter name " - "separated by a '__', as in the example below.\nA step's " - "estimator may be replaced entirely by setting the parameter\n" - "with its name to another estimator, or a transformer removed by" - " setting\nit to 'passthrough' or ``None``." 
- ) - else: - fixture_description = self.extension._get_sklearn_description(model) - + fixture_description = self._get_expected_pipeline_description(model) serialization, new_model = self._serialization_test_helper( model, X=None, @@ -660,6 +697,7 @@ def test_serialize_column_transformer_pipeline(self): self.assertDictEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="Pipeline processing behaviour updated" ) @@ -728,6 +766,7 @@ def test_serialize_feature_union(self): ) self.assertIs(new_model.transformer_list[1][1], "drop") + @pytest.mark.sklearn def test_serialize_feature_union_switched_names(self): ohe_params = {"categories": "auto"} if LooseVersion(sklearn.__version__) >= "0.20" else {} ohe = sklearn.preprocessing.OneHotEncoder(**ohe_params) @@ -736,10 +775,18 @@ def test_serialize_feature_union_switched_names(self): fu2 = sklearn.pipeline.FeatureUnion(transformer_list=[("scaler", ohe), ("ohe", scaler)]) fu1_serialization, _ = self._serialization_test_helper( - fu1, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + fu1, + X=None, + y=None, + subcomponent_parameters=(), + dependencies_mock_call_count=(3, 6), ) fu2_serialization, _ = self._serialization_test_helper( - fu2, X=None, y=None, subcomponent_parameters=(), dependencies_mock_call_count=(3, 6), + fu2, + X=None, + y=None, + subcomponent_parameters=(), + dependencies_mock_call_count=(3, 6), ) # OneHotEncoder was moved to _encoders module in 0.20 @@ -760,6 +807,7 @@ def test_serialize_feature_union_switched_names(self): "ohe=sklearn.preprocessing.{}.StandardScaler)".format(module_name_encoder, scaler_name), ) + @pytest.mark.sklearn def test_serialize_complex_flow(self): ohe = sklearn.preprocessing.OneHotEncoder(handle_unknown="ignore") scaler = sklearn.preprocessing.StandardScaler(with_mean=False) @@ -820,6 +868,7 @@ def test_serialize_complex_flow(self): self.assertEqual(serialized.name, fixture_name) self.assertEqual(structure, fixture_structure) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="Pipeline till 0.20 doesn't support 'passthrough'", @@ -915,14 +964,19 @@ def test_serialize_strings_as_pipeline_steps(self): self.assertIsInstance(extracted_info[2]["drop"], OpenMLFlow) self.assertEqual(extracted_info[2]["drop"].name, "drop") + @pytest.mark.sklearn def test_serialize_type(self): - supported_types = [float, np.float, np.float32, np.float64, int, np.int, np.int32, np.int64] + supported_types = [float, np.float32, np.float64, int, np.int32, np.int64] + if LooseVersion(np.__version__) < "1.24": + supported_types.append(np.float) + supported_types.append(np.int) for supported_type in supported_types: serialized = self.extension.model_to_flow(supported_type) deserialized = self.extension.flow_to_model(serialized) self.assertEqual(deserialized, supported_type) + @pytest.mark.sklearn def test_serialize_rvs(self): supported_rvs = [ scipy.stats.norm(loc=1, scale=5), @@ -938,11 +992,13 @@ def test_serialize_rvs(self): del supported_rv.dist self.assertEqual(deserialized.__dict__, supported_rv.__dict__) + @pytest.mark.sklearn def test_serialize_function(self): serialized = self.extension.model_to_flow(sklearn.feature_selection.chi2) deserialized = self.extension.flow_to_model(serialized) self.assertEqual(deserialized, sklearn.feature_selection.chi2) + @pytest.mark.sklearn def test_serialize_cvobject(self): methods = [sklearn.model_selection.KFold(3), 
sklearn.model_selection.LeaveOneOut()] fixtures = [ @@ -992,6 +1048,7 @@ def test_serialize_cvobject(self): self.assertIsNot(m_new, m) self.assertIsInstance(m_new, type(method)) + @pytest.mark.sklearn def test_serialize_simple_parameter_grid(self): # We cannot easily test for scipy random variables in here, but they @@ -1039,6 +1096,7 @@ def test_serialize_simple_parameter_grid(self): del deserialized_params["estimator"] self.assertEqual(hpo_params, deserialized_params) + @pytest.mark.sklearn @unittest.skip( "This feature needs further reworking. If we allow several " "components, we need to register them all in the downstream " @@ -1093,6 +1151,7 @@ def test_serialize_advanced_grid(self): self.assertEqual(grid[1]["reduce_dim__k"], deserialized[1]["reduce_dim__k"]) self.assertEqual(grid[1]["classify__C"], deserialized[1]["classify__C"]) + @pytest.mark.sklearn def test_serialize_advanced_grid_fails(self): # This unit test is checking that the test we skip above would actually fail @@ -1104,13 +1163,15 @@ def test_serialize_advanced_grid_fails(self): } clf = sklearn.model_selection.GridSearchCV( - sklearn.ensemble.BaggingClassifier(), param_grid=param_grid, + sklearn.ensemble.BaggingClassifier(), + param_grid=param_grid, ) with self.assertRaisesRegex( TypeError, re.compile(r".*OpenML.*Flow.*is not JSON serializable", flags=re.DOTALL) ): self.extension.model_to_flow(clf) + @pytest.mark.sklearn def test_serialize_resampling(self): kfold = sklearn.model_selection.StratifiedKFold(n_splits=4, shuffle=True) serialized = self.extension.model_to_flow(kfold) @@ -1119,6 +1180,7 @@ def test_serialize_resampling(self): self.assertEqual(str(deserialized), str(kfold)) self.assertIsNot(deserialized, kfold) + @pytest.mark.sklearn def test_hypothetical_parameter_values(self): # The hypothetical parameter values of true, 1, 0.1 formatted as a # string (and their correct serialization and deserialization) an only @@ -1132,6 +1194,7 @@ def test_hypothetical_parameter_values(self): self.assertEqual(deserialized.get_params(), model.get_params()) self.assertIsNot(deserialized, model) + @pytest.mark.sklearn def test_gaussian_process(self): opt = scipy.optimize.fmin_l_bfgs_b kernel = sklearn.gaussian_process.kernels.Matern() @@ -1142,6 +1205,7 @@ def test_gaussian_process(self): ): self.extension.model_to_flow(gp) + @pytest.mark.sklearn def test_error_on_adding_component_multiple_times_to_flow(self): # this function implicitly checks # - openml.flows._check_multiple_occurence_of_component_in_flow() @@ -1166,6 +1230,7 @@ def test_error_on_adding_component_multiple_times_to_flow(self): with self.assertRaisesRegex(ValueError, fixture): self.extension.model_to_flow(pipeline2) + @pytest.mark.sklearn def test_subflow_version_propagated(self): this_directory = os.path.dirname(os.path.abspath(__file__)) tests_directory = os.path.abspath(os.path.join(this_directory, "..", "..")) @@ -1190,12 +1255,14 @@ def test_subflow_version_propagated(self): ), ) + @pytest.mark.sklearn @mock.patch("warnings.warn") def test_check_dependencies(self, warnings_mock): dependencies = ["sklearn==0.1", "sklearn>=99.99.99", "sklearn>99.99.99"] for dependency in dependencies: self.assertRaises(ValueError, self.extension._check_dependencies, dependency) + @pytest.mark.sklearn def test_illegal_parameter_names(self): # illegal name: estimators clf1 = sklearn.ensemble.VotingClassifier( @@ -1215,36 +1282,7 @@ def test_illegal_parameter_names(self): for case in cases: self.assertRaises(PyOpenMLError, self.extension.model_to_flow, case) - def 
test_illegal_parameter_names_pipeline(self): - # illegal name: steps - steps = [ - ("Imputer", SimpleImputer(strategy="median")), - ( - "OneHotEncoder", - sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), - ), - ( - "steps", - sklearn.ensemble.BaggingClassifier( - base_estimator=sklearn.tree.DecisionTreeClassifier - ), - ), - ] - self.assertRaises(ValueError, sklearn.pipeline.Pipeline, steps=steps) - - def test_illegal_parameter_names_featureunion(self): - # illegal name: transformer_list - transformer_list = [ - ("transformer_list", SimpleImputer(strategy="median")), - ( - "OneHotEncoder", - sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown="ignore"), - ), - ] - self.assertRaises( - ValueError, sklearn.pipeline.FeatureUnion, transformer_list=transformer_list - ) - + @pytest.mark.sklearn def test_paralizable_check(self): # using this model should pass the test (if param distribution is # legal) @@ -1294,6 +1332,7 @@ def test_paralizable_check(self): with self.assertRaises(PyOpenMLError): self.extension._prevent_optimize_n_jobs(model) + @pytest.mark.sklearn def test__get_fn_arguments_with_defaults(self): sklearn_version = LooseVersion(sklearn.__version__) if sklearn_version < "0.19": @@ -1326,12 +1365,19 @@ def test__get_fn_arguments_with_defaults(self): (sklearn.tree.DecisionTreeClassifier.__init__, 14), (sklearn.pipeline.Pipeline.__init__, 2), ] - else: + elif sklearn_version < "1.0": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 18), (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 2), ] + else: + # Tested with 1.0 and 1.1 + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 17), + (sklearn.tree.DecisionTreeClassifier.__init__, 12), + (sklearn.pipeline.Pipeline.__init__, 2), + ] for fn, num_params_with_defaults in fns: defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) @@ -1344,6 +1390,7 @@ def test__get_fn_arguments_with_defaults(self): self.assertSetEqual(set(defaults.keys()), set(defaults.keys()) - defaultless) self.assertSetEqual(defaultless, defaultless - set(defaults.keys())) + @pytest.mark.sklearn def test_deserialize_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1379,6 +1426,7 @@ def test_deserialize_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_deserialize_adaboost_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1417,6 +1465,7 @@ def test_deserialize_adaboost_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_deserialize_complex_with_defaults(self): # used the 'initialize_with_defaults' flag of the deserialization # method to return a flow that contains default hyperparameter @@ -1428,9 +1477,7 @@ def test_deserialize_complex_with_defaults(self): "Estimator", sklearn.ensemble.AdaBoostClassifier( sklearn.ensemble.BaggingClassifier( - sklearn.ensemble.GradientBoostingClassifier( - sklearn.neighbors.KNeighborsClassifier() - ) + sklearn.ensemble.GradientBoostingClassifier() ) ), ), @@ -1438,24 +1485,19 @@ def test_deserialize_complex_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - if LooseVersion(sklearn.__version__) < "0.23": - params = { - "Imputer__strategy": "median", - 
"OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, - } - else: - params = { - "Imputer__strategy": "mean", - "OneHotEncoder__sparse": True, - "Estimator__n_estimators": 50, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5, - } + impute_strategy = "median" if LooseVersion(sklearn.__version__) < "0.23" else "mean" + sparse = LooseVersion(sklearn.__version__) >= "0.23" + estimator_name = ( + "base_estimator" if LooseVersion(sklearn.__version__) < "1.2" else "estimator" + ) + params = { + "Imputer__strategy": impute_strategy, + "OneHotEncoder__sparse": sparse, + "Estimator__n_estimators": 10, + f"Estimator__{estimator_name}__n_estimators": 10, + f"Estimator__{estimator_name}__{estimator_name}__learning_rate": 0.1, + } + pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1467,6 +1509,7 @@ def test_deserialize_complex_with_defaults(self): self.extension.model_to_flow(pipe_deserialized), ) + @pytest.mark.sklearn def test_openml_param_name_to_sklearn(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( @@ -1501,6 +1544,7 @@ def test_openml_param_name_to_sklearn(self): openml_name = "%s(%s)_%s" % (subflow.name, subflow.version, splitted[-1]) self.assertEqual(parameter.full_name, openml_name) + @pytest.mark.sklearn def test_obtain_parameter_values_flow_not_from_server(self): model = sklearn.linear_model.LogisticRegression(solver="lbfgs") flow = self.extension.model_to_flow(model) @@ -1513,13 +1557,16 @@ def test_obtain_parameter_values_flow_not_from_server(self): self.extension.obtain_parameter_values(flow) model = sklearn.ensemble.AdaBoostClassifier( - base_estimator=sklearn.linear_model.LogisticRegression(solver="lbfgs",) + base_estimator=sklearn.linear_model.LogisticRegression( + solver="lbfgs", + ) ) flow = self.extension.model_to_flow(model) flow.flow_id = 1 with self.assertRaisesRegex(ValueError, msg): self.extension.obtain_parameter_values(flow) + @pytest.mark.sklearn def test_obtain_parameter_values(self): model = sklearn.model_selection.RandomizedSearchCV( @@ -1545,15 +1592,17 @@ def test_obtain_parameter_values(self): self.assertEqual(parameter["oml:value"], "5") self.assertEqual(parameter["oml:component"], 2) + @pytest.mark.sklearn def test_numpy_type_allowed_in_flow(self): - """ Simple numpy types should be serializable. """ + """Simple numpy types should be serializable.""" dt = sklearn.tree.DecisionTreeClassifier( max_depth=np.float64(3.0), min_samples_leaf=np.int32(5) ) self.extension.model_to_flow(dt) + @pytest.mark.sklearn def test_numpy_array_not_allowed_in_flow(self): - """ Simple numpy arrays should not be serializable. 
""" + """Simple numpy arrays should not be serializable.""" bin = sklearn.preprocessing.MultiLabelBinarizer(classes=np.asarray([1, 2, 3])) with self.assertRaises(TypeError): self.extension.model_to_flow(bin) @@ -1569,6 +1618,7 @@ def setUp(self): ################################################################################################ # Test methods for performing runs with this extension module + @pytest.mark.sklearn def test_run_model_on_task(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # using most_frequent imputer since dataset has mixed types and to keep things simple @@ -1580,6 +1630,7 @@ def test_run_model_on_task(self): ) openml.runs.run_model_on_task(pipe, task, dataset_format="array") + @pytest.mark.sklearn def test_seed_model(self): # randomized models that are initialized without seeds, can be seeded randomized_clfs = [ @@ -1622,6 +1673,7 @@ def test_seed_model(self): if idx == 1: self.assertEqual(clf.cv.random_state, 56422) + @pytest.mark.sklearn def test_seed_model_raises(self): # the _set_model_seed_where_none should raise exception if random_state is # anything else than an int @@ -1634,6 +1686,7 @@ def test_seed_model_raises(self): with self.assertRaises(ValueError): self.extension.seed_model(model=clf, seed=42) + @pytest.mark.sklearn def test_run_model_on_fold_classification_1_array(self): task = openml.tasks.get_task(1) # anneal; crossvalidation @@ -1690,6 +1743,7 @@ def test_run_model_on_fold_classification_1_array(self): check_scores=False, ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="SimpleImputer, ColumnTransformer available only after 0.19 and " @@ -1761,6 +1815,7 @@ def test_run_model_on_fold_classification_1_dataframe(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_classification_2(self): task = openml.tasks.get_task(7) # kr-vs-kp; crossvalidation @@ -1772,7 +1827,8 @@ def test_run_model_on_fold_classification_2(self): y_test = y[test_indices] pipeline = sklearn.model_selection.GridSearchCV( - sklearn.tree.DecisionTreeClassifier(), {"max_depth": [1, 2]}, + sklearn.tree.DecisionTreeClassifier(), + {"max_depth": [1, 2]}, ) # TODO add some mocking here to actually test the innards of this function, too! res = self.extension._run_model_on_fold( @@ -1813,13 +1869,11 @@ def test_run_model_on_fold_classification_2(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_classification_3(self): class HardNaiveBayes(sklearn.naive_bayes.GaussianNB): # class for testing a naive bayes classifier that does not allow soft # predictions - def __init__(self, priors=None): - super(HardNaiveBayes, self).__init__(priors) - def predict_proba(*args, **kwargs): raise AttributeError("predict_proba is not available when " "probability=False") @@ -1886,6 +1940,7 @@ def predict_proba(*args, **kwargs): X_test.shape[0] * len(task.class_labels), ) + @pytest.mark.sklearn def test_run_model_on_fold_regression(self): # There aren't any regression tasks on the test server openml.config.server = self.production_server @@ -1935,6 +1990,7 @@ def test_run_model_on_fold_regression(self): check_scores=False, ) + @pytest.mark.sklearn def test_run_model_on_fold_clustering(self): # There aren't any regression tasks on the test server openml.config.server = self.production_server @@ -1947,7 +2003,11 @@ def test_run_model_on_fold_clustering(self): ) # TODO add some mocking here to actually test the innards of this function, too! 
res = self.extension._run_model_on_fold( - model=pipeline, task=task, fold_no=0, rep_no=0, X_train=X, + model=pipeline, + task=task, + fold_no=0, + rep_no=0, + X_train=X, ) y_hat, y_hat_proba, user_defined_measures, trace = res @@ -1973,6 +2033,7 @@ def test_run_model_on_fold_clustering(self): check_scores=False, ) + @pytest.mark.sklearn def test__extract_trace_data(self): param_grid = { @@ -1984,7 +2045,9 @@ def test__extract_trace_data(self): num_iters = 10 task = openml.tasks.get_task(20) # balance-scale; crossvalidation clf = sklearn.model_selection.RandomizedSearchCV( - sklearn.neural_network.MLPClassifier(), param_grid, num_iters, + sklearn.neural_network.MLPClassifier(), + param_grid, + n_iter=num_iters, ) # just run the task on the model (without invoking any fancy extension & openml code) train, _ = task.get_train_test_split_indices(0, 0) @@ -2022,6 +2085,7 @@ def test__extract_trace_data(self): param_value = json.loads(trace_iteration.parameters[param_in_trace]) self.assertTrue(param_value in param_grid[param]) + @pytest.mark.sklearn def test_trim_flow_name(self): import re @@ -2084,6 +2148,7 @@ def test_trim_flow_name(self): "weka.IsolationForest", SklearnExtension.trim_flow_name("weka.IsolationForest") ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="SimpleImputer, ColumnTransformer available only after 0.19 and " @@ -2149,7 +2214,8 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(flow.components["prep"].class_name, "sklearn.pipeline.Pipeline") self.assertIsInstance(flow.components["prep"].components["columntransformer"], OpenMLFlow) self.assertIsInstance( - flow.components["prep"].components["columntransformer"].components["cat"], OpenMLFlow, + flow.components["prep"].components["columntransformer"].components["cat"], + OpenMLFlow, ) self.assertEqual( flow.components["prep"].components["columntransformer"].components["cat"].name, "drop" @@ -2172,6 +2238,7 @@ def test_run_on_model_with_empty_steps(self): self.assertEqual(len(new_model.named_steps), 3) self.assertEqual(new_model.named_steps["dummystep"], "passthrough") + @pytest.mark.sklearn def test_sklearn_serialization_with_none_step(self): msg = ( "Cannot serialize objects of None type. 
Please use a valid " @@ -2184,13 +2251,13 @@ def test_sklearn_serialization_with_none_step(self): with self.assertRaisesRegex(ValueError, msg): self.extension.model_to_flow(clf) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", ) def test_failed_serialization_of_custom_class(self): - """Test to check if any custom class inherited from sklearn expectedly fails serialization - """ + """Check if any custom class inherited from sklearn expectedly fails serialization""" try: from sklearn.impute import SimpleImputer except ImportError: @@ -2220,6 +2287,7 @@ def test_failed_serialization_of_custom_class(self): else: raise Exception(e) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 8d08f4eaf..c3c72f267 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -7,6 +7,7 @@ import re import time from unittest import mock +import pytest import scipy.stats import sklearn @@ -148,6 +149,7 @@ def test_from_xml_to_xml(self): self.assertEqual(new_xml, flow_xml) + @pytest.mark.sklearn def test_to_xml_from_xml(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) boosting = sklearn.ensemble.AdaBoostClassifier( @@ -166,6 +168,7 @@ def test_to_xml_from_xml(self): openml.flows.functions.assert_flows_equal(new_flow, flow) self.assertIsNot(new_flow, flow) + @pytest.mark.sklearn def test_publish_flow(self): flow = openml.OpenMLFlow( name="sklearn.dummy.DummyClassifier", @@ -176,7 +179,8 @@ def test_publish_flow(self): parameters=collections.OrderedDict(), parameters_meta_info=collections.OrderedDict(), external_version=self.extension._format_external_version( - "sklearn", sklearn.__version__, + "sklearn", + sklearn.__version__, ), tags=[], language="English", @@ -190,6 +194,7 @@ def test_publish_flow(self): TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) self.assertIsInstance(flow.flow_id, int) + @pytest.mark.sklearn @mock.patch("openml.flows.functions.flow_exists") def test_publish_existing_flow(self, flow_exists_mock): clf = sklearn.tree.DecisionTreeClassifier(max_depth=2) @@ -205,6 +210,7 @@ def test_publish_existing_flow(self, flow_exists_mock): self.assertTrue("OpenMLFlow already exists" in context_manager.exception.message) + @pytest.mark.sklearn def test_publish_flow_with_similar_components(self): clf = sklearn.ensemble.VotingClassifier( [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))] @@ -258,6 +264,7 @@ def test_publish_flow_with_similar_components(self): TestBase._mark_entity_for_removal("flow", (flow3.flow_id, flow3.name)) TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow3.flow_id)) + @pytest.mark.sklearn def test_semi_legal_flow(self): # TODO: Test if parameters are set correctly! 
# should not throw error as it contains two differentiable forms of @@ -274,6 +281,7 @@ def test_semi_legal_flow(self): TestBase._mark_entity_for_removal("flow", (flow.flow_id, flow.name)) TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id)) + @pytest.mark.sklearn @mock.patch("openml.flows.functions.get_flow") @mock.patch("openml.flows.functions.flow_exists") @mock.patch("openml._api_calls._perform_api_call") @@ -330,6 +338,7 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): self.assertEqual(context_manager.exception.args[0], fixture) self.assertEqual(get_flow_mock.call_count, 2) + @pytest.mark.sklearn def test_illegal_flow(self): # should throw error as it contains two imputers illegal = sklearn.pipeline.Pipeline( @@ -358,6 +367,7 @@ def get_sentinel(): flow_id = openml.flows.flow_exists(name, version) self.assertFalse(flow_id) + @pytest.mark.sklearn def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() @@ -368,7 +378,10 @@ def test_existing_flow_exists(self): steps = [ ("imputation", SimpleImputer(strategy="median")), ("hotencoding", sklearn.preprocessing.OneHotEncoder(**ohe_params)), - ("variencethreshold", sklearn.feature_selection.VarianceThreshold(),), + ( + "variencethreshold", + sklearn.feature_selection.VarianceThreshold(), + ), ("classifier", sklearn.tree.DecisionTreeClassifier()), ] complicated = sklearn.pipeline.Pipeline(steps=steps) @@ -387,9 +400,13 @@ def test_existing_flow_exists(self): # check if flow exists can find it flow = openml.flows.get_flow(flow.flow_id) - downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version,) + downloaded_flow_id = openml.flows.flow_exists( + flow.name, + flow.external_version, + ) self.assertEqual(downloaded_flow_id, flow.flow_id) + @pytest.mark.sklearn def test_sklearn_to_upload_to_flow(self): iris = sklearn.datasets.load_iris() X = iris.data diff --git a/tests/test_flows/test_flow_functions.py b/tests/test_flows/test_flow_functions.py index a65dcbf70..f2520cb36 100644 --- a/tests/test_flows/test_flow_functions.py +++ b/tests/test_flows/test_flow_functions.py @@ -4,16 +4,20 @@ import copy import functools import unittest +from unittest import mock from unittest.mock import patch from distutils.version import LooseVersion + +import requests import sklearn from sklearn import ensemble import pandas as pd import pytest import openml -from openml.testing import TestBase +from openml.exceptions import OpenMLNotAuthorizedError, OpenMLServerException +from openml.testing import TestBase, create_request_response import openml.extensions.sklearn @@ -112,10 +116,14 @@ def test_are_flows_equal(self): new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), getattr(new_flow, attribute), + getattr(flow, attribute), + getattr(new_flow, attribute), ) self.assertRaises( - ValueError, openml.flows.functions.assert_flows_equal, flow, new_flow, + ValueError, + openml.flows.functions.assert_flows_equal, + flow, + new_flow, ) # Test that the API ignores several keys when comparing flows @@ -134,7 +142,8 @@ def test_are_flows_equal(self): new_flow = copy.deepcopy(flow) setattr(new_flow, attribute, new_value) self.assertNotEqual( - getattr(flow, attribute), getattr(new_flow, attribute), + getattr(flow, attribute), + getattr(new_flow, attribute), ) openml.flows.functions.assert_flows_equal(flow, new_flow) @@ -266,6 +275,7 @@ def test_are_flows_equal_ignore_if_older(self): ) 
assert_flows_equal(flow, flow, ignore_parameter_values_on_older_children=None) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="OrdinalEncoder introduced in 0.20. " @@ -297,6 +307,7 @@ def test_get_flow1(self): flow = openml.flows.get_flow(1) self.assertIsNone(flow.external_version) + @pytest.mark.sklearn def test_get_flow_reinstantiate_model(self): model = ensemble.RandomForestClassifier(n_estimators=33) extension = openml.extensions.get_extension_by_model(model) @@ -318,34 +329,62 @@ def test_get_flow_reinstantiate_model_no_extension(self): reinstantiate=True, ) + @pytest.mark.sklearn @unittest.skipIf( - LooseVersion(sklearn.__version__) == "0.19.1", reason="Target flow is from sklearn 0.19.1" + LooseVersion(sklearn.__version__) == "0.19.1", + reason="Requires scikit-learn!=0.19.1, because target flow is from that version.", ) - def test_get_flow_reinstantiate_model_wrong_version(self): - # Note that CI does not test against 0.19.1. + def test_get_flow_with_reinstantiate_strict_with_wrong_version_raises_exception(self): openml.config.server = self.production_server - _, sklearn_major, _ = LooseVersion(sklearn.__version__).version[:3] - if sklearn_major > 23: - flow = 18587 # 18687, 18725 --- flows building random forest on >= 0.23 - flow_sklearn_version = "0.23.1" - else: - flow = 8175 - flow_sklearn_version = "0.19.1" - expected = ( - "Trying to deserialize a model with dependency " - "sklearn=={} not satisfied.".format(flow_sklearn_version) - ) + flow = 8175 + expected = "Trying to deserialize a model with dependency sklearn==0.19.1 not satisfied." self.assertRaisesRegex( - ValueError, expected, openml.flows.get_flow, flow_id=flow, reinstantiate=True + ValueError, + expected, + openml.flows.get_flow, + flow_id=flow, + reinstantiate=True, + strict_version=True, ) - if LooseVersion(sklearn.__version__) > "0.19.1": - # 0.18 actually can't deserialize this because of incompatibility - flow = openml.flows.get_flow(flow_id=flow, reinstantiate=True, strict_version=False) - # ensure that a new flow was created - assert flow.flow_id is None - assert "sklearn==0.19.1" not in flow.dependencies - assert "sklearn>=0.19.1" not in flow.dependencies + @pytest.mark.sklearn + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "1" and LooseVersion(sklearn.__version__) != "1.0.0", + reason="Requires scikit-learn < 1.0.1." + # Because scikit-learn dropped min_impurity_split hyperparameter in 1.0, + # and the requested flow is from 1.0.0 exactly. + ) + def test_get_flow_reinstantiate_flow_not_strict_post_1(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=19190, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==1.0.0" not in flow.dependencies + + @pytest.mark.sklearn + @unittest.skipIf( + (LooseVersion(sklearn.__version__) < "0.23.2") + or ("1.0" < LooseVersion(sklearn.__version__)), + reason="Requires scikit-learn 0.23.2 or ~0.24." + # Because these still have min_impurity_split, but with new scikit-learn module structure." 
+ ) + def test_get_flow_reinstantiate_flow_not_strict_023_and_024(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=18587, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==0.23.1" not in flow.dependencies + + @pytest.mark.sklearn + @unittest.skipIf( + "0.23" < LooseVersion(sklearn.__version__), + reason="Requires scikit-learn<=0.23, because the scikit-learn module structure changed.", + ) + def test_get_flow_reinstantiate_flow_not_strict_pre_023(self): + openml.config.server = self.production_server + flow = openml.flows.get_flow(flow_id=8175, reinstantiate=True, strict_version=False) + assert flow.flow_id is None + assert "sklearn==0.19.1" not in flow.dependencies + + @pytest.mark.sklearn def test_get_flow_id(self): if self.long_version: list_all = openml.utils._list_all @@ -370,7 +409,131 @@ def test_get_flow_id(self): name=flow.name, exact_version=True ) flow_ids_exact_version_False = openml.flows.get_flow_id( - name=flow.name, exact_version=False, + name=flow.name, + exact_version=False, ) self.assertEqual(flow_ids_exact_version_True, flow_ids_exact_version_False) self.assertIn(flow.flow_id, flow_ids_exact_version_True) + + def test_delete_flow(self): + flow = openml.OpenMLFlow( + name="sklearn.dummy.DummyClassifier", + class_name="sklearn.dummy.DummyClassifier", + description="test description", + model=sklearn.dummy.DummyClassifier(), + components=OrderedDict(), + parameters=OrderedDict(), + parameters_meta_info=OrderedDict(), + external_version="1", + tags=[], + language="English", + dependencies=None, + ) + + flow, _ = self._add_sentinel_to_flow_name(flow, None) + + flow.publish() + _flow_id = flow.flow_id + self.assertTrue(openml.flows.delete_flow(_flow_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it was not uploaded by you.", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_has_runs.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it still has associated entities:", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_subflow(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / 
"flow_delete_is_subflow.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The flow can not be deleted because it still has associated entities:", + ): + openml.flows.delete_flow(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_flow_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.flows.delete_flow(33364) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/33364",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_flow(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="flow does not exist", + ): + openml.flows.delete_flow(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/flow/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_openml/test_api_calls.py b/tests/test_openml/test_api_calls.py index 16bdbc7df..ecc7111fa 100644 --- a/tests/test_openml/test_api_calls.py +++ b/tests/test_openml/test_api_calls.py @@ -7,7 +7,8 @@ class TestConfig(openml.testing.TestBase): def test_too_long_uri(self): with self.assertRaisesRegex( - openml.exceptions.OpenMLServerError, "URI too long!", + openml.exceptions.OpenMLServerError, + "URI too long!", ): openml.datasets.list_datasets(data_id=list(range(10000))) diff --git a/tests/test_openml/test_config.py b/tests/test_openml/test_config.py index 638f02420..ba70689a1 100644 --- a/tests/test_openml/test_config.py +++ b/tests/test_openml/test_config.py @@ -37,7 +37,7 @@ def side_effect(path_): openml.config._setup() def test_get_config_as_dict(self): - """ Checks if the current configuration is returned accurately as a dict. """ + """Checks if the current configuration is returned accurately as a dict.""" config = openml.config.get_config_as_dict() _config = dict() _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" @@ -51,7 +51,7 @@ def test_get_config_as_dict(self): self.assertDictEqual(config, _config) def test_setup_with_config(self): - """ Checks if the OpenML configuration can be updated using _setup(). """ + """Checks if the OpenML configuration can be updated using _setup().""" _config = dict() _config["apikey"] = "610344db6388d9ba34f6db45a3cf71de" _config["server"] = "https://www.openml.org/api/v1/xml" @@ -68,7 +68,7 @@ def test_setup_with_config(self): class TestConfigurationForExamples(openml.testing.TestBase): def test_switch_to_example_configuration(self): - """ Verifies the test configuration is loaded properly. 
""" + """Verifies the test configuration is loaded properly.""" # Below is the default test key which would be used anyway, but just for clarity: openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server @@ -79,7 +79,7 @@ def test_switch_to_example_configuration(self): self.assertEqual(openml.config.server, self.test_server) def test_switch_from_example_configuration(self): - """ Verifies the previous configuration is loaded after stopping. """ + """Verifies the previous configuration is loaded after stopping.""" # Below is the default test key which would be used anyway, but just for clarity: openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server @@ -91,14 +91,14 @@ def test_switch_from_example_configuration(self): self.assertEqual(openml.config.server, self.production_server) def test_example_configuration_stop_before_start(self): - """ Verifies an error is raised is `stop_...` is called before `start_...`. """ + """Verifies an error is raised is `stop_...` is called before `start_...`.""" error_regex = ".*stop_use_example_configuration.*start_use_example_configuration.*first" self.assertRaisesRegex( RuntimeError, error_regex, openml.config.stop_using_configuration_for_example ) def test_example_configuration_start_twice(self): - """ Checks that the original config can be returned to if `start..` is called twice. """ + """Checks that the original config can be returned to if `start..` is called twice.""" openml.config.apikey = "610344db6388d9ba34f6db45a3cf71de" openml.config.server = self.production_server diff --git a/tests/test_openml/test_openml.py b/tests/test_openml/test_openml.py index 80f5e67f0..93d2e6925 100644 --- a/tests/test_openml/test_openml.py +++ b/tests/test_openml/test_openml.py @@ -15,7 +15,11 @@ class TestInit(TestBase): @mock.patch("openml.flows.functions.get_flow") @mock.patch("openml.runs.functions.get_run") def test_populate_cache( - self, run_mock, flow_mock, dataset_mock, task_mock, + self, + run_mock, + flow_mock, + dataset_mock, + task_mock, ): openml.populate_cache(task_ids=[1, 2], dataset_ids=[3, 4], flow_ids=[5, 6], run_ids=[7, 8]) self.assertEqual(run_mock.call_count, 2) @@ -27,7 +31,10 @@ def test_populate_cache( self.assertEqual(argument[0], fixture) self.assertEqual(dataset_mock.call_count, 2) - for argument, fixture in zip(dataset_mock.call_args_list, [(3,), (4,)],): + for argument, fixture in zip( + dataset_mock.call_args_list, + [(3,), (4,)], + ): self.assertEqual(argument[0], fixture) self.assertEqual(task_mock.call_count, 2) diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index dd0da5c00..67e15d62b 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -7,9 +7,11 @@ import xmltodict from sklearn.dummy import DummyClassifier +from sklearn.linear_model import LinearRegression from sklearn.tree import DecisionTreeClassifier from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline +from sklearn.base import clone from openml import OpenMLRun from openml.testing import TestBase, SimpleImputer @@ -39,6 +41,25 @@ def test_tagging(self): run_list = openml.runs.list_runs(tag=tag) self.assertEqual(len(run_list), 0) + @staticmethod + def _test_prediction_data_equal(run, run_prime): + # Determine which attributes are numeric and which not + num_cols = np.array( + [d_type == "NUMERIC" for _, d_type in run._generate_arff_dict()["attributes"]] + ) + # Get run data consistently + # (For run from 
server, .data_content does not exist) + run_data_content = run.predictions.values + run_prime_data_content = run_prime.predictions.values + + # Assert numeric and string parts separately + numeric_part = np.array(run_data_content[:, num_cols], dtype=float) + numeric_part_prime = np.array(run_prime_data_content[:, num_cols], dtype=float) + string_part = run_data_content[:, ~num_cols] + string_part_prime = run_prime_data_content[:, ~num_cols] + np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime) + np.testing.assert_array_equal(string_part, string_part_prime) + def _test_run_obj_equals(self, run, run_prime): for dictionary in ["evaluations", "fold_evaluations", "sample_evaluations"]: if getattr(run, dictionary) is not None: @@ -49,14 +70,9 @@ def _test_run_obj_equals(self, run, run_prime): if other is not None: self.assertDictEqual(other, dict()) self.assertEqual(run._to_xml(), run_prime._to_xml()) + self._test_prediction_data_equal(run, run_prime) - numeric_part = np.array(np.array(run.data_content)[:, 0:-2], dtype=float) - numeric_part_prime = np.array(np.array(run_prime.data_content)[:, 0:-2], dtype=float) - string_part = np.array(run.data_content)[:, -2:] - string_part_prime = np.array(run_prime.data_content)[:, -2:] - np.testing.assert_array_almost_equal(numeric_part, numeric_part_prime) - np.testing.assert_array_equal(string_part, string_part_prime) - + # Test trace if run.trace is not None: run_trace_content = run.trace.trace_to_arff()["data"] else: @@ -79,8 +95,14 @@ def _check_array(array, type_): int_part_prime = [line[:3] for line in run_prime_trace_content] _check_array(int_part_prime, int) - float_part = np.array(np.array(run_trace_content)[:, 3:4], dtype=float,) - float_part_prime = np.array(np.array(run_prime_trace_content)[:, 3:4], dtype=float,) + float_part = np.array( + np.array(run_trace_content)[:, 3:4], + dtype=float, + ) + float_part_prime = np.array( + np.array(run_prime_trace_content)[:, 3:4], + dtype=float, + ) bool_part = [line[4] for line in run_trace_content] bool_part_prime = [line[4] for line in run_prime_trace_content] for bp, bpp in zip(bool_part, bool_part_prime): @@ -96,6 +118,7 @@ def _check_array(array, type_): else: self.assertIsNone(run_prime_trace_content) + @pytest.mark.sklearn def test_to_from_filesystem_vanilla(self): model = Pipeline( @@ -113,7 +136,11 @@ def test_to_from_filesystem_vanilla(self): upload_flow=True, ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -127,6 +154,7 @@ def test_to_from_filesystem_vanilla(self): "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) ) + @pytest.mark.sklearn @pytest.mark.flaky() def test_to_from_filesystem_search(self): @@ -146,7 +174,10 @@ def test_to_from_filesystem_search(self): task = openml.tasks.get_task(119) # diabetes; crossvalidation run = openml.runs.run_model_on_task( - model=model, task=task, add_local_measures=False, avoid_duplicate_runs=False, + model=model, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, ) cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) @@ -160,6 +191,7 @@ def test_to_from_filesystem_search(self): "collected from {}: {}".format(__file__.split("/")[-1], run_prime.run_id) ) + @pytest.mark.sklearn def test_to_from_filesystem_no_model(self): model = Pipeline( @@ -176,6 
+208,74 @@ def test_to_from_filesystem_no_model(self): with self.assertRaises(ValueError, msg="Could not find model.pkl"): openml.runs.OpenMLRun.from_filesystem(cache_path) + @staticmethod + def _get_models_tasks_for_tests(): + model_clf = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ("classifier", DummyClassifier(strategy="prior")), + ] + ) + model_reg = Pipeline( + [ + ("imputer", SimpleImputer(strategy="mean")), + ( + "regressor", + # LR because dummy does not produce enough float-like values + LinearRegression(), + ), + ] + ) + + task_clf = openml.tasks.get_task(119) # diabetes; hold out validation + task_reg = openml.tasks.get_task(733) # quake; crossvalidation + + return [(model_clf, task_clf), (model_reg, task_reg)] + + @staticmethod + def assert_run_prediction_data(task, run, model): + # -- Get y_pred and y_true as it should be stored in the run + n_repeats, n_folds, n_samples = task.get_split_dimensions() + if (n_repeats > 1) or (n_samples > 1): + raise ValueError("Test does not support this task type's split dimensions.") + + X, y = task.get_X_and_y() + + # Check correctness of y_true and y_pred in run + for fold_id in range(n_folds): + # Get data for fold + _, test_indices = task.get_train_test_split_indices(repeat=0, fold=fold_id, sample=0) + train_mask = np.full(len(X), True) + train_mask[test_indices] = False + + # Get train / test + X_train = X[train_mask] + y_train = y[train_mask] + X_test = X[~train_mask] + y_test = y[~train_mask] + + # Get y_pred + y_pred = model.fit(X_train, y_train).predict(X_test) + + # Get stored data for fold + saved_fold_data = run.predictions[run.predictions["fold"] == fold_id].sort_values( + by="row_id" + ) + saved_y_pred = saved_fold_data["prediction"].values + gt_key = "truth" if "truth" in list(saved_fold_data) else "correct" + saved_y_test = saved_fold_data[gt_key].values + + assert_method = np.testing.assert_array_almost_equal + if task.task_type == "Supervised Classification": + y_pred = np.take(task.class_labels, y_pred) + y_test = np.take(task.class_labels, y_test) + assert_method = np.testing.assert_array_equal + + # Assert correctness + assert_method(y_pred, saved_y_pred) + assert_method(y_test, saved_y_test) + + @pytest.mark.sklearn def test_publish_with_local_loaded_flow(self): """ Publish a run tied to a local flow after it has first been saved to @@ -183,40 +283,85 @@ def test_publish_with_local_loaded_flow(self): """ extension = openml.extensions.sklearn.SklearnExtension() - model = Pipeline( - [("imputer", SimpleImputer(strategy="mean")), ("classifier", DummyClassifier())] - ) - task = openml.tasks.get_task(119) # diabetes; crossvalidation - - # Make sure the flow does not exist on the server yet. - flow = extension.model_to_flow(model) - self._add_sentinel_to_flow_name(flow) - self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + for model, task in self._get_models_tasks_for_tests(): + # Make sure the flow does not exist on the server yet. + flow = extension.model_to_flow(model) + self._add_sentinel_to_flow_name(flow) + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + run = openml.runs.run_flow_on_task( + flow=flow, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, + upload_flow=False, + ) + + # Make sure that the flow has not been uploaded as requested. + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + # Make sure that the prediction data stored in the run is correct. 
+ self.assert_run_prediction_data(task, run, clone(model)) + + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + run.to_filesystem(cache_path) + # obtain run from filesystem + loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) + loaded_run.publish() + + # Clean up + TestBase._mark_entity_for_removal("run", loaded_run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) + ) + + # make sure the flow is published as part of publishing the run. + self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) + openml.runs.get_run(loaded_run.run_id) + + @pytest.mark.sklearn + def test_offline_and_online_run_identical(self): - run = openml.runs.run_flow_on_task( - flow=flow, - task=task, - add_local_measures=False, - avoid_duplicate_runs=False, - upload_flow=False, - ) - - # Make sure that the flow has not been uploaded as requested. - self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) - - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) - run.to_filesystem(cache_path) - # obtain run from filesystem - loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) - loaded_run.publish() - TestBase._mark_entity_for_removal("run", loaded_run.run_id) - TestBase.logger.info( - "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) - ) + extension = openml.extensions.sklearn.SklearnExtension() - # make sure the flow is published as part of publishing the run. - self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) - openml.runs.get_run(loaded_run.run_id) + for model, task in self._get_models_tasks_for_tests(): + # Make sure the flow does not exist on the server yet. + flow = extension.model_to_flow(model) + self._add_sentinel_to_flow_name(flow) + self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + run = openml.runs.run_flow_on_task( + flow=flow, + task=task, + add_local_measures=False, + avoid_duplicate_runs=False, + upload_flow=False, + ) + + # Make sure that the flow has not been uploaded as requested. 
+ self.assertFalse(openml.flows.flow_exists(flow.name, flow.external_version)) + + # Load from filesystem + cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128))) + run.to_filesystem(cache_path) + loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) + + # Assert identical for offline - offline + self._test_run_obj_equals(run, loaded_run) + + # Publish and test for offline - online + run.publish() + self.assertTrue(openml.flows.flow_exists(flow.name, flow.external_version)) + + try: + online_run = openml.runs.get_run(run.run_id, ignore_cache=True) + self._test_prediction_data_equal(run, online_run) + finally: + # Clean up + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info( + "collected from {}: {}".format(__file__.split("/")[-1], loaded_run.run_id) + ) def test_run_setup_string_included_in_xml(self): SETUP_STRING = "setup-string" diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index c8f1729b7..91dd4ce5e 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1,5 +1,4 @@ # License: BSD 3-Clause - import arff from distutils.version import LooseVersion import os @@ -7,10 +6,11 @@ import time import sys import ast -import unittest.mock +from unittest import mock import numpy as np import joblib +import requests from joblib import parallel_backend import openml @@ -20,15 +20,24 @@ import unittest import warnings import pandas as pd +import pytest import openml.extensions.sklearn -from openml.testing import TestBase, SimpleImputer, CustomImputer +from openml.testing import TestBase, SimpleImputer, CustomImputer, create_request_response from openml.extensions.sklearn import cat, cont -from openml.runs.functions import _run_task_get_arffcontent, run_exists, format_prediction +from openml.runs.functions import ( + _run_task_get_arffcontent, + run_exists, + format_prediction, + delete_run, +) from openml.runs.trace import OpenMLRunTrace from openml.tasks import TaskType from openml.testing import check_task_existence -from openml.exceptions import OpenMLServerException +from openml.exceptions import ( + OpenMLServerException, + OpenMLNotAuthorizedError, +) from sklearn.naive_bayes import GaussianNB from sklearn.model_selection._search import BaseSearchCV @@ -126,7 +135,7 @@ def _wait_for_processed_run(self, run_id, max_waiting_time_seconds): "evaluated correctly on the server".format(run_id) ) - def _compare_predictions(self, predictions, predictions_prime): + def _assert_predictions_equal(self, predictions, predictions_prime): self.assertEqual( np.array(predictions_prime["data"]).shape, np.array(predictions["data"]).shape ) @@ -143,13 +152,13 @@ def _compare_predictions(self, predictions, predictions_prime): val_2 = predictions_prime["data"][idx][col_idx] if type(val_1) == float or type(val_2) == float: self.assertAlmostEqual( - float(val_1), float(val_2), places=6, + float(val_1), + float(val_2), + places=6, ) else: self.assertEqual(val_1, val_2) - return True - def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create_task_obj): run = openml.runs.get_run(run_id) @@ -165,16 +174,27 @@ def _rerun_model_and_compare_predictions(self, run_id, model_prime, seed, create if create_task_obj: task = openml.tasks.get_task(run.task_id) run_prime = openml.runs.run_model_on_task( - model=model_prime, task=task, avoid_duplicate_runs=False, seed=seed, + model=model_prime, + task=task, + avoid_duplicate_runs=False, + seed=seed, ) else: run_prime = 
openml.runs.run_model_on_task( - model=model_prime, task=run.task_id, avoid_duplicate_runs=False, seed=seed, + model=model_prime, + task=run.task_id, + avoid_duplicate_runs=False, + seed=seed, ) predictions_prime = run_prime._generate_arff_dict() - self._compare_predictions(predictions, predictions_prime) + self._assert_predictions_equal(predictions, predictions_prime) + pd.testing.assert_frame_equal( + run.predictions, + run_prime.predictions, + check_dtype=False, # Loaded ARFF reads NUMERIC as float, even if integer. + ) def _perform_run( self, @@ -276,7 +296,9 @@ def _remove_random_state(flow): # test the initialize setup function run_id = run_.run_id run_server = openml.runs.get_run(run_id) - clf_server = openml.setups.initialize_model(setup_id=run_server.setup_id,) + clf_server = openml.setups.initialize_model( + setup_id=run_server.setup_id, + ) flow_local = self.extension.model_to_flow(clf) flow_server = self.extension.model_to_flow(clf_server) @@ -298,7 +320,9 @@ def _remove_random_state(flow): openml.flows.assert_flows_equal(flow_local, flow_server) # and test the initialize setup from run function - clf_server2 = openml.runs.initialize_model_from_run(run_id=run_server.run_id,) + clf_server2 = openml.runs.initialize_model_from_run( + run_id=run_server.run_id, + ) flow_server2 = self.extension.model_to_flow(clf_server2) if flow.class_name not in classes_without_random_state: self.assertEqual(flow_server2.parameters["random_state"], flow_expected_rsv) @@ -366,13 +390,11 @@ def _check_sample_evaluations( evaluation = sample_evaluations[measure][rep][fold][sample] self.assertIsInstance(evaluation, float) if not (os.environ.get("CI_WINDOWS") or os.name == "nt"): - # Either Appveyor is much faster than Travis - # and/or measurements are not as accurate. - # Either way, windows seems to get an eval-time - # of 0 sometimes. + # Windows seems to get an eval-time of 0 sometimes. self.assertGreater(evaluation, 0) self.assertLess(evaluation, max_time_allowed) + @pytest.mark.sklearn def test_run_regression_on_classif_task(self): task_id = 115 # diabetes; crossvalidation @@ -384,21 +406,35 @@ def test_run_regression_on_classif_task(self): AttributeError, "'LinearRegression' object has no attribute 'classes_'" ): openml.runs.run_model_on_task( - model=clf, task=task, avoid_duplicate_runs=False, dataset_format="array", + model=clf, + task=task, + avoid_duplicate_runs=False, + dataset_format="array", ) + @pytest.mark.sklearn def test_check_erronous_sklearn_flow_fails(self): task_id = 115 # diabetes; crossvalidation task = openml.tasks.get_task(task_id) # Invalid parameter values clf = LogisticRegression(C="abc", solver="lbfgs") - with self.assertRaisesRegex( - ValueError, - r"Penalty term must be positive; got \(C=u?'abc'\)", # u? for 2.7/3.4-6 compability - ): + # The exact error message depends on scikit-learn version. + # Because the sklearn-extension module is to be separated, + # I will simply relax specifics of the raised Error. + # old: r"Penalty term must be positive; got \(C=u?'abc'\)" + # new: sklearn.utils._param_validation.InvalidParameterError: + # The 'C' parameter of LogisticRegression must be a float in the range (0, inf]. Got 'abc' instead. 
# noqa: E501 + try: + from sklearn.utils._param_validation import InvalidParameterError + + exceptions = (ValueError, InvalidParameterError) + except ImportError: + exceptions = (ValueError,) + with self.assertRaises(exceptions): openml.runs.run_model_on_task( - task=task, model=clf, + task=task, + model=clf, ) ########################################################################### @@ -476,7 +512,9 @@ def determine_grid_size(param_grid): self._wait_for_processed_run(run.run_id, 600) try: model_prime = openml.runs.initialize_model_from_trace( - run_id=run.run_id, repeat=0, fold=0, + run_id=run.run_id, + repeat=0, + fold=0, ) except openml.exceptions.OpenMLServerException as e: e.message = "%s; run_id %d" % (e.message, run.run_id) @@ -501,6 +539,14 @@ def determine_grid_size(param_grid): # todo: check if runtime is present self._check_fold_timing_evaluations(run.fold_evaluations, 1, num_folds, task_type=task_type) + + # Check if run string and print representation do not run into an error + # The above check already verifies that all columns needed for supported + # representations are present. + # Supported: SUPERVISED_CLASSIFICATION, LEARNING_CURVE, SUPERVISED_REGRESSION + str(run) + self.logger.info(run) + return run def _run_and_upload_classification( @@ -549,6 +595,7 @@ def _run_and_upload_regression( sentinel=sentinel, ) + @pytest.mark.sklearn def test_run_and_upload_logistic_regression(self): lr = LogisticRegression(solver="lbfgs", max_iter=1000) task_id = self.TEST_SERVER_TASK_SIMPLE["task_id"] @@ -556,6 +603,7 @@ def test_run_and_upload_logistic_regression(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn def test_run_and_upload_linear_regression(self): lr = LinearRegression() task_id = self.TEST_SERVER_TASK_REGRESSION["task_id"] @@ -585,6 +633,7 @@ def test_run_and_upload_linear_regression(self): n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"] self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn def test_run_and_upload_pipeline_dummy_pipeline(self): pipeline1 = Pipeline( @@ -598,6 +647,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -660,11 +710,13 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) + @pytest.mark.sklearn + @unittest.skip("https://github.com/openml/OpenML/issues/1180") @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", ) - @unittest.mock.patch("warnings.warn") + @mock.patch("warnings.warn") def test_run_and_upload_knn_pipeline(self, warnings_mock): cat_imp = make_pipeline( @@ -710,6 +762,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): call_count += 1 self.assertEqual(call_count, 3) + @pytest.mark.sklearn def test_run_and_upload_gridsearch(self): gridsearch = GridSearchCV( BaggingClassifier(base_estimator=SVC()), @@ -728,6 +781,7 @@ def test_run_and_upload_gridsearch(self): ) self.assertEqual(len(run.trace.trace_iterations), 9) + @pytest.mark.sklearn def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), @@ -759,6 +813,7 @@ 
def test_run_and_upload_randomsearch(self): trace = openml.runs.get_run_trace(run.run_id) self.assertEqual(len(trace.trace_iterations), 5) + @pytest.mark.sklearn def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: # 1) it verifies the correct handling of masked arrays (not all @@ -781,6 +836,7 @@ def test_run_and_upload_maskedarrays(self): ########################################################################## + @pytest.mark.sklearn def test_learning_curve_task_1(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -800,6 +856,7 @@ def test_learning_curve_task_1(self): ) self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + @pytest.mark.sklearn def test_learning_curve_task_2(self): task_id = 801 # diabates dataset num_test_instances = 6144 # for learning curve @@ -817,8 +874,8 @@ def test_learning_curve_task_2(self): RandomizedSearchCV( DecisionTreeClassifier(), { - "min_samples_split": [2 ** x for x in range(1, 8)], - "min_samples_leaf": [2 ** x for x in range(0, 7)], + "min_samples_split": [2**x for x in range(1, 8)], + "min_samples_leaf": [2**x for x in range(0, 7)], }, cv=3, n_iter=10, @@ -831,6 +888,7 @@ def test_learning_curve_task_2(self): ) self._check_sample_evaluations(run.sample_evaluations, num_repeats, num_folds, num_samples) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="Pipelines don't support indexing (used for the assert check)", @@ -860,7 +918,10 @@ def test_initialize_cv_from_run(self): task = openml.tasks.get_task(11) # kr-vs-kp; holdout run = openml.runs.run_model_on_task( - model=randomsearch, task=task, avoid_duplicate_runs=False, seed=1, + model=randomsearch, + task=task, + avoid_duplicate_runs=False, + seed=1, ) run_ = run.publish() TestBase._mark_entity_for_removal("run", run.run_id) @@ -898,12 +959,16 @@ def _test_local_evaluations(self, run): else: tests.append((sklearn.metrics.jaccard_score, {})) for test_idx, test in enumerate(tests): - alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],) + alt_scores = run.get_metric_fn( + sklearn_fn=test[0], + kwargs=test[1], + ) self.assertEqual(len(alt_scores), 10) for idx in range(len(alt_scores)): self.assertGreaterEqual(alt_scores[idx], 0) self.assertLessEqual(alt_scores[idx], 1) + @pytest.mark.sklearn def test_local_run_swapped_parameter_order_model(self): clf = DecisionTreeClassifier() australian_task = 595 # Australian; crossvalidation @@ -911,11 +976,15 @@ def test_local_run_swapped_parameter_order_model(self): # task and clf are purposely in the old order run = openml.runs.run_model_on_task( - task, clf, avoid_duplicate_runs=False, upload_flow=False, + task, + clf, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -937,11 +1006,15 @@ def test_local_run_swapped_parameter_order_flow(self): # invoke OpenML run run = openml.runs.run_flow_on_task( - task, flow, avoid_duplicate_runs=False, upload_flow=False, + task, + flow, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -962,7 +1035,10 @@ def test_local_run_metric_score(self): # invoke OpenML run run = 
openml.runs.run_model_on_task( - model=clf, task=task, avoid_duplicate_runs=False, upload_flow=False, + model=clf, + task=task, + avoid_duplicate_runs=False, + upload_flow=False, ) self._test_local_evaluations(run) @@ -976,6 +1052,7 @@ def test_online_run_metric_score(self): self._test_local_evaluations(run) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1015,7 +1092,11 @@ def test_initialize_model_from_run(self): TestBase.logger.info("collected from test_run_functions: {}".format(task_id)) task = openml.tasks.get_task(task_id) - run = openml.runs.run_model_on_task(model=clf, task=task, avoid_duplicate_runs=False,) + run = openml.runs.run_model_on_task( + model=clf, + task=task, + avoid_duplicate_runs=False, + ) run_ = run.publish() TestBase._mark_entity_for_removal("run", run_.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(run_.run_id)) @@ -1033,6 +1114,7 @@ def test_initialize_model_from_run(self): self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"') self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05") + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="SimpleImputer doesn't handle mixed type DataFrame as input", @@ -1077,16 +1159,17 @@ def test__run_exists(self): flow = self.extension.model_to_flow(clf) flow_exists = openml.flows.flow_exists(flow.name, flow.external_version) - self.assertGreater(flow_exists, 0) + self.assertGreater(flow_exists, 0, "Server says flow from run does not exist.") # Do NOT use get_flow reinitialization, this potentially sets # hyperparameter values wrong. Rather use the local model. downloaded_flow = openml.flows.get_flow(flow_exists) downloaded_flow.model = clf setup_exists = openml.setups.setup_exists(downloaded_flow) - self.assertGreater(setup_exists, 0) + self.assertGreater(setup_exists, 0, "Server says setup of run does not exist.") run_ids = run_exists(task.task_id, setup_exists) self.assertTrue(run_ids, msg=(run_ids, clf)) + @pytest.mark.sklearn def test_run_with_illegal_flow_id(self): # check the case where the user adds an illegal flow id to a # non-existing flow @@ -1100,9 +1183,12 @@ def test_run_with_illegal_flow_id(self): ) with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, flow=flow, avoid_duplicate_runs=True, + task=task, + flow=flow, + avoid_duplicate_runs=True, ) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_after_load(self): # Same as `test_run_with_illegal_flow_id`, but test this error is also # caught if the run is stored to and loaded from disk first. 
@@ -1115,7 +1201,11 @@ def test_run_with_illegal_flow_id_after_load(self): task=task, flow=flow, avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -1127,6 +1217,7 @@ def test_run_with_illegal_flow_id_after_load(self): TestBase._mark_entity_for_removal("run", loaded_run.run_id) TestBase.logger.info("collected from test_run_functions: {}".format(loaded_run.run_id)) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_1(self): # Check the case where the user adds an illegal flow id to an existing # flow. Comes to a different value error than the previous test @@ -1146,9 +1237,12 @@ def test_run_with_illegal_flow_id_1(self): expected_message_regex = "Local flow_id does not match server flow_id: " "'-1' vs '[0-9]+'" with self.assertRaisesRegex(openml.exceptions.PyOpenMLError, expected_message_regex): openml.runs.run_flow_on_task( - task=task, flow=flow_new, avoid_duplicate_runs=True, + task=task, + flow=flow_new, + avoid_duplicate_runs=True, ) + @pytest.mark.sklearn def test_run_with_illegal_flow_id_1_after_load(self): # Same as `test_run_with_illegal_flow_id_1`, but test this error is # also caught if the run is stored to and loaded from disk first. @@ -1169,7 +1263,11 @@ def test_run_with_illegal_flow_id_1_after_load(self): task=task, flow=flow_new, avoid_duplicate_runs=False, upload_flow=False ) - cache_path = os.path.join(self.workdir, "runs", str(random.getrandbits(128)),) + cache_path = os.path.join( + self.workdir, + "runs", + str(random.getrandbits(128)), + ) run.to_filesystem(cache_path) loaded_run = openml.runs.OpenMLRun.from_filesystem(cache_path) @@ -1178,6 +1276,7 @@ def test_run_with_illegal_flow_id_1_after_load(self): openml.exceptions.PyOpenMLError, expected_message_regex, loaded_run.publish ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="OneHotEncoder cannot handle mixed type DataFrame as input", @@ -1223,10 +1322,11 @@ def test__run_task_get_arffcontent(self): # check row id self.assertGreaterEqual(arff_line[2], 0) self.assertLessEqual(arff_line[2], num_instances - 1) + # check prediction and ground truth columns + self.assertIn(arff_line[4], ["won", "nowin"]) + self.assertIn(arff_line[5], ["won", "nowin"]) # check confidences - self.assertAlmostEqual(sum(arff_line[4:6]), 1.0) - self.assertIn(arff_line[6], ["won", "nowin"]) - self.assertIn(arff_line[7], ["won", "nowin"]) + self.assertAlmostEqual(sum(arff_line[6:]), 1.0) def test__create_trace_from_arff(self): with open(self.static_cache_dir + "/misc/trace.arff", "r") as arff_file: @@ -1255,7 +1355,7 @@ def test_get_run(self): assert "weka" in run.tags assert "weka_3.7.12" in run.tags assert run.predictions_url == ( - "https://www.openml.org/data/download/1667125/" + "https://api.openml.org/data/download/1667125/" "weka_generated_predictions4575715871712251329.arff" ) @@ -1394,6 +1494,7 @@ def test_get_runs_list_by_tag(self): runs = openml.runs.list_runs(tag="curves") self.assertGreaterEqual(len(runs), 1) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -1429,6 +1530,7 @@ def test_run_on_dataset_with_missing_labels_dataframe(self): # repeat, fold, row_id, 6 confidences, prediction and correct label self.assertEqual(len(row), 12) 
+ @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.20", reason="columntransformer introduction in 0.20.0", @@ -1480,6 +1582,7 @@ def test_get_uncached_run(self): with self.assertRaises(openml.exceptions.OpenMLCacheException): openml.runs.functions._get_cached_run(10) + @pytest.mark.sklearn def test_run_flow_on_task_downloaded_flow(self): model = sklearn.ensemble.RandomForestClassifier(n_estimators=33) flow = self.extension.model_to_flow(model) @@ -1490,7 +1593,10 @@ def test_run_flow_on_task_downloaded_flow(self): downloaded_flow = openml.flows.get_flow(flow.flow_id) task = openml.tasks.get_task(self.TEST_SERVER_TASK_SIMPLE["task_id"]) run = openml.runs.run_flow_on_task( - flow=downloaded_flow, task=task, avoid_duplicate_runs=False, upload_flow=False, + flow=downloaded_flow, + task=task, + avoid_duplicate_runs=False, + upload_flow=False, ) run.publish() @@ -1569,13 +1675,14 @@ def test_format_prediction_task_regression(self): res = format_prediction(regression, *ignored_input) self.assertListEqual(res, [0] * 5) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", ) - @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test__run_task_get_arffcontent_2(self, parallel_mock): - """ Tests if a run executed in parallel is collated correctly. """ + """Tests if a run executed in parallel is collated correctly.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp x, y = task.get_X_and_y(dataset_format="dataframe") num_instances = x.shape[0] @@ -1622,13 +1729,14 @@ def test__run_task_get_arffcontent_2(self, parallel_mock): scores, expected_scores, decimal=2 if os.name == "nt" else 7 ) + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.21", reason="couldn't perform local tests successfully w/o bloating RAM", ) - @unittest.mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") + @mock.patch("openml.extensions.sklearn.SklearnExtension._prevent_optimize_n_jobs") def test_joblib_backends(self, parallel_mock): - """ Tests evaluation of a run using various joblib backends and n_jobs. 
""" + """Tests evaluation of a run using various joblib backends and n_jobs.""" task = openml.tasks.get_task(7) # Supervised Classification on kr-vs-kp x, y = task.get_X_and_y(dataset_format="dataframe") num_instances = x.shape[0] @@ -1677,3 +1785,82 @@ def test_joblib_backends(self, parallel_mock): self.assertEqual(len(res[2]["predictive_accuracy"][0]), 10) self.assertEqual(len(res[3]["predictive_accuracy"][0]), 10) self.assertEqual(parallel_mock.call_count, call_count) + + @unittest.skipIf( + LooseVersion(sklearn.__version__) < "0.20", + reason="SimpleImputer doesn't handle mixed type DataFrame as input", + ) + def test_delete_run(self): + rs = 1 + clf = sklearn.pipeline.Pipeline( + steps=[("imputer", SimpleImputer()), ("estimator", DecisionTreeClassifier())] + ) + task = openml.tasks.get_task(32) # diabetes; crossvalidation + + run = openml.runs.run_model_on_task(model=clf, task=task, seed=rs) + run.publish() + TestBase._mark_entity_for_removal("run", run.run_id) + TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id)) + + _run_id = run.run_id + self.assertTrue(delete_run(_run_id)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_run_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The run can not be deleted because it was not uploaded by you.", + ): + openml.runs.delete_run(40_000) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/40000",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_run_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.runs.delete_run(10591880) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/10591880",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "runs" / "run_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Run does not exist", + ): + openml.runs.delete_run(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/run/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_runs/test_trace.py b/tests/test_runs/test_trace.py index 96724d139..6e8a7afba 100644 --- a/tests/test_runs/test_trace.py +++ b/tests/test_runs/test_trace.py @@ -25,19 +25,22 @@ def test_get_selected_iteration(self): # This next one should simply not fail self.assertEqual(trace.get_selected_iteration(2, 2), 2) with 
self.assertRaisesRegex( - ValueError, "Could not find the selected iteration for rep/fold 3/3", + ValueError, + "Could not find the selected iteration for rep/fold 3/3", ): trace.get_selected_iteration(3, 3) def test_initialization(self): - """Check all different ways to fail the initialization """ + """Check all different ways to fail the initialization""" with self.assertRaisesRegex( - ValueError, "Trace content not available.", + ValueError, + "Trace content not available.", ): OpenMLRunTrace.generate(attributes="foo", content=None) with self.assertRaisesRegex( - ValueError, "Trace attributes not available.", + ValueError, + "Trace attributes not available.", ): OpenMLRunTrace.generate(attributes=None, content="foo") with self.assertRaisesRegex(ValueError, "Trace content is empty."): @@ -60,7 +63,7 @@ def test_duplicate_name(self): ] trace_content = [[0, 0, 0, 0.5, "true", 1], [0, 0, 0, 0.9, "false", 2]] with self.assertRaisesRegex( - ValueError, "Either setup_string or parameters needs to be passed as argument." + ValueError, "Either `setup_string` or `parameters` needs to be passed as argument." ): OpenMLRunTrace.generate(trace_attributes, trace_content) diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 538b08821..73a691d84 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -10,6 +10,7 @@ from openml.testing import TestBase from typing import Dict import pandas as pd +import pytest import sklearn.tree import sklearn.naive_bayes @@ -34,6 +35,7 @@ def setUp(self): self.extension = openml.extensions.sklearn.SklearnExtension() super().setUp() + @pytest.mark.sklearn def test_nonexisting_setup_exists(self): # first publish a non-existing flow sentinel = get_sentinel() @@ -81,22 +83,27 @@ def _existing_setup_exists(self, classif): setup_id = openml.setups.setup_exists(flow) self.assertEqual(setup_id, run.setup_id) + @pytest.mark.sklearn def test_existing_setup_exists_1(self): def side_effect(self): self.var_smoothing = 1e-9 self.priors = None with unittest.mock.patch.object( - sklearn.naive_bayes.GaussianNB, "__init__", side_effect, + sklearn.naive_bayes.GaussianNB, + "__init__", + side_effect, ): # Check a flow with zero hyperparameters nb = sklearn.naive_bayes.GaussianNB() self._existing_setup_exists(nb) + @pytest.mark.sklearn def test_exisiting_setup_exists_2(self): # Check a flow with one hyperparameter self._existing_setup_exists(sklearn.naive_bayes.GaussianNB()) + @pytest.mark.sklearn def test_existing_setup_exists_3(self): # Check a flow with many hyperparameters self._existing_setup_exists( diff --git a/tests/test_study/test_study_examples.py b/tests/test_study/test_study_examples.py index 682359a61..cc3294085 100644 --- a/tests/test_study/test_study_examples.py +++ b/tests/test_study/test_study_examples.py @@ -3,6 +3,7 @@ from openml.testing import TestBase from openml.extensions.sklearn import cat, cont +import pytest import sklearn import unittest from distutils.version import LooseVersion @@ -12,6 +13,7 @@ class TestStudyFunctions(TestBase): _multiprocess_can_split_ = True """Test the example code of Bischl et al. 
(2018)""" + @pytest.mark.sklearn @unittest.skipIf( LooseVersion(sklearn.__version__) < "0.24", reason="columntransformer introduction in 0.24.0", diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index e028ba2bd..3d7811f6e 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -1,4 +1,5 @@ # License: BSD 3-Clause +from typing import Optional, List import openml import openml.study @@ -43,7 +44,8 @@ def test_get_study_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, "Unexpected entity type 'task' reported by the server, expected 'run'", + ValueError, + "Unexpected entity type 'task' reported by the server, expected 'run'", ): openml.study.get_study(99) @@ -61,7 +63,8 @@ def test_get_suite_error(self): openml.config.server = self.production_server with self.assertRaisesRegex( - ValueError, "Unexpected entity type 'run' reported by the server, expected 'task'", + ValueError, + "Unexpected entity type 'run' reported by the server, expected 'task'", ): openml.study.get_suite(123) @@ -114,6 +117,31 @@ def test_publish_benchmark_suite(self): self.assertEqual(study_downloaded.status, "deactivated") # can't delete study, now it's not longer in preparation + def _test_publish_empty_study_is_allowed(self, explicit: bool): + runs: Optional[List[int]] = [] if explicit else None + kind = "explicit" if explicit else "implicit" + + study = openml.study.create_study( + name=f"empty-study-{kind}", + description=f"a study with no runs attached {kind}ly", + run_ids=runs, + ) + + study.publish() + TestBase._mark_entity_for_removal("study", study.id) + TestBase.logger.info("collected from {}: {}".format(__file__.split("/")[-1], study.id)) + + self.assertGreater(study.id, 0) + study_downloaded = openml.study.get_study(study.id) + self.assertEqual(study_downloaded.main_entity_type, "run") + self.assertIsNone(study_downloaded.runs) + + def test_publish_empty_study_explicit(self): + self._test_publish_empty_study_is_allowed(explicit=True) + + def test_publish_empty_study_implicit(self): + self._test_publish_empty_study_is_allowed(explicit=False) + @pytest.mark.flaky() def test_publish_study(self): # get some random runs to attach @@ -214,7 +242,7 @@ def test_study_attach_illegal(self): def test_study_list(self): study_list = openml.study.list_studies(status="in_preparation") - # might fail if server is recently resetted + # might fail if server is recently reset self.assertGreaterEqual(len(study_list), 2) def test_study_list_output_format(self): diff --git a/tests/test_tasks/test_split.py b/tests/test_tasks/test_split.py index 7c3dcf9aa..7d8004a91 100644 --- a/tests/test_tasks/test_split.py +++ b/tests/test_tasks/test_split.py @@ -82,8 +82,16 @@ def test_get_split(self): self.assertEqual(train_split.shape[0], 808) self.assertEqual(test_split.shape[0], 90) self.assertRaisesRegex( - ValueError, "Repeat 10 not known", split.get, 10, 2, + ValueError, + "Repeat 10 not known", + split.get, + 10, + 2, ) self.assertRaisesRegex( - ValueError, "Fold 10 not known", split.get, 2, 10, + ValueError, + "Fold 10 not known", + split.get, + 2, + 10, ) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 418b21b65..dde3561f4 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -3,10 +3,13 @@ import os from unittest import mock +import pytest +import requests + from openml.tasks import 
TaskType -from openml.testing import TestBase +from openml.testing import TestBase, create_request_response from openml import OpenMLSplit, OpenMLTask -from openml.exceptions import OpenMLCacheException +from openml.exceptions import OpenMLCacheException, OpenMLNotAuthorizedError, OpenMLServerException import openml import unittest import pandas as pd @@ -143,7 +146,15 @@ def test_get_task(self): self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "1", "task.xml",) + os.path.join( + self.workdir, + "org", + "openml", + "test", + "tasks", + "1", + "task.xml", + ) ) ) self.assertTrue( @@ -162,7 +173,15 @@ def test_get_task_lazy(self): self.assertIsInstance(task, OpenMLTask) self.assertTrue( os.path.exists( - os.path.join(self.workdir, "org", "openml", "test", "tasks", "2", "task.xml",) + os.path.join( + self.workdir, + "org", + "openml", + "test", + "tasks", + "2", + "task.xml", + ) ) ) self.assertEqual(task.class_labels, ["1", "2", "3", "4", "5", "U"]) @@ -230,7 +249,91 @@ def test_download_split(self): def test_deletion_of_cache_dir(self): # Simple removal - tid_cache_dir = openml.utils._create_cache_directory_for_id("tasks", 1,) + tid_cache_dir = openml.utils._create_cache_directory_for_id( + "tasks", + 1, + ) self.assertTrue(os.path.exists(tid_cache_dir)) openml.utils._remove_cache_dir_for_id("tasks", tid_cache_dir) self.assertFalse(os.path.exists(tid_cache_dir)) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_task_not_owned(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_owned.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The task can not be deleted because it was not uploaded by you.", + ): + openml.tasks.delete_task(1) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/1",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_task_with_run(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_has_runs.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLNotAuthorizedError, + match="The task can not be deleted because it still has associated entities:", + ): + openml.tasks.delete_task(3496) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/3496",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_success(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_successful.xml" + mock_delete.return_value = create_request_response( + status_code=200, content_filepath=content_file + ) + + success = openml.tasks.delete_task(361323) + assert success + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/361323",), + {"params": {"api_key": test_api_key}}, + ] + assert 
expected_call_args == list(mock_delete.call_args) + + +@mock.patch.object(requests.Session, "delete") +def test_delete_unknown_task(mock_delete, test_files_directory, test_api_key): + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "tasks" / "task_delete_not_exist.xml" + mock_delete.return_value = create_request_response( + status_code=412, content_filepath=content_file + ) + + with pytest.raises( + OpenMLServerException, + match="Task does not exist", + ): + openml.tasks.delete_task(9_999_999) + + expected_call_args = [ + ("https://test.openml.org/api/v1/xml/task/9999999",), + {"params": {"api_key": test_api_key}}, + ] + assert expected_call_args == list(mock_delete.call_args) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 4fa08e1ab..a5add31c8 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -98,6 +98,7 @@ def test__create_cache_directory(self, config_mock): os.chmod(subdir, 0o444) config_mock.return_value = subdir with self.assertRaisesRegex( - openml.exceptions.OpenMLCacheException, r"Cannot create cache directory", + openml.exceptions.OpenMLCacheException, + r"Cannot create cache directory", ): openml.utils._create_cache_directory("ghi")
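
Editor's note on the new mocked delete tests: the run- and task-deletion tests added above all follow the same pattern — patch requests.Session.delete, return a canned XML response via the create_request_response helper, then assert both the raised exception and the exact endpoint and API key passed to the server. They rely on two pytest fixtures, test_files_directory and test_api_key, which are not shown in this patch. The following conftest.py-style sketch only illustrates what such fixtures might look like; the fixture bodies, the "files" directory location, and the use of openml.config.apikey are assumptions for illustration, not taken from this diff.

# conftest.py -- hypothetical sketch, not part of this patch
from pathlib import Path

import pytest

import openml


@pytest.fixture
def test_files_directory() -> Path:
    # Assumed to point at the directory holding the canned XML responses used
    # above, e.g. <this dir>/files/mock_responses/runs/run_delete_not_owned.xml.
    return Path(__file__).parent / "files"


@pytest.fixture
def test_api_key() -> str:
    # Placeholder: the real fixture presumably yields the API key used by the
    # test-server configuration, which is what the call-args assertions check.
    return openml.config.apikey

Because requests.Session.delete is fully mocked, these tests exercise only the URL construction and the mapping of 412 responses to OpenMLNotAuthorizedError or OpenMLServerException, without ever contacting the test server. Separately, the new @pytest.mark.sklearn marker would normally be registered (for example in setup.cfg or pytest.ini) so pytest does not warn about an unknown mark; that registration is not visible in this excerpt.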