Review/improvements (#1)

- bugfixing - add workflows for tests and tutorials - linting - fix wheel issue
vanderschaarlab · Oct 31, 2022 · 14c6733 · 14c6733
1 parent f2da116
commit 14c6733
Show file tree

Hide file tree

Showing 29 changed files with 689 additions and 266 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,73 @@
+name: Package release
+
+on:
+  release:
+    types: [created]
+
+
+jobs:
+  deploy_osx:  # Runs the macOS release script once per Python version in the matrix.
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        os: [macos-latest]
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set up Python
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Build and publish
+        env:  # PyPI credentials from repository secrets, exposed to the release script
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_osx.sh
+
+  deploy_linux:  # Runs the Linux release script in the manylinux2014 image, once per CPython ABI tag.
+    strategy:
+      matrix:
+        python-version:  # each entry is used as a directory name under /opt/python in the container
+          - cp37-cp37m
+          - cp38-cp38
+          - cp39-cp39
+          - cp310-cp310
+
+    runs-on: ubuntu-latest
+    container: quay.io/pypa/manylinux2014_x86_64
+    steps:
+      - uses: actions/checkout@v1
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set target Python version PATH
+        run: |
+            echo "/opt/python/${{ matrix.python-version }}/bin" >> $GITHUB_PATH
+      - name: Build and publish
+        env:  # PyPI credentials from repository secrets, exposed to the release script
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: ${GITHUB_WORKSPACE}/.github/workflows/scripts/release_linux.sh
+
+  deploy_windows:  # Runs the Windows release script once per Python version in the matrix.
+    runs-on: windows-latest
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Build and publish  # NOTE(review): the script path below is relative and climbs two directories - confirm it resolves from this step's working directory
+        env:  # PyPI credentials from repository secrets, exposed to the release script
+          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        run: |
+          ../../.github/workflows/scripts/release_windows.bat
diff --git a/.github/workflows/scripts/release_linux.sh b/.github/workflows/scripts/release_linux.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+set -e
+
+yum makecache -y
+yum install centos-release-scl -y
+yum-config-manager --enable rhel-server-rhscl-7-rpms
+yum install llvm-toolset-7.0 python3 python3-devel -y
+
+# Python
+python3 -m pip install --upgrade pip
+python3 -m pip install setuptools wheel twine auditwheel
+
+# Publish
+python3 -m pip wheel . -w dist/ --no-deps
+twine upload --verbose --skip-existing dist/*
diff --git a/.github/workflows/scripts/release_osx.sh b/.github/workflows/scripts/release_osx.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+export MACOSX_DEPLOYMENT_TARGET=10.14
+
+python -m pip install --upgrade pip
+pip install setuptools wheel twine auditwheel
+
+python3 setup.py build bdist_wheel --plat-name macosx_10_14_x86_64 --dist-dir wheel
+twine upload --skip-existing wheel/*
diff --git a/.github/workflows/scripts/release_windows.bat b/.github/workflows/scripts/release_windows.bat
@@ -0,0 +1,7 @@
+echo on
+
+python -m pip install --upgrade pip
+pip install setuptools wheel twine auditwheel
+
+pip wheel . -w wheel/ --no-deps
+twine upload --skip-existing wheel/*
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,62 @@
+name: Tests
+
+on:
+  push:
+    branches: [main, release]
+  pull_request:
+    types: [opened, synchronize, reopened]
+  schedule:
+    - cron:  '0 0 * * 0'
+
+
+jobs:
+  Linter:  # Runs the pre-commit hooks and a bandit scan over the data_iq package.
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ["3.8"]  # quoted so the version is a string, not a YAML float
+        os: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: pip install .[testing]
+      - name: pre-commit validation
+        run: pre-commit run --files data_iq/*
+      - name: Security checks
+        run: |
+            bandit -r data_iq/*
+
+  Library:  # Runs the pytest suite on every OS / Python combination, after the Linter job succeeds.
+    needs: [Linter]
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        os: [macos-latest, ubuntu-latest, windows-latest]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install MacOS dependencies  # swaps the default libomp for a pinned one from a third-party tap
+        run: |
+            brew unlink libomp
+            brew install rajivshah3/libomp-tap/libomp@11.1.0
+        if: ${{ matrix.os == 'macos-latest' }}
+      - name: Install dependencies
+        run: |
+            pip install --upgrade pip
+            pip install --upgrade cryptography scipy scikit-learn xgboost lightgbm torch
+
+            pip install .[testing]
+      - name: Test with pytest  # tests marked "slow" are deselected
+        run: pytest -vvvsx -m "not slow" --durations=50
diff --git a/.github/workflows/test_tutorials.yml b/.github/workflows/test_tutorials.yml
@@ -0,0 +1,42 @@
+name: Tutorials
+
+on:
+  push:
+    branches: [main, release]
+  pull_request:
+    types: [opened, synchronize, reopened]
+  schedule:
+    - cron:  '0 0 * * 0'
+
+jobs:
+  Tutorials:  # Executes the tutorial notebooks through tutorial/nb_test.py for each Python version.
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        os: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          submodules: true  # also check out git submodules
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v1
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install MacOS dependencies  # gated on macos-latest, which this matrix does not currently include
+        run: |
+            brew unlink libomp
+            brew install rajivshah3/libomp-tap/libomp@11.1.0
+        if: ${{ matrix.os == 'macos-latest' }}
+      - name: Install dependencies
+        run: |
+            pip install --upgrade pip
+            pip install --upgrade cryptography scipy scikit-learn xgboost lightgbm
+
+            pip install .
+            pip install .[testing]
+
+            python -m pip install ipykernel
+            python -m ipykernel install --user
+      - name: Run the tutorials
+        run: python tutorial/nb_test.py --nb_dir tutorial/
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,4 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+catboost_info
diff --git a/.isort.cfg b/.isort.cfg
@@ -0,0 +1,24 @@
+[settings]
+profile = black
+known_first_party = data_iq
+known_data_iq=data_iq
+multi_line_output = 3
+remove_redundant_aliases=True
+sections=FUTURE,STDLIB,THIRDPARTY,DATA_IQ,LOCALFOLDER
+default_section=THIRDPARTY
+lines_between_types=0
+force_sort_within_sections=True
+import_heading_future=future
+import_heading_stdlib=stdlib
+import_heading_thirdparty=third party
+import_heading_data_iq=data_iq absolute
+import_heading_localfolder=data_iq relative
+ignore_comments=False
+# force_grid_wrap is set exactly once, near the end of this section; duplicate keys are ambiguous
+honor_noqa=True
+group_by_package=True
+combine_as_imports=True
+include_trailing_comma = True
+force_grid_wrap = 0
+use_parentheses = True
+line_length = 288
diff --git a/README.md b/README.md
@@ -1,4 +1,10 @@
 # Data-IQ: Characterizing subgroups with heterogeneous outcomes in tabular data
+
+[![Tests](https://github.com/vanderschaarlab/Data-IQ/actions/workflows/test.yml/badge.svg)](https://github.com/vanderschaarlab/Data-IQ/actions/workflows/test.yml)
+[![Tutorials](https://github.com/vanderschaarlab/Data-IQ/actions/workflows/test_tutorials.yml/badge.svg)](https://github.com/vanderschaarlab/Data-IQ/actions/workflows/test_tutorials.yml)
+[![arXiv](https://img.shields.io/badge/arXiv-2210.13043-b31b1b.svg)](https://arxiv.org/abs/2210.13043)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/vanderschaarlab/Data-IQ/blob/main/LICENSE)
+
 ![image](pipeline.png "Data-IQ pipeline")
 
 This repository contains the implementation of Data-IQ, a "Data-Centric AI" framework to characterize subgroups with heterogeneous outcomes in tabular data.
@@ -21,15 +27,19 @@ For more details, please read our [NeurIPS 2022 paper](Link coming soon): *Data-
 
 - Minimum requirements to run Data-IQ on your own data
  ```shell
-pip install -r requirements-minimum.txt
+pip install data_iq
+ ```
+or from source,
+ ```shell
+pip install .
  ```
 
-- Full requirements to run Data-IQ 
+- Full requirements to run Data-IQ tests and tutorials
  ```shell
-pip install -r requirements.txt
+pip install .[testing]
  ```
 
-4. Link the venv to the kernal:
+4. Link the venv to the kernel:
   ```shell
  python -m ipykernel install --user --name=dataiq_env
  ```
@@ -40,7 +50,11 @@ pip install -r requirements.txt
 
 ### Getting Data-IQ
 
-* **Option 1**: Install as a package (called ``data_iq``)
+* **Option 1**: Install as a package (called ``data_iq``) from PyPI using
+ ```shell
+pip install data_iq
+ ```
+ or from source using
  ```shell
     python -m pip install -e .
  ```
@@ -83,7 +97,7 @@ for e in range(1, EPOCHS + 1):
     for X_batch, y_batch in train_loader:
         ### ADD TRAIN LOOP HERE ###
 
-    
+
     # *** CALL DATA-IQ on EPOCH END [LINE 2] ***
     dataiq.on_epoch_end(net, device=device)
 
@@ -118,7 +132,7 @@ clf.fit(X_train, y_train)
 
 for i in range(1, nest):
     # *** Characterize with Data-IQ [LINE 2] ***
-    dataiq_xgb.on_epoch_end(clf=clf, iteration=i) 
+    dataiq_xgb.on_epoch_end(clf=clf, iteration=i)
 
 # *** Access metrics ***
 aleatoric_uncertainty = dataiq_xgb.aleatoric
@@ -141,7 +155,7 @@ These notebooks can be found in the ``/tutorial`` folder. The Adult open-source
 
 
 ## Data-IQ metrics
-The Data-IQ package provides a wide variety of metrics to characterize training dynamics. 
+The Data-IQ package provides a wide variety of metrics to characterize training dynamics.
 
 The primary & suggested metrics are: **Aleatoric uncertainty** and **Predictive confidence**. However, other metrics are also included as part of the package.
 
@@ -157,7 +171,7 @@ The different metrics available to characterize training dynamics are as follows
 
 ## Use-cases
 
-We highlight different use-cases of Data-IQ from understanding learning dynamics, creating characteristic curves, feature acquisition etc as well as, different data modalities in notebooks which can be found in the ``/use_cases`` folder. 
+We highlight different use-cases of Data-IQ, from understanding learning dynamics and creating characteristic curves to feature acquisition, as well as different data modalities, in notebooks which can be found in the ``/use_cases`` folder.
 
 
 ## Citing

diff --git a/data_iq/__init__.py b/data_iq/__init__.py
@@ -1,3 +1,4 @@
-from .dataiq_class import DataIQ_Torch, DataIQ_SKLearn
+# data_iq relative
+from .dataiq_class import DataIQ_SKLearn, DataIQ_Torch  # noqa: F401
 
-__version__ = "0.0.1"
+__version__ = "0.0.1"
Original file line number	Diff line number	Diff line change
Expand Up		@@ -127,3 +127,4 @@ dmypy.json

		# Pyre type checker
		.pyre/
		catboost_info