Upgraded RANK and docs

KulikDM · Oct 29, 2023 · 132aefc · 132aefc
1 parent e93957d
commit 132aefc
Show file tree

Hide file tree

Showing 20 changed files with 562 additions and 183 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -70,3 +70,4 @@ v<0.3.4>, <09/05/2023> -- Added HDBSCAN to clust
 v<0.3.4>, <09/06/2023> -- Added RANK for OD ranking
 v<0.3.4>, <09/07/2023> -- Added Rankings to docs
 v<0.3.4>, <09/08/2023> -- Updated GNBC model for meta
+v<0.3.5>, <10/29/2023> -- Upgraded RANK and docs
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -17,3 +17,4 @@ include pythresh/models/meta_model_GNB.pkl
 include pythresh/models/meta_model_GNBC.pkl
 include pythresh/models/meta_model_GNBM.pkl
 include pythresh/models/meta_model_LIN.pkl
+include pythresh/models/rank_model_XGB.json
diff --git a/README.rst b/README.rst
@@ -167,10 +167,11 @@ Or with **pip**:
 -  ruptures (used in the CPD thresholder)
 -  geomstats (used in the KARCH thresholder)
 -  scikit-lego (used in the META thresholder)
--  joblib>=0.14.1 (used in the META thresholder)
+-  joblib>=0.14.1 (used in the META thresholder and RANK)
 -  pandas (used in the META thresholder)
 -  torch (used in the VAE thresholder)
 -  tqdm (used in the VAE thresholder)
+-  xgboost>=2.0.0 (used in RANK)
 
 ****************
  API Cheatsheet

diff --git a/docs/figs/Rank1.png b/docs/figs/Rank1.png
diff --git a/docs/figs/Rank2.png b/docs/figs/Rank2.png
diff --git a/docs/figs/Rank3.png b/docs/figs/Rank3.png
diff --git a/docs/figs/Rank4.png b/docs/figs/Rank4.png
diff --git a/docs/figs/Rank5.png b/docs/figs/Rank5.png
diff --git a/docs/figs/Rank6.png b/docs/figs/Rank6.png
diff --git a/docs/figs/Rank7.png b/docs/figs/Rank7.png
diff --git a/docs/install.rst b/docs/install.rst
@@ -44,7 +44,8 @@ Or with **pip**:
 -  ruptures (used in the CPD thresholder)
 -  geomstats (used in the KARCH thresholder)
 -  scikit-lego (used in the META thresholder)
--  joblib>=0.14.1 (used in the META thresholder)
+-  joblib>=0.14.1 (used in the META thresholder and RANK)
 -  pandas (used in the META thresholder)
 -  torch (used in the VAE thresholder)
 -  tqdm (used in the VAE thresholder)
+-  xgboost>=2.0.0 (used in RANK)
diff --git a/docs/pythresh.utils.rst b/docs/pythresh.utils.rst
@@ -8,7 +8,7 @@
 
 .. automodule:: pythresh.utils.rank
     :members:
-    :exclude-members: _cdf_metric, _clust_metric, _equi_rank, _equi_sort, _LK_metric
+    :exclude-members: _cdf_metric, _clust_metric, _consensus_metric, _equi_rank, _equi_sort
     :undoc-members:
     :show-inheritance:
     :inherited-members:
diff --git a/docs/ranking.rst b/docs/ranking.rst
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -15,3 +15,4 @@ sphinx-rtd-theme
 sphinxcontrib-bibtex
 torch
 tqdm
+xgboost
diff --git a/pythresh/models/rank_model_XGB.json b/pythresh/models/rank_model_XGB.json
diff --git a/pythresh/test/test_rank.py b/pythresh/test/test_rank.py
@@ -10,6 +10,8 @@
 from pyod.utils.data import generate_data
 
 from pythresh.thresholds.filter import FILTER
+from pythresh.thresholds.karch import KARCH
+from pythresh.thresholds.ocsvm import OCSVM
 from pythresh.utils.rank import RANK
 
 # temporary solution for relative imports in case pythresh is not installed
@@ -30,7 +32,10 @@ def setUp(self):
 
         self.clfs = [KNN(), PCA(), IForest()]
 
-        self.thres = [FILTER(), self.contamination]
+        self.thres = [FILTER(), self.contamination,
+                      [FILTER(), KARCH(), OCSVM()]]
+
+        self.method = ['model', 'native']
 
         self.weights = [[0.5, 0.25, 0.25],
                         [0.25, 0.5, 0.25],
@@ -39,25 +44,31 @@ def setUp(self):
 
     def test_prediction_labels(self):
 
-        params = product(self.thres, self.weights)
+        params = product(self.thres,
+                         self.method,
+                         self.weights)
 
-        for thres, weights in params:
+        for thres, method, weights in params:
 
-            ranker = RANK(self.clfs, thres, weights=weights)
+            ranker = RANK(self.clfs, thres, method=method, weights=weights)
             rankings = ranker.eval(self.X_train)
 
             cdf_rank = ranker.cdf_rank_
             clust_rank = ranker.clust_rank_
-            mode_rank = ranker.mode_rank_
+            consensus_rank = ranker.consensus_rank_
 
             assert (cdf_rank is not None)
             assert (clust_rank is not None)
-            assert (mode_rank is not None)
+            assert (consensus_rank is not None)
             assert (rankings is not None)
 
-            len_clf = len(self.clfs)
+            n_clfs = len(self.clfs)
+            n_thres = len(thres) if isinstance(thres, list) else 1
+            len_models = n_clfs * n_thres
+
+            assert (len(cdf_rank) == len_models)
+            assert (len(clust_rank) == len_models)
+            assert (len(consensus_rank) == len_models)
+            assert (len(rankings) == len_models)
 
-            assert (len(cdf_rank) == len_clf)
-            assert (len(clust_rank) == len_clf)
-            assert (len(mode_rank) == len_clf)
-            assert (len(rankings) == len_clf)
+            assert (len(set(rankings)) == len_models)
diff --git a/pythresh/thresholds/meta.py b/pythresh/thresholds/meta.py
@@ -58,7 +58,7 @@ class META(BaseThresholder):
        magic.gamma, mammography, mnist, musk, optdigits, PageBlocks, pendigits, Pima,
        satellite, satimage-2, shuttle, smtp, SpamBase, speech, Stamps, thyroid, vertebral,
        vowels, Waveform,  WBC, WDBC, Wilt, wine, WPBC, yeast`` available at
-       `ADBench dataset <https://github.com/Minqi824/ADBench/tree/main/datasets/Classical>`_.
+       `ADBench dataset <https://github.com/Minqi824/ADBench/tree/main/adbench/datasets/Classical>`_.
        META uses a majority vote of all the trained models to determine the
        inlier/outlier labels.
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,3 +15,4 @@ sphinx-rtd-theme @@
     sphinxcontrib-bibtex
     torch
     tqdm
+    xgboost