Skip to content
102 changes: 89 additions & 13 deletions Python/rerf/rerfClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ class rerfClassifier(BaseEstimator, ClassifierMixin):
Parameters
----------
projection_matrix : str, optional (default: "RerF")
The random combination of features to use: either "RerF", "Base", or
"S-RerF". "RerF" randomly combines features for each `mtry`. Base
is our implementation of Random Forest. "S-RerF" is structured RerF,
combining multiple features together in random patches.
See Tomita et al. (2016) [#Tomita]_ for further details.
The random combination of features to use: either "RerF", "Base",
"MORF", or "MORF-3D". "RerF" randomly combines features for each
`mtry`. Base is our implementation of Random Forest. "MORF" is
structured RerF, combining multiple features together in random patches.
See Tomita et al. (2016) [#Tomita]_ for further details. "MORF-3D"
is the 3 dimensional extension of MORF.
n_estimators : int, optional (default: 500)
Number of trees in forest.

Expand Down Expand Up @@ -85,19 +86,26 @@ class rerfClassifier(BaseEstimator, ClassifierMixin):
Random seed to use. If None, set seed to ``np.random.randint(1, 1000000)``.

image_height : int, optional (default=None)
S-RerF required parameter. Image height of each observation.
MORF required parameter. Image height of each observation.
image_width : int, optional (default=None)
S-RerF required parameter. Width of each observation.
MORF required parameter. Image width of each observation.
image_depth : int, optional (default=None)
MORF required parameter. Image depth of each observation.
patch_height_max : int, optional (default=max(2, floor(sqrt(image_height))))
S-RerF parameter. Maximum image patch height to randomly select from.
MORF parameter. Maximum image patch height to randomly select from.
If None, set to ``max(2, floor(sqrt(image_height)))``.
patch_height_min : int, optional (default=1)
S-RerF parameter. Minimum image patch height to randomly select from.
MORF parameter. Minimum image patch height to randomly select from.
patch_width_max : int, optional (default=max(2, floor(sqrt(image_width))))
S-RerF parameter. Maximum image patch width to randomly select from.
MORF parameter. Maximum image patch width to randomly select from.
If None, set to ``max(2, floor(sqrt(image_width)))``.
patch_width_min : int, optional (default=1)
S-RerF parameter. Minimum image patch height to randomly select from.
MORF parameter. Minimum image patch width to randomly select from.
patch_depth_max : int, optional (default=max(2, floor(sqrt(image_depth))))
MORF parameter. Maximum image patch depth to randomly select from.
If None, set to ``max(2, floor(sqrt(image_depth)))``.
patch_depth_min : int, optional (default=1)
MORF parameter. Minimum image patch depth to randomly select from.

Returns
-------
Expand Down Expand Up @@ -147,10 +155,13 @@ def __init__(
random_state=None,
image_height=None,
image_width=None,
image_depth=None,
patch_height_max=None,
patch_height_min=1,
patch_width_max=None,
patch_width_min=1,
patch_depth_max=None,
patch_depth_min=1,
):
self.projection_matrix = projection_matrix
self.n_estimators = n_estimators
Expand All @@ -165,10 +176,13 @@ def __init__(
# MORF / MORF-3D params
self.image_height = image_height
self.image_width = image_width
self.image_depth = image_depth
self.patch_height_max = patch_height_max
self.patch_height_min = patch_height_min
self.patch_width_max = patch_width_max
self.patch_width_min = patch_width_min
self.patch_depth_max = patch_depth_max
self.patch_depth_min = patch_depth_min

def fit(self, X, y):
"""Fit estimator.
Expand Down Expand Up @@ -230,10 +244,10 @@ def fit(self, X, y):
else:
forestType = "binnedBaseTern"
self.method_to_use_ = 1
elif self.projection_matrix == "S-RerF":
elif self.projection_matrix == "MORF":
if self.oob_score:
warn(
"OOB is not currently implemented for the S-RerF"
"OOB is not currently implemented for the MORF"
" algorithm. Continuing with oob_score = False.",
RuntimeWarning,
stacklevel=2,
Expand Down Expand Up @@ -277,6 +291,68 @@ def fit(self, X, y):
self.forest_.setParameter("patchHeightMin", self.patch_height_min_)
self.forest_.setParameter("patchWidthMax", self.patch_width_max_)
self.forest_.setParameter("patchWidthMin", self.patch_width_min_)
elif self.projection_matrix == "MORF-3D":
if self.oob_score:
warn(
"OOB is not currently implemented for the MORF-3D"
" algorithm. Continuing with oob_score = False.",
RuntimeWarning,
stacklevel=2,
)
self.oob_score = False

forestType = "binnedBaseTern" # this should change
self.method_to_use_ = 3
# Check that image_height, image_width, and image_depth are
# divisors of num_features. This is the most we can do to
# prevent an invalid value being passed in.
if (num_features % self.image_height) != 0:
raise ValueError("Incorrect image_height given:")
else:
self.image_height_ = self.image_height
self.forest_.setParameter("imageHeight", self.image_height_)
if (num_features % self.image_width) != 0:
raise ValueError("Incorrect image_width given:")
else:
self.image_width_ = self.image_width
self.forest_.setParameter("imageWidth", self.image_width_)
if (num_features % self.image_depth) != 0:
raise ValueError("Incorrect image_depth given:")
else:
self.image_depth_ = self.image_depth
self.forest_.setParameter("imageDepth", self.image_depth_)
# If patch_{height, width, depth}_max are not set by the user,
# set them to defaults, then validate that each corresponding
# patch_*_min lies between 1 and its maximum.
if self.patch_height_max is None:
self.patch_height_max_ = max(2, floor(sqrt(self.image_height_)))
else:
self.patch_height_max_ = self.patch_height_max
if self.patch_width_max is None:
self.patch_width_max_ = max(2, floor(sqrt(self.image_width_)))
else:
self.patch_width_max_ = self.patch_width_max
if self.patch_depth_max is None:
self.patch_depth_max_ = max(2, floor(sqrt(self.image_depth_)))
else:
self.patch_depth_max_ = self.patch_depth_max
if 1 <= self.patch_height_min <= self.patch_height_max_:
self.patch_height_min_ = self.patch_height_min
else:
raise ValueError("Incorrect patch_height_min")
if 1 <= self.patch_width_min <= self.patch_width_max_:
self.patch_width_min_ = self.patch_width_min
else:
raise ValueError("Incorrect patch_width_min")
if 1 <= self.patch_depth_min <= self.patch_depth_max_:
self.patch_depth_min_ = self.patch_depth_min
else:
raise ValueError("Incorrect patch_depth_min")
self.forest_.setParameter("patchHeightMax", self.patch_height_max_)
self.forest_.setParameter("patchHeightMin", self.patch_height_min_)
self.forest_.setParameter("patchWidthMax", self.patch_width_max_)
self.forest_.setParameter("patchWidthMin", self.patch_width_min_)
self.forest_.setParameter("patchDepthMax", self.patch_depth_max_)
self.forest_.setParameter("patchDepthMin", self.patch_depth_min_)
else:
raise ValueError("Incorrect projection matrix")
self.forest_.setParameter("forestType", forestType)
Expand Down
42 changes: 41 additions & 1 deletion Python/tests/test_rerfClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re

import numpy as np
import pandas as pd
import pytest
from sklearn import datasets, metrics
from sklearn.utils.validation import check_random_state
Expand Down Expand Up @@ -115,7 +116,7 @@ def test_s_rerf():
y_train = y[: n // 2]

clf = rerfClassifier(
projection_matrix="S-RerF", image_height=8, image_width=8, n_estimators=10
projection_matrix="MORF", image_height=8, image_width=8, n_estimators=10
)

clf.fit(X_train, y_train)
Expand All @@ -142,6 +143,45 @@ def test_s_rerf():
assert clf.patch_height_min_ == 1


def test_s_rerf_3d():
    """Fit a MORF-3D forest on the ``cifar_01.csv`` fixture and check it.

    The fixture is read with the label in column 0 and the remaining
    columns interpreted as 32 x 32 x 3 images.  The test checks that the
    classifier scores above 0.5 on its own training data, that the
    constructor arguments are stored unchanged, and that ``fit`` resolves
    the patch-size defaults to ``max(2, floor(sqrt(dim)))`` (max) and 1 (min).
    """
    from pathlib import Path

    height, width, depth = 32, 32, 3

    # Anchor the fixture path to this file (Python/tests/...) so the test
    # does not depend on the directory pytest is launched from.
    repo_root = Path(__file__).resolve().parents[2]
    csv_path = repo_root / "packedForest" / "res" / "cifar_01.csv"
    mat = pd.read_csv(csv_path, header=None).values

    Y = mat[:, 0]
    # Infer the number of observations instead of hard-coding it.
    X = mat[:, 1:].reshape(-1, height, width, depth)
    # np.swapaxes returns a new view and never modifies its argument; the
    # result must be assigned or the call is a no-op.
    # NOTE(review): this makes the intended axis swap take effect -- confirm
    # the resulting feature layout is the one MORF-3D expects.
    X = np.swapaxes(X, 1, -1)
    X = X.reshape(X.shape[0], -1)

    clf = rerfClassifier(
        projection_matrix="MORF-3D",
        image_height=height,
        image_width=width,
        image_depth=depth,
        n_estimators=10,
    )
    clf.fit(X, Y)
    score = clf.score(X, Y)
    assert score > 0.5

    param_names = (
        "image_height",
        "image_width",
        "image_depth",
        "patch_width_max",
        "patch_width_min",
        "patch_depth_max",
        "patch_depth_min",
        "patch_height_max",
        "patch_height_min",
    )
    for name in param_names:
        # Constructor argument is kept as given ...
        assert hasattr(clf, name)
        # ... and fit() stores the validated/resolved value with a
        # trailing underscore.
        assert hasattr(clf, name + "_")

    assert clf.image_height == height
    assert clf.image_width == width
    assert clf.image_depth == depth
    assert clf.patch_height_max_ == math.floor(math.sqrt(height))
    assert clf.patch_height_min_ == 1
    assert clf.patch_width_max_ == math.floor(math.sqrt(width))
    assert clf.patch_width_min_ == 1
    # floor(sqrt(3)) == 1, so the default is clamped up to 2.
    assert clf.patch_depth_max_ == 2
    assert clf.patch_depth_min_ == 1


def check_iris_criterion(projection_matrix):
# Check consistency on dataset iris.

Expand Down
Loading