From 1bd7fe7f775c0c8123834e4ed1223ad6da096fbc Mon Sep 17 00:00:00 2001
From: Vikas Kumar Yadav
Date: Mon, 24 Jun 2019 18:20:25 +0530
Subject: [PATCH 1/3] Update pool-based_sampling.ipynb

---
 docs/source/content/examples/pool-based_sampling.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/content/examples/pool-based_sampling.ipynb b/docs/source/content/examples/pool-based_sampling.ipynb
index 14a1746..4f18408 100644
--- a/docs/source/content/examples/pool-based_sampling.ipynb
+++ b/docs/source/content/examples/pool-based_sampling.ipynb
@@ -8,7 +8,7 @@
     "\n",
     "## Overview\n",
     "\n",
-    "In this example, the we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
+    "In this example, we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
     "\n",
     "> Queries are selectively drawn from the pool, which is usually assumed to be closed (i.e., static or non-changing), although this is not strictly necessary. Typically, instances are queried in a greedy fashion, according to an informativeness measure used to evaluate all instances in the pool (or, perhaps if $\\mathcal{U}$ is very large, some subsample thereof).\n",
     "\n",

From fdd2aa79640f4c65a1c462aa24c12f939b06eb63 Mon Sep 17 00:00:00 2001
From: cosmic-cortex
Date: Mon, 11 Nov 2019 11:35:52 +0100
Subject: [PATCH 2/3] force_all_finite support added

---
 modAL/models/base.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/modAL/models/base.py b/modAL/models/base.py
index 822fdbc..dc0d35d 100644
--- a/modAL/models/base.py
+++ b/modAL/models/base.py
@@ -30,6 +30,8 @@ class BaseLearner(ABC, BaseEstimator):
             for instance, modAL.uncertainty.uncertainty_sampling.
         X_training: Initial training samples, if available.
         y_training: Initial training labels corresponding to initial training samples.
+        force_all_finite: When True, forces all values of the data to be finite.
+            When False, accepts np.nan and np.inf values.
         bootstrap_init: If initial training data is available, bootstrapping can be done during the first training.
             Useful when building Committee models with bagging.
         **fit_kwargs: keyword arguments.
@@ -47,6 +49,7 @@ def __init__(self,
                  X_training: Optional[modALinput] = None,
                  y_training: Optional[modALinput] = None,
                  bootstrap_init: bool = False,
+                 force_all_finite: bool = True,
                  **fit_kwargs
                  ) -> None:
         assert callable(query_strategy), 'query_strategy must be callable'
@@ -59,6 +62,9 @@ def __init__(self,
         if X_training is not None:
             self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)
 
+        assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool'
+        self.force_all_finite = force_all_finite
+
     def _add_training_data(self, X: modALinput, y: modALinput) -> None:
         """
         Adds the new data and label to the known data, but does not retrain the model.
@@ -71,7 +77,8 @@ def _add_training_data(self, X: modALinput, y: modALinput) -> None:
             If the classifier has been fitted, the features in X have to agree with the training samples which the
             classifier has seen.
         """
-        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                  force_all_finite=self.force_all_finite)
 
         if self.X_training is None:
             self.X_training = X
@@ -117,7 +124,8 @@ def _fit_on_new(self, X: modALinput, y: modALinput, bootstrap: bool = False, **f
         Returns:
             self
         """
-        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                  force_all_finite=self.force_all_finite)
 
         if not bootstrap:
             self.estimator.fit(X, y, **fit_kwargs)
@@ -146,7 +154,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
         Returns:
             self
         """
-        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+        check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                  force_all_finite=self.force_all_finite)
         self.X_training, self.y_training = X, y
         return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)

From 93c78041ecb2b7ed5893080c36cab1ec5116341e Mon Sep 17 00:00:00 2001
From: cosmic-cortex
Date: Mon, 11 Nov 2019 11:36:16 +0100
Subject: [PATCH 3/3] tests added for force_all_finite mode

---
 tests/core_tests.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/core_tests.py b/tests/core_tests.py
index bddbab7..1c80d7b 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -734,6 +734,25 @@ def test_teach(self):
 
                         learner.teach(X, y, bootstrap=bootstrap, only_new=only_new)
 
+    def test_nan(self):
+        X_training_nan = np.ones(shape=(10, 2)) * np.nan
+        X_training_inf = np.ones(shape=(10, 2)) * np.inf
+        y_training = np.random.randint(0, 2, size=10)
+
+        learner = modAL.models.learners.ActiveLearner(
+            X_training=X_training_nan, y_training=y_training,
+            estimator=mock.MockEstimator(),
+            force_all_finite=False
+        )
+        learner.teach(X_training_nan, y_training)
+
+        learner = modAL.models.learners.ActiveLearner(
+            X_training=X_training_inf, y_training=y_training,
+            estimator=mock.MockEstimator(),
+            force_all_finite=False
+        )
+        learner.teach(X_training_inf, y_training)
+
     def test_keras(self):
         pass
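
---

A minimal usage sketch of what this series enables, not part of the patches
themselves: teaching an ActiveLearner on pool data that contains NaN values.
The pool and labels below are synthetic, and HistGradientBoostingClassifier is
an assumption made only because it tolerates NaN inputs natively (on
scikit-learn 0.21-0.23 it additionally requires
`from sklearn.experimental import enable_hist_gradient_boosting`).

import numpy as np
from sklearn.ensemble import HistGradientBoostingClassifier  # handles NaN natively
from modAL.models import ActiveLearner

rng = np.random.default_rng(0)

# A pool with roughly 10% missing entries -- exactly the kind of input that the
# default force_all_finite=True would reject inside check_X_y.
X_pool = rng.normal(size=(100, 2))
X_pool[rng.random(X_pool.shape) < 0.1] = np.nan
y_pool = rng.integers(0, 2, size=100)  # synthetic labels, illustration only
y_pool[:2] = [0, 1]                    # guarantee both classes in the seed set

learner = ActiveLearner(
    estimator=HistGradientBoostingClassifier(),
    X_training=X_pool[:10], y_training=y_pool[:10],
    force_all_finite=False,  # let NaN/inf pass through modAL's input validation
)

# query() picks the most uncertain pool point (uncertainty sampling by default);
# teach() re-runs check_X_y with force_all_finite=False, so the NaNs are accepted.
query_idx, query_instance = learner.query(X_pool[10:])
learner.teach(X_pool[10:][query_idx], y_pool[10:][query_idx])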