
Commit

Merge branch 'dev'
cosmic-cortex committed Nov 11, 2019
2 parents 356e3e4 + 93c7804 commit 451c968
Showing 3 changed files with 32 additions and 4 deletions.
2 changes: 1 addition & 1 deletion docs/source/content/examples/pool-based_sampling.ipynb
@@ -8,7 +8,7 @@
"\n",
"## Overview\n",
"\n",
"In this example, the we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
"In this example, we apply an `ActiveLearner` onto the iris dataset using pool-based sampling. In this setting, we assume a small set of labeled data $\\mathcal{L}$ and a large set of unlabeled data $\\mathcal{U}$ such that $\\left| \\mathcal{L} \\right| \\ll \\left| \\mathcal{U} \\right|$. In his review of the active learning literature, Settles covers a high-level overview of the general pool-based sampling algorithm:\n",
"\n",
"> Queries are selectively drawn from the pool, which is usually assumed to be closed (i.e., static or non-changing), although this is not strictly necessary. Typically, instances are queried in a greedy fashion, according to an informativeness measure used to evaluate all instances in the pool (or, perhaps if $\\mathcal{U}$ is very large, some subsample thereof).\n",
"\n",
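The notebook text above describes the pool-based loop only in prose. As a companion sketch (not part of this commit), the loop might look as follows; RandomForestClassifier, the initial set size, and the query budget of 10 are illustrative choices, and modAL's default uncertainty sampling is assumed as the query strategy.

# Companion sketch (not part of this commit): the pool-based sampling loop
# described in the notebook, using modAL's default uncertainty sampling.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from modAL.models import ActiveLearner

X, y = load_iris(return_X_y=True)

# A small labeled set L; the rest forms the unlabeled pool U, so |L| << |U|.
initial_idx = np.random.choice(len(X), size=5, replace=False)
learner = ActiveLearner(
    estimator=RandomForestClassifier(),
    X_training=X[initial_idx], y_training=y[initial_idx],
)
X_pool = np.delete(X, initial_idx, axis=0)
y_pool = np.delete(y, initial_idx)

# Greedily query the most informative instance, label it, and move it from U to L.
for _ in range(10):
    query_idx, query_instance = learner.query(X_pool)
    learner.teach(X_pool[query_idx], y_pool[query_idx])
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx)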
15 changes: 12 additions & 3 deletions modAL/models/base.py
@@ -30,6 +30,8 @@ class BaseLearner(ABC, BaseEstimator):
            for instance, modAL.uncertainty.uncertainty_sampling.
        X_training: Initial training samples, if available.
        y_training: Initial training labels corresponding to initial training samples.
+       force_all_finite: When True, forces all values of the data to be finite.
+           When False, accepts np.nan and np.inf values.
        bootstrap_init: If initial training data is available, bootstrapping can be done during the first training.
            Useful when building Committee models with bagging.
        **fit_kwargs: keyword arguments.
@@ -47,6 +49,7 @@ def __init__(self,
                 X_training: Optional[modALinput] = None,
                 y_training: Optional[modALinput] = None,
                 bootstrap_init: bool = False,
+                force_all_finite: bool = True,
                 **fit_kwargs
                 ) -> None:
        assert callable(query_strategy), 'query_strategy must be callable'
@@ -59,6 +62,9 @@ def __init__(self,
        if X_training is not None:
            self._fit_to_known(bootstrap=bootstrap_init, **fit_kwargs)

+       assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool'
+       self.force_all_finite = force_all_finite
+
    def _add_training_data(self, X: modALinput, y: modALinput) -> None:
        """
        Adds the new data and label to the known data, but does not retrain the model.
@@ -71,7 +77,8 @@ def _add_training_data(self, X: modALinput, y: modALinput) -> None:
        If the classifier has been fitted, the features in X have to agree with the training samples which the
        classifier has seen.
        """
-       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                 force_all_finite=self.force_all_finite)

        if self.X_training is None:
            self.X_training = X
@@ -117,7 +124,8 @@ def _fit_on_new(self, X: modALinput, y: modALinput, bootstrap: bool = False, **f
        Returns:
            self
        """
-       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                 force_all_finite=self.force_all_finite)

        if not bootstrap:
            self.estimator.fit(X, y, **fit_kwargs)
@@ -146,7 +154,8 @@ def fit(self, X: modALinput, y: modALinput, bootstrap: bool = False, **fit_kwarg
        Returns:
            self
        """
-       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None)
+       check_X_y(X, y, accept_sparse=True, ensure_2d=False, allow_nd=True, multi_output=True, dtype=None,
+                 force_all_finite=self.force_all_finite)
        self.X_training, self.y_training = X, y
        return self._fit_to_known(bootstrap=bootstrap, **fit_kwargs)

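The change above threads a single flag through every check_X_y call in BaseLearner. A brief usage sketch of what it enables (not from this commit; HistGradientBoostingClassifier is just one estimator that handles NaNs natively, and on older scikit-learn versions it additionally requires the experimental enable_hist_gradient_boosting import):

# Usage sketch (not part of this commit): with force_all_finite=False,
# data containing NaNs can reach an estimator that handles missing values itself.
import numpy as np
from sklearn.ensemble import HistGradientBoostingClassifier
from modAL.models import ActiveLearner

X = np.array([[1.0, 2.0], [np.nan, 3.0], [4.0, 5.0], [np.nan, 6.0]])
y = np.array([0, 1, 0, 1])

learner = ActiveLearner(
    estimator=HistGradientBoostingClassifier(),
    X_training=X, y_training=y,
    force_all_finite=False,  # accept np.nan and np.inf values
)

# With the default force_all_finite=True, this teach call would raise a
# ValueError inside check_X_y before the estimator ever saw the data.
learner.teach(np.array([[7.0, np.nan]]), np.array([1]))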
19 changes: 19 additions & 0 deletions tests/core_tests.py
@@ -734,6 +734,25 @@ def test_teach(self):

learner.teach(X, y, bootstrap=bootstrap, only_new=only_new)

+   def test_nan(self):
+       X_training_nan = np.ones(shape=(10, 2)) * np.nan
+       X_training_inf = np.ones(shape=(10, 2)) * np.inf
+       y_training = np.random.randint(0, 2, size=10)
+
+       learner = modAL.models.learners.ActiveLearner(
+           X_training=X_training_nan, y_training=y_training,
+           estimator=mock.MockEstimator(),
+           force_all_finite=False
+       )
+       learner.teach(X_training_nan, y_training)
+
+       learner = modAL.models.learners.ActiveLearner(
+           X_training=X_training_inf, y_training=y_training,
+           estimator=mock.MockEstimator(),
+           force_all_finite=False
+       )
+       learner.teach(X_training_inf, y_training)
+
    def test_keras(self):
        pass
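For reference, a standalone sketch (not part of the commit) of the scikit-learn validation behavior that the new flag toggles and that test_nan exercises:

# check_X_y is the validation helper used throughout BaseLearner.
import numpy as np
from sklearn.utils import check_X_y

X_nan = np.ones(shape=(10, 2)) * np.nan
y = np.random.randint(0, 2, size=10)

check_X_y(X_nan, y, force_all_finite=False)  # NaN/inf pass through validation

try:
    check_X_y(X_nan, y, force_all_finite=True)  # the previous hard-coded behavior
except ValueError as err:
    print(err)  # e.g. "Input contains NaN"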
