diff --git a/elk/training/classifier.py b/elk/training/classifier.py index 30e604f4..b668d786 100644 --- a/elk/training/classifier.py +++ b/elk/training/classifier.py @@ -85,18 +85,17 @@ def fit( Returns: Final value of the loss function after optimization. """ - # Use cuML backend for LASSO if l1_ratio > 0: from cuml import LogisticRegression model = LogisticRegression( - C=1 / alpha, penalty="elasticnet", l1_ratio=l1_ratio + C=1 / alpha, penalty="elasticnet", l1_ratio=l1_ratio, max_iter=1_000, ) model.fit(x, y) - W = torch.as_tensor(model.coef_).unsqueeze(0).to(x.device) + W = torch.as_tensor(model.coef_).to(x.device) b = torch.as_tensor(model.intercept_).to(x.device) - + self.linear.weight.data = W self.linear.bias.data = b return 0.0 @@ -219,6 +218,7 @@ def inlp( y: Tensor, eraser: LeaceEraser | None = None, l1_ratio: float = 0.0, + alpha: float = 0.001, max_iter: int | None = None, tol: float = 0.01, ) -> InlpResult: @@ -255,7 +255,7 @@ def inlp( result = InlpResult() for _ in range(max_iter): clf = cls(d, eraser=eraser, device=x.device, dtype=x.dtype) - loss = clf.fit(x, y) + loss = clf.fit(x, y, alpha=alpha, l1_ratio=l1_ratio) result.classifiers.append(clf) result.losses.append(loss) diff --git a/elk/training/supervised.py b/elk/training/supervised.py index 1b0c2a7e..a9e539a2 100644 --- a/elk/training/supervised.py +++ b/elk/training/supervised.py @@ -12,6 +12,7 @@ def train_supervised( mode: str, erase_paraphrases: bool = False, l1_ratio: float = 0.0, + alpha: float = 0.001, max_inlp_iter: int | None = None, ) -> list[Classifier]: assert not ( @@ -53,11 +54,11 @@ def train_supervised( return [lr_model] elif mode == "inlp": return Classifier.inlp( - X, train_labels, eraser=eraser, max_iter=max_inlp_iter, l1_ratio=l1_ratio + X, train_labels, eraser=eraser, max_iter=max_inlp_iter, l1_ratio=l1_ratio, alpha=alpha, ).classifiers elif mode == "single": lr_model = Classifier(X.shape[-1], device=device, eraser=eraser) - lr_model.fit(X, train_labels, 
l1_ratio=l1_ratio) + lr_model.fit(X, train_labels, l1_ratio=l1_ratio, alpha=alpha) return [lr_model] else: raise ValueError(f"Unknown mode: {mode}") diff --git a/elk/training/train.py b/elk/training/train.py index 12efc62d..58be69f3 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -24,6 +24,9 @@ class Elicit(Run): l1_ratio: float = 0.0 """Whether to use L1 regularization.""" + alpha: float = 0.001 + """Regularization strength passed to the classifier's `fit`; the elastic-net path uses C = 1 / alpha.""" + supervised: Literal["single", "inlp", "cv"] = "single" """Whether to train a supervised classifier, and if so, whether to use cross-validation. Defaults to "single", which means to train a single classifier @@ -80,6 +83,7 @@ def apply_to_layer( erase_paraphrases=self.erase_paraphrases, device=device, l1_ratio=self.l1_ratio, + alpha=self.alpha, mode=self.supervised, max_inlp_iter=self.max_inlp_iter, )