diff --git a/elk/training/classifier.py b/elk/training/classifier.py
index 30e604f4..b668d786 100644
--- a/elk/training/classifier.py
+++ b/elk/training/classifier.py
@@ -85,18 +85,17 @@ def fit(
         Returns:
             Final value of the loss function after optimization.
         """
-        # Use cuML backend for LASSO
         if l1_ratio > 0:
             from cuml import LogisticRegression
 
             model = LogisticRegression(
-                C=1 / alpha, penalty="elasticnet", l1_ratio=l1_ratio
+                C=1 / alpha, penalty="elasticnet", l1_ratio=l1_ratio, max_iter=1_000,
             )
             model.fit(x, y)
 
-            W = torch.as_tensor(model.coef_).unsqueeze(0).to(x.device)
+            W = torch.as_tensor(model.coef_).to(x.device)
             b = torch.as_tensor(model.intercept_).to(x.device)
-
+            
             self.linear.weight.data = W
             self.linear.bias.data = b
             return 0.0
@@ -219,6 +218,7 @@ def inlp(
         y: Tensor,
         eraser: LeaceEraser | None = None,
         l1_ratio: float = 0.0,
+        alpha: float = 0.001,
         max_iter: int | None = None,
         tol: float = 0.01,
     ) -> InlpResult:
@@ -255,7 +255,7 @@ def inlp(
         result = InlpResult()
         for _ in range(max_iter):
             clf = cls(d, eraser=eraser, device=x.device, dtype=x.dtype)
-            loss = clf.fit(x, y)
+            loss = clf.fit(x, y, alpha=alpha, l1_ratio=l1_ratio)
             result.classifiers.append(clf)
             result.losses.append(loss)
 
diff --git a/elk/training/supervised.py b/elk/training/supervised.py
index 1b0c2a7e..a9e539a2 100644
--- a/elk/training/supervised.py
+++ b/elk/training/supervised.py
@@ -12,6 +12,7 @@ def train_supervised(
     mode: str,
     erase_paraphrases: bool = False,
     l1_ratio: float = 0.0,
+    alpha: float = 0.001,
     max_inlp_iter: int | None = None,
 ) -> list[Classifier]:
     assert not (
@@ -53,11 +54,11 @@ def train_supervised(
         return [lr_model]
     elif mode == "inlp":
         return Classifier.inlp(
-            X, train_labels, eraser=eraser, max_iter=max_inlp_iter, l1_ratio=l1_ratio
+            X, train_labels, eraser=eraser, max_iter=max_inlp_iter, l1_ratio=l1_ratio, alpha=alpha,
         ).classifiers
     elif mode == "single":
         lr_model = Classifier(X.shape[-1], device=device, eraser=eraser)
-        lr_model.fit(X, train_labels, l1_ratio=l1_ratio)
+        lr_model.fit(X, train_labels, l1_ratio=l1_ratio, alpha=alpha)
         return [lr_model]
     else:
         raise ValueError(f"Unknown mode: {mode}")
diff --git a/elk/training/train.py b/elk/training/train.py
index 12efc62d..58be69f3 100644
--- a/elk/training/train.py
+++ b/elk/training/train.py
@@ -24,6 +24,9 @@ class Elicit(Run):
     l1_ratio: float = 0.0
     """Whether to use L1 regularization."""
 
+    alpha: float = 0.001
+    """Regularization strength for the supervised classifier (C = 1 / alpha)."""
+
     supervised: Literal["single", "inlp", "cv"] = "single"
     """Whether to train a supervised classifier, and if so, whether to use
     cross-validation. Defaults to "single", which means to train a single classifier
@@ -80,6 +83,7 @@ def apply_to_layer(
             erase_paraphrases=self.erase_paraphrases,
             device=device,
             l1_ratio=self.l1_ratio,
+            alpha=self.alpha,
             mode=self.supervised,
             max_inlp_iter=self.max_inlp_iter,
         )