NVIDIA · lijinf2 · Feb 18, 2025 · Feb 10, 2025 · Feb 10, 2025 · Feb 10, 2025
diff --git a/python/tests/test_common_estimator.py b/python/tests/test_common_estimator.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -377,6 +377,31 @@ def _test_est_copy(
     assert est_init.cuml_params == res_cuml_params
 
 
+def _test_model_copy(
+    gpu_model: Params,
+    cpu_model: Params,
+    input_spark_params: Dict[str, Any],
+) -> None:
+    """
+    Verify that copy() applies param overrides to a model object.
+
+    Each name in input_spark_params is resolved to an attribute of the model,
+    the model is copied with those overrides, and every override must be
+    readable from the GPU copy and equal to the value on the CPU copy.
+    """
+
+    def _copy_with(model: Params) -> Params:
+        # copy() is keyed by the model's own attributes, not by their names.
+        extra = {getattr(model, k): v for k, v in input_spark_params.items()}
+        return model.copy(extra)
+
+    gpu_model_copy = _copy_with(gpu_model)
+    cpu_model_copy = _copy_with(cpu_model)
+    for name, expected in input_spark_params.items():
+        assert gpu_model_copy.getOrDefault(name) == expected
+        assert gpu_model_copy.getOrDefault(name) == cpu_model_copy.getOrDefault(name)
+
+
 def test_default_cuml_params() -> None:
     cuml_params = get_default_cuml_parameters([CumlDummy], ["b"])
     spark_params = SparkRapidsMLDummy()._get_cuml_params_default()

diff --git a/python/tests/test_logistic_regression.py b/python/tests/test_logistic_regression.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -329,9 +329,10 @@ def test_params(tmp_path: str, caplog: LogCaptureFixture) -> None:
     _test_input_setter_getter(LogisticRegression)
 
 
-@pytest.mark.parametrize(
-    "input_spark_params,cuml_params_update",
-    [
+def test_lr_copy() -> None:
+    from .test_common_estimator import _test_est_copy
+
+    param_list: List[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]] = [
         (
             {"regParam": 0.1, "elasticNetParam": 0.5},
             {"penalty": "elasticnet", "C": 10.0, "l1_ratio": 0.5},
@@ -368,15 +369,31 @@ def test_params(tmp_path: str, caplog: LogCaptureFixture) -> None:
             {"verbose": True},
             {"verbose": True},
         ),
-    ],
-)
-def test_lr_copy(
-    input_spark_params: Dict[str, Any],
-    cuml_params_update: Optional[Dict[str, Any]],
-) -> None:
-    from .test_common_estimator import _test_est_copy
+    ]
+
+    for pair in param_list:
+        input_spark_params = pair[0]
+        cuml_params_update = pair[1]
+        _test_est_copy(LogisticRegression, input_spark_params, cuml_params_update)
+
+
+def test_lr_model_copy() -> None:
+    """
+    Check that copy() with a column-name override behaves the same on the GPU
+    and CPU LogisticRegression models.
+    """
+    from .test_common_estimator import _test_model_copy
+    from .utils import get_toy_model
+
+    # Every case overrides exactly one column param with the same dummy name.
+    cols = ("featuresCol", "predictionCol", "probabilityCol", "rawPredictionCol")
+    model_params: List[Dict[str, Any]] = [{col: "fea_dummy"} for col in cols]
+    with CleanSparkSession() as spark:
+        gpu_model = get_toy_model(LogisticRegression, spark)
+        cpu_model = get_toy_model(SparkLogisticRegression, spark)
 
-    _test_est_copy(LogisticRegression, input_spark_params, cuml_params_update)
+        for overrides in model_params:
+            _test_model_copy(gpu_model, cpu_model, overrides)
 
 
 @pytest.mark.parametrize("fit_intercept", [True, False])

diff --git a/python/tests/utils.py b/python/tests/utils.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,14 +16,17 @@
 
 from collections import namedtuple
 from functools import lru_cache
-from typing import Any, Dict, Iterator, List, Optional, Tuple, TypeVar, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypeVar, Union
 
 import numpy as np
 import pandas as pd
 import pyspark
+from pyspark.ml import Estimator, Model
 from pyspark.ml.feature import VectorAssembler
+from pyspark.ml.linalg import Vectors
 from pyspark.sql import SparkSession
 from pyspark.sql.functions import array
+from pyspark.sql.types import Row
 from sklearn.datasets import make_classification, make_regression
 from sklearn.model_selection import train_test_split
 
@@ -224,3 +227,42 @@ def get_default_cuml_parameters(
     for cuml_cls in cuml_classes:
         params.update(_get_default_params_from_func(cuml_cls, excludes))
     return params
+
+
+def get_toy_model(EstimatorCLS: Callable, spark: SparkSession) -> Model:
+    """
+    Fit EstimatorCLS on a fixed four-row DataFrame and return the fitted model.
+
+    Classes whose module path contains "spark_rapids_ml" are constructed with
+    num_workers=1; any other class is constructed with no arguments. Raises
+    AssertionError if the estimator has neither an inputCol nor a featuresCol.
+    """
+    labeled_vectors = [
+        (1.0, Vectors.sparse(3, {2: 1.0})),  # (0., 0., 1.)
+        (1.0, Vectors.dense([0.0, 1.0, 0.0])),  # (0., 1., 0.)
+        (0.0, Vectors.sparse(3, {0: 1.0})),  # (1., 0., 0.)
+        (0.0, Vectors.sparse(3, {0: 2.0, 2: -1.0})),  # (2., 0., -1.)
+    ]
+    data = [
+        Row(id=i, label=label, weight=1.0, features=features)
+        for i, (label, features) in enumerate(labeled_vectors)
+    ]
+    train_df = spark.createDataFrame(data)
+
+    if "spark_rapids_ml" in EstimatorCLS.__module__:
+        est = EstimatorCLS(num_workers=1)
+    else:
+        est = EstimatorCLS()
+
+    if est.hasParam("inputCol"):
+        est.setInputCol("features")
+    elif est.hasParam("featuresCol"):
+        est.setFeaturesCol("features")
+    else:
+        # Explicit raise: a bare `assert False` is stripped under `python -O`.
+        raise AssertionError("an Estimator must contain inputCol or featuresCol")
+    if est.hasParam("labelCol"):
+        est.setLabelCol("label")
+    if est.hasParam("idCol"):
+        est.setIdCol("id")
+    return est.fit(train_df)