secretflow · fdddy · Dec 26, 2024 · deadlywing · Feb 11, 2025 · deadlywing
diff --git a/libspu/version.h b/libspu/version.h
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#define SPU_VERSION "0.9.3.dev$$DATE$$"
+#define SPU_VERSION "0.9.3.dev20241226"
 
 #include <string_view>
 

diff --git a/sml/fyy_pca/BUILD.bazel b/sml/fyy_pca/BUILD.bazel
@@ -0,0 +1,16 @@
+
+
+load("@rules_python//python:defs.bzl", "py_library")
+
+package(default_visibility = ["//visibility:public"])
+
+py_library(  # provides serial_jacobi_evd, imported by emulations/jacobievd_emul.py
+    name = "jacobi_evd",
+    srcs = ["jacobi_evd.py"],
+)
+
+py_library(  # provides the PCA class imported by emulations/jacobipca_emul.py
+    name = "jacobi_pca",
+    srcs = ["jacobi_pca.py"],
+    deps = ["//sml/utils:extmath", ":jacobi_evd"],
+)
diff --git a/sml/fyy_pca/emulations/3pc_128.json b/sml/fyy_pca/emulations/3pc_128.json
@@ -0,0 +1,52 @@
+{
+    "id": "outsourcing.3pc",
+    "nodes": {
+        "node:0": "127.0.0.1:61920",
+        "node:1": "127.0.0.1:61921",
+        "node:2": "127.0.0.1:61922",
+        "node:3": "127.0.0.1:61923",
+        "node:4": "127.0.0.1:61924"
+    },
+    "devices": {
+        "SPU": {
+            "kind": "SPU",
+            "config": {
+                "node_ids": [
+                    "node:0",
+                    "node:1",
+                    "node:2"
+                ],
+                "spu_internal_addrs": [
+                    "127.0.0.1:61930",
+                    "127.0.0.1:61931",
+                    "127.0.0.1:61932"
+                ],
+                "experimental_data_folder": [
+                    "/tmp/spu_data_0/",
+                    "/tmp/spu_data_1/",
+                    "/tmp/spu_data_2/"
+                ],
+                "runtime_config": {
+                    "protocol": "ABY3",
+                    "field": "FM128",
+                    "fxp_fraction_bits": 30,
+                    "enable_pphlo_profile": false,
+                    "enable_hal_profile": false,
+                    "enable_pphlo_trace": false
+                }
+            }
+        },
+        "P1": {
+            "kind": "PYU",
+            "config": {
+                "node_id": "node:3"
+            }
+        },
+        "P2": {
+            "kind": "PYU",
+            "config": {
+                "node_id": "node:4"
+            }
+        }
+    }
+}
diff --git a/sml/fyy_pca/emulations/BUILD.bazel b/sml/fyy_pca/emulations/BUILD.bazel
@@ -0,0 +1,30 @@
+
+load("@rules_python//python:defs.bzl", "py_binary")
+
+package(default_visibility = ["//visibility:public"])
+
+py_binary(  # PCA emulation; the script uses emulation.CLUSTER_ABY3_3PC, not a local JSON config
+    name = "jacobipca_emul",
+    srcs = ["jacobipca_emul.py"],
+    deps = [  # NOTE(review): the script also imports sklearn — confirm it is available at runtime
+        "//sml/fyy_pca:jacobi_pca",
+        "//sml/utils:emulation",
+    ],
+)
+
+py_binary(  # EVD emulation; the script opens 3pc_128.json by path, hence the :conf data dep
+    name = "jacobievd_emul",
+    srcs = ["jacobievd_emul.py"],
+    data = [":conf"],
+    deps = [
+        "//sml/fyy_pca:jacobi_evd",
+        "//sml/utils:emulation",
+    ],
+)
+
+filegroup(  # emulation cluster config, attached to jacobievd_emul through data = [":conf"]
+    name = "conf",
+    srcs = [
+        "3pc_128.json",
+    ],
+)
diff --git a/sml/fyy_pca/emulations/jacobievd_emul.py b/sml/fyy_pca/emulations/jacobievd_emul.py
@@ -0,0 +1,60 @@
+import jax.numpy as jnp
+import numpy as np
+
+import sml.utils.emulation as emulation
+from sml.fyy_pca.jacobi_evd import generate_ring_sequence, serial_jacobi_evd
+
+def emul_jacobievd(mode: emulation.Mode = emulation.Mode.MULTIPROCESS):
+    """Check `serial_jacobi_evd` on the emulated SPU against `np.linalg.eigh`.
+
+    Args:
+        mode: emulation mode passed to `emulation.Emulator`.
+    """
+    print("start jacobi evd emulation.")
+    np.random.seed(0)
+
+    # ONLY test small matrix for usage purpose
+    n = 10
+    mat = jnp.array(np.random.rand(n, n))
+    mat = (mat + mat.T) / 2  # symmetrize
+
+    # Built before `try`: if construction raises, `finally` must not hit an
+    # unbound `emulator` (the NameError would mask the real error).
+    conf_path = "sml/fyy_pca/emulations/3pc_128.json"
+    emulator = emulation.Emulator(conf_path, mode, bandwidth=300, latency=20)
+
+    def _check_jacobievd_single(mat, max_jacobi_iter=5):
+        print("start jacobi evd emulation test, with shape=", mat.shape)
+        mat_spu = emulator.seal(mat)
+        rotate_mat_spu = emulator.seal(jnp.eye(mat.shape[0]))
+        run_evd = emulator.run(serial_jacobi_evd, static_argnums=(2,))
+        val, vec = run_evd(mat_spu, rotate_mat_spu, max_jacobi_iter)
+        order = jnp.argsort(val)[::-1]  # largest eigenvalue first
+        eig_vec, eig_val = vec.T[order], val[order]
+
+        # Plaintext reference; eigh fits because mat is real symmetric.
+        val_np, vec_np = np.linalg.eigh(mat)
+        order_np = np.argsort(val_np)[::-1]
+        eig_vec_np, eig_val_np = vec_np.T[order_np], val_np[order_np]
+
+        abs_diff = np.abs(np.abs(eig_vec_np) - np.abs(eig_vec))
+        rel_error = abs_diff / (np.abs(eig_vec_np) + 1e-8)
+        print("avg absolute error:\n", np.mean(abs_diff))
+        print("avg relative error:\n", np.mean(rel_error))
+
+        np.testing.assert_allclose(eig_val_np, eig_val, rtol=0.01, atol=0.01)
+        # Eigenvectors are defined only up to sign, so compare magnitudes.
+        np.testing.assert_allclose(
+            np.abs(eig_vec_np), np.abs(eig_vec), rtol=0.01, atol=0.01
+        )
+
+    try:
+        emulator.up()
+        _check_jacobievd_single(mat)
+        print("evd emulation pass.")
+    finally:
+        emulator.down()
+
+
+if __name__ == "__main__":  # run the EVD emulation when executed as a script
+    emul_jacobievd(emulation.Mode.MULTIPROCESS)
diff --git a/sml/fyy_pca/emulations/jacobipca_emul.py b/sml/fyy_pca/emulations/jacobipca_emul.py
@@ -0,0 +1,133 @@
+import os
+import sys
+
+import jax.numpy as jnp
+import jax.random as random
+import numpy as np
+from sklearn.decomposition import PCA as SklearnPCA
+
+# Append the directory three levels above this file's folder to sys.path before importing `sml`
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../../'))
+
+import sml.utils.emulation as emulation
+from sml.fyy_pca.jacobi_pca import PCA
+
+
+def emul_powerPCA(mode: emulation.Mode = emulation.Mode.MULTIPROCESS):
+    """Emulate power-iteration PCA on SPU and assert it matches sklearn's PCA.
+
+    Args:
+        mode: emulation mode passed to `emulation.Emulator`.
+    """
+    print("start power method emulation.")
+
+    def proc_transform(X):
+        model = PCA(
+            method='power_iteration',
+            n_components=6,
+            max_power_iter=200,
+        )
+        model.fit(X)
+        X_transformed = model.transform(X)
+        X_variances = model._variances
+        X_reconstructed = model.inverse_transform(X_transformed)
+        return X_transformed, X_variances, X_reconstructed
+
+    # Construct before `try`: a failure here must not reach `finally` with
+    # `emulator` unbound. bandwidth and latency only work for docker mode.
+    emulator = emulation.Emulator(
+        emulation.CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
+    )
+    try:
+        emulator.up()
+
+        # Create a simple dataset
+        X = random.normal(random.PRNGKey(0), (10, 20))
+        X_spu = emulator.seal(X)
+        result = emulator.run(proc_transform)(X_spu)
+
+        # The mean of the transformed data should be approximately 0
+        assert jnp.allclose(jnp.mean(result[0], axis=0), 0, atol=1e-3)
+
+        # Compare with sklearn
+        model = SklearnPCA(n_components=6)
+        model.fit(X)
+        X_transformed_sklearn = model.transform(X)
+        X_variances = model.explained_variance_
+
+        # Compare the transform results(omit sign)
+        np.testing.assert_allclose(
+            np.abs(X_transformed_sklearn), np.abs(result[0]), rtol=0.1, atol=0.1
+        )
+
+        # Compare the variance results
+        np.testing.assert_allclose(X_variances, result[1], rtol=0.1, atol=0.1)
+
+        X_reconstructed = model.inverse_transform(X_transformed_sklearn)
+        np.testing.assert_allclose(X_reconstructed, result[2], atol=1e-3)
+    finally:
+        emulator.down()
+
+def emul_jacobi_PCA(mode: emulation.Mode = emulation.Mode.MULTIPROCESS):
+    """Emulate serial-Jacobi PCA on SPU and assert it matches sklearn's PCA.
+
+    Args:
+        mode: emulation mode passed to `emulation.Emulator`.
+    """
+    print("start jacobi method emulation.")
+
+    def proc_transform(X, rotate_matrix):
+        model = PCA(
+            method='serial_jacobi_iteration',
+            n_components=6,
+            rotate_matrix=rotate_matrix,
+            max_jacobi_iter=5,
+        )
+        model.fit(X)
+        X_transformed = model.transform(X)
+        X_variances = model._variances
+        X_reconstructed = model.inverse_transform(X_transformed)
+        return X_transformed, X_variances, X_reconstructed
+
+    # Construct before `try`: a failure here must not reach `finally` with
+    # `emulator` unbound. bandwidth and latency only work for docker mode.
+    emulator = emulation.Emulator(
+        emulation.CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
+    )
+    try:
+        emulator.up()
+
+        # Create a simple dataset and an identity rotate_matrix of size X.shape[1]
+        X = random.normal(random.PRNGKey(0), (10, 20))
+        rotate_matrix = jnp.eye(X.shape[1])
+
+        X_spu = emulator.seal(X)
+        rotate_matrix_spu = emulator.seal(rotate_matrix)
+        result = emulator.run(proc_transform)(X_spu, rotate_matrix_spu)
+
+        # The mean of the transformed data should be approximately 0
+        assert jnp.allclose(jnp.mean(result[0], axis=0), 0, atol=1e-3)
+
+        # Compare with sklearn
+        model = SklearnPCA(n_components=6)
+        model.fit(X)
+        X_transformed_sklearn = model.transform(X)
+        X_variances = model.explained_variance_
+
+        # Compare the transform results(omit sign)
+        np.testing.assert_allclose(
+            np.abs(X_transformed_sklearn), np.abs(result[0]), rtol=0.1, atol=0.1
+        )
+
+        # Compare the variance results
+        np.testing.assert_allclose(X_variances, result[1], rtol=0.1, atol=0.1)
+
+        X_reconstructed = model.inverse_transform(X_transformed_sklearn)
+        np.testing.assert_allclose(X_reconstructed, result[2], atol=0.1)
+    finally:
+        emulator.down()
+
+
+if __name__ == "__main__":  # only the Jacobi emulation runs by default
+    # emul_powerPCA(emulation.Mode.MULTIPROCESS)  # uncomment to run the power-method emulation too
+    emul_jacobi_PCA(emulation.Mode.MULTIPROCESS)