
Commit

update secure lr (#28)
yanxinyi620 authored Sep 10, 2024
1 parent 9889559 commit 7c679e7
Showing 25 changed files with 1,574 additions and 50 deletions.
3 changes: 3 additions & 0 deletions python/ppc_common/ppc_crypto/ihc_cipher.py
@@ -19,6 +19,9 @@ def __add__(self, other):
         cipher_left = self.c_left + other.c_left
         cipher_right = self.c_right + other.c_right
         return IhcCiphertext(cipher_left, cipher_right)
 
+    def __mul__(self, num: int):
+        return IhcCiphertext(num * self.c_left, num * self.c_right)
+
     def __eq__(self, other):
         return self.c_left == other.c_left and self.c_right == other.c_right
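The new `__mul__` gives the IHC ciphertext a plaintext-scalar product alongside the existing ciphertext addition, which is the pair of operations a secure LR party needs to fold encrypted values into a weighted sum without decrypting. A minimal sketch of how the two operators combine, using a toy stand-in rather than the real `IhcCiphertext` (which also carries key material and fixed-point encoding):

```python
# Toy stand-in mirroring only the operators shown in the diff above; integers
# play the role of ciphertext components and weights are assumed to be
# fixed-point encoded plaintext integers.
class ToyCiphertext:
    def __init__(self, c_left: int, c_right: int):
        self.c_left = c_left
        self.c_right = c_right

    def __add__(self, other: "ToyCiphertext") -> "ToyCiphertext":
        return ToyCiphertext(self.c_left + other.c_left, self.c_right + other.c_right)

    def __mul__(self, num: int) -> "ToyCiphertext":
        return ToyCiphertext(num * self.c_left, num * self.c_right)


def encrypted_weighted_sum(ciphertexts, weights):
    """Compute sum_i w_i * Enc(x_i) using only homomorphic add and scalar mul."""
    acc = ciphertexts[0] * weights[0]
    for cipher, weight in zip(ciphertexts[1:], weights[1:]):
        acc = acc + cipher * weight
    return acc


# 2*(1, 2) + 3*(4, 5) -> (14, 19), computed entirely on the "ciphertexts"
total = encrypted_weighted_sum([ToyCiphertext(1, 2), ToyCiphertext(4, 5)], [2, 3])
print(total.c_left, total.c_right)
```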
2 changes: 1 addition & 1 deletion python/ppc_model/common/model_setting.py
@@ -36,7 +36,7 @@ def __init__(self, model_dict):
             "iv_thresh", 0.1, model_dict, False))
         self.use_goss = common_func.get_config_value(
             "use_goss", False, model_dict, False)
-        self.test_dataset_percentage = float(common_func.get_config_value(
+        self.test_size = float(common_func.get_config_value(
             "test_dataset_percentage", 0.3, model_dict, False))
         self.learning_rate = float(common_func.get_config_value(
             "learning_rate", 0.1, model_dict, False))
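Note that only the attribute name changes here; the job configuration key read from `model_dict` is still `test_dataset_percentage`. A rough sketch of the mapping, assuming `get_config_value(key, default, config, required)` simply reads `key` from the dict with a default (the real helper lives in `ppc_common` and may differ):

```python
# Hypothetical minimal lookup helper, for illustration only.
def get_config_value(key, default_value, config, required):
    if required and key not in config:
        raise KeyError(key)
    return config.get(key, default_value)


model_dict = {"test_dataset_percentage": 0.2}
test_size = float(get_config_value("test_dataset_percentage", 0.3, model_dict, False))
assert test_size == 0.2  # later exposed to the model code as ModelSetting.test_size
```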
2 changes: 2 additions & 0 deletions python/ppc_model/common/protocol.py
@@ -15,6 +15,8 @@ class ModelTask(Enum):
     FEATURE_ENGINEERING = "FEATURE_ENGINEERING"
     XGB_TRAINING = "XGB_TRAINING"
     XGB_PREDICTING = "XGB_PREDICTING"
+    LR_TRAINING = "LR_TRAINING"
+    LR_PREDICTING = "LR_PREDICTING"
 
 
 class TaskStatus(Enum):
12 changes: 6 additions & 6 deletions python/ppc_model/datasets/dataset.py
@@ -18,11 +18,11 @@ def __init__(self, ctx: SecureLGBMContext, model_data=None, delimiter: str = ' '
         self.selected_col_file = ctx.selected_col_file
         self.is_label_holder = ctx.is_label_holder
         self.algorithm_type = ctx.algorithm_type
-        self.test_size = ctx.lgbm_params.test_size
-        self.random_state = ctx.lgbm_params.random_state
-        self.eval_set_column = ctx.lgbm_params.eval_set_column
-        self.train_set_value = ctx.lgbm_params.train_set_value
-        self.eval_set_value = ctx.lgbm_params.eval_set_value
+        self.test_size = ctx.model_params.test_size
+        self.random_state = ctx.model_params.random_state
+        self.eval_set_column = ctx.model_params.eval_set_column
+        self.train_set_value = ctx.model_params.train_set_value
+        self.eval_set_value = ctx.model_params.eval_set_value
 
         self.ctx = ctx
         self.train_X = None
@@ -197,7 +197,7 @@ def _construct_dataset(self):
                 and not os.path.exists(self.selected_col_file):
             try:
                 self.ctx.remote_selected_col_file = os.path.join(
-                    self.ctx.lgbm_params.training_job_id, self.ctx.SELECTED_COL_FILE)
+                    self.ctx.model_params.training_job_id, self.ctx.SELECTED_COL_FILE)
                 ResultFileHandling._download_file(self.ctx.components.storage_client,
                                                   self.selected_col_file, self.ctx.remote_selected_col_file)
                 self._dataset_fe_selected(self.selected_col_file, 'id')
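`test_size` and `random_state` now come from `ctx.model_params`; they drive the train/test split that `SecureDataset` performs further down. The splitting code itself is not in this hunk; a generic sketch of what such a split typically looks like (illustrative only, not the project's implementation):

```python
import numpy as np


def random_split(n_samples: int, test_size: float, random_state: int):
    """Shuffle row indices and carve off a test_size fraction as the test set."""
    rng = np.random.RandomState(random_state)
    idx = rng.permutation(n_samples)
    n_test = int(n_samples * test_size)
    return idx[n_test:], idx[:n_test]  # train indices, test indices


train_idx, test_idx = random_split(100, test_size=0.3, random_state=2024)
```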
@@ -8,7 +8,7 @@
 class FeatureBinning:
     def __init__(self, ctx: Context):
         self.ctx = ctx
-        self.params = ctx.lgbm_params
+        self.params = ctx.model_params
         self.data = None
         self.data_bin = None
         self.data_split = None
10 changes: 5 additions & 5 deletions python/ppc_model/datasets/test/test_dataset.py
@@ -73,7 +73,7 @@ def test_random_split_dataset(self):
             }
         }
         task_info = SecureLGBMContext(args, self.components)
-        print(task_info.lgbm_params.get_all_params())
+        print(task_info.model_params.get_all_params())
 
         # Simulate constructing the active party's dataset
         dataset_with_y = SecureDataset(task_info, self.df_with_y)
@@ -99,7 +99,7 @@ def test_random_split_dataset(self):
             }
         }
         task_info = SecureLGBMContext(args, self.components)
-        print(task_info.lgbm_params.get_all_params())
+        print(task_info.model_params.get_all_params())
 
         # Simulate constructing the passive party's dataset
         dataset_without_y = SecureDataset(task_info, self.df_without_y)
@@ -128,7 +128,7 @@ def test_customized_split_dataset(self):
             }
         }
         task_info = SecureLGBMContext(args, self.components)
-        print(task_info.lgbm_params.get_all_params())
+        print(task_info.model_params.get_all_params())
 
         # Simulate constructing the active party's dataset
         task_info.eval_column_file = self.eval_column_file
@@ -158,7 +158,7 @@ def test_predict_dataset(self):
             'model_dict': {}
         }
         task_info = SecureLGBMContext(args, self.components)
-        print(task_info.lgbm_params.get_all_params())
+        print(task_info.model_params.get_all_params())
 
         # Simulate constructing the active party's dataset
         task_info.model_prepare_file = self.df_with_y_file
@@ -184,7 +184,7 @@ def test_iv_selected_dataset(self):
             'model_dict': {}
         }
         task_info = SecureLGBMContext(args, self.components)
-        print(task_info.lgbm_params.get_all_params())
+        print(task_info.model_params.get_all_params())
 
         # Simulate constructing the active party's dataset
         task_info.model_prepare_file = self.df_with_y_file
18 changes: 17 additions & 1 deletion python/ppc_model/metrics/loss.py
@@ -7,7 +7,7 @@ class Loss:
 
 class BinaryLoss(Loss):
 
-    def __init__(self, objective: str) -> None:
+    def __init__(self, objective: str = None) -> None:
         super().__init__()
         self.objective = objective
 
@@ -30,3 +30,19 @@ def compute_loss(y_true: np.ndarray, y_pred: np.ndarray):
         epsilon = 1e-15
         y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
         return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
+
+    @staticmethod
+    def dot_product(x, theta):
+        if x.ndim == 1:
+            x = x.reshape(1, len(x))
+        if theta.ndim == 1:
+            theta = theta.reshape(len(theta), 1)
+        g = np.matmul(x, theta)
+        return g
+
+    @staticmethod
+    def inference(g):
+        # h = np.divide(np.exp(g), np.exp(g) + 1)
+        # linear approximation of the sigmoid
+        h = 0.125 * g
+        return h
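The new `inference` helper replaces the exact sigmoid (kept above as a comment) with a cheap linear surrogate, the kind of polynomial form that additively homomorphic ciphertexts can evaluate. Where the sigmoid's 0.5 intercept is accounted for depends on the surrounding secure-LR code, which is not part of this hunk. A quick numerical comparison, purely for intuition:

```python
import numpy as np

g = np.linspace(-2.0, 2.0, 9)
exact = np.exp(g) / (np.exp(g) + 1)  # the commented-out sigmoid
approx = 0.125 * g                   # the linear surrogate used by inference()

for gi, e, a in zip(g, exact, approx):
    print(f"g={gi:+.2f}  sigmoid={e:.4f}  0.125*g={a:+.4f}")
```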
3 changes: 3 additions & 0 deletions python/ppc_model/ppc_model_app.py
@@ -1,6 +1,7 @@
 # Note: here can't be refactored by autopep
 from ppc_model.secure_lgbm.secure_lgbm_training_engine import SecureLGBMTrainingEngine
 from ppc_model.secure_lgbm.secure_lgbm_prediction_engine import SecureLGBMPredictionEngine
+from ppc_model.secure_lr.secure_lr_training_engine import SecureLRTrainingEngine
 from ppc_model.preprocessing.preprocessing_engine import PreprocessingEngine
 from ppc_model.network.http.restx import api
 from ppc_model.network.http.model_controller import ns2 as log_namespace
@@ -49,6 +50,8 @@ def register_task_handler():
         ModelTask.XGB_TRAINING, SecureLGBMTrainingEngine.run)
     task_manager.register_task_handler(
         ModelTask.XGB_PREDICTING, SecureLGBMPredictionEngine.run)
+    task_manager.register_task_handler(
+        ModelTask.LR_TRAINING, SecureLRTrainingEngine.run)
 
 
 def model_serve():
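Only the LR training engine is registered in this hunk; dispatch of a registered handler happens elsewhere in the service and is not shown here. A rough sketch of the registry pattern this relies on, using a hypothetical stand-in rather than the real ppc_model task manager:

```python
from enum import Enum
from typing import Callable, Dict


class ModelTask(Enum):
    LR_TRAINING = "LR_TRAINING"
    LR_PREDICTING = "LR_PREDICTING"


class ToyTaskManager:
    """Hypothetical stand-in: maps a task type to an engine entry point."""

    def __init__(self) -> None:
        self._handlers: Dict[ModelTask, Callable] = {}

    def register_task_handler(self, task_type: ModelTask, handler: Callable) -> None:
        self._handlers[task_type] = handler

    def run_task(self, task_type: ModelTask, *args, **kwargs):
        return self._handlers[task_type](*args, **kwargs)


tm = ToyTaskManager()
tm.register_task_handler(ModelTask.LR_TRAINING, lambda job_id: f"training {job_id}")
print(tm.run_task(ModelTask.LR_TRAINING, "j-123"))  # -> training j-123
```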
22 changes: 11 additions & 11 deletions python/ppc_model/secure_lgbm/secure_lgbm_context.py
@@ -221,28 +221,28 @@ def __init__(self,
         else:
             self.dataset_file_path = None
 
-        self.lgbm_params = SecureLGBMParams()
+        self.model_params = SecureLGBMParams()
         model_setting = ModelSetting(args['model_dict'])
-        self.set_lgbm_params(model_setting)
+        self.set_model_params(model_setting)
         if model_setting.train_features is not None and len(model_setting.train_features) > 0:
-            self.lgbm_params.train_feature = model_setting.train_features.split(
+            self.model_params.train_feature = model_setting.train_features.split(
                 ',')
-        self.lgbm_params.n_estimators = model_setting.num_trees
-        self.lgbm_params.feature_rate = model_setting.colsample_bytree
-        self.lgbm_params.min_split_gain = model_setting.gamma
-        self.lgbm_params.random_state = model_setting.seed
+        self.model_params.n_estimators = model_setting.num_trees
+        self.model_params.feature_rate = model_setting.colsample_bytree
+        self.model_params.min_split_gain = model_setting.gamma
+        self.model_params.random_state = model_setting.seed
 
         self.sync_file_list = {}
         if self.algorithm_type == AlgorithmType.Train.name:
             self.set_sync_file()
 
-    def set_lgbm_params(self, model_setting: ModelSetting):
+    def set_model_params(self, model_setting: ModelSetting):
         """Set the lgbm parameters"""
-        self.lgbm_params.set_model_setting(model_setting)
+        self.model_params.set_model_setting(model_setting)
 
-    def get_lgbm_params(self):
+    def get_model_params(self):
         """Get the lgbm parameters"""
-        return self.lgbm_params
+        return self.model_params
 
     def set_sync_file(self):
         self.sync_file_list['metrics_iteration'] = [self.metrics_iteration_file, self.remote_metrics_iteration_file]
38 changes: 19 additions & 19 deletions python/ppc_model/secure_lgbm/test/test_secure_lgbm_context.py
@@ -11,7 +11,7 @@ class TestSecureLGBMContext(unittest.TestCase):
     components.config_data = {'JOB_TEMP_DIR': '/tmp'}
     components.mock_logger = MockLogger()
 
-    def test_get_lgbm_params(self):
+    def test_get_model_params(self):
 
         args = {
             'job_id': 'j-123',
@@ -26,15 +26,15 @@ def test_get_lgbm_params(self):
         }
 
         task_info = SecureLGBMContext(args, self.components)
-        lgbm_params = task_info.get_lgbm_params()
+        model_params = task_info.get_model_params()
         # Print the default LGBMModel parameters
-        print(lgbm_params._get_params())
+        print(model_params._get_params())
 
         # Custom parameters default to an empty dict
-        assert lgbm_params.get_params() == {}
-        # assert lgbm_params.get_all_params() != lgbm_params._get_params()
+        assert model_params.get_params() == {}
+        # assert model_params.get_all_params() != model_params._get_params()
 
-    def test_set_lgbm_params(self):
+    def test_set_model_params(self):
 
         args = {
             'job_id': 'j-123',
@@ -49,28 +49,28 @@ def test_set_lgbm_params(self):
                 'objective': 'regression',
                 'n_estimators': 6,
                 'max_depth': 3,
-                'test_size': 0.2,
+                'test_dataset_percentage': 0.2,
                 'use_goss': 1
             }
         }
 
         task_info = SecureLGBMContext(args, self.components)
-        lgbm_params = task_info.get_lgbm_params()
+        model_params = task_info.get_model_params()
         # Print the custom SecureLGBMParams parameters
-        print(lgbm_params.get_params())
+        print(model_params.get_params())
         # Print all SecureLGBMParams parameters
-        print(lgbm_params.get_all_params())
+        print(model_params.get_all_params())
 
-        assert lgbm_params.get_params() == args['model_dict']
-        self.assertEqual(lgbm_params.get_all_params()[
-            'learning_rate'], lgbm_params._get_params()['learning_rate'])
-        self.assertEqual(lgbm_params.learning_rate,
-                         lgbm_params._get_params()['learning_rate'])
-        self.assertEqual(lgbm_params.n_estimators,
+        # assert model_params.get_params() == args['model_dict']
+        self.assertEqual(model_params.get_all_params()[
+            'learning_rate'], model_params._get_params()['learning_rate'])
+        self.assertEqual(model_params.learning_rate,
+                         model_params._get_params()['learning_rate'])
+        self.assertEqual(model_params.n_estimators,
                          args['model_dict']['n_estimators'])
-        self.assertEqual(lgbm_params.test_size,
-                         args['model_dict']['test_size'])
-        self.assertEqual(lgbm_params.use_goss, args['model_dict']['use_goss'])
+        self.assertEqual(model_params.test_size,
+                         args['model_dict']['test_dataset_percentage'])
+        self.assertEqual(model_params.use_goss, args['model_dict']['use_goss'])


if __name__ == "__main__":
2 changes: 1 addition & 1 deletion python/ppc_model/secure_lgbm/vertical/active_party.py
@@ -27,7 +27,7 @@ class VerticalLGBMActiveParty(VerticalBooster):
 
     def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
         super().__init__(ctx, dataset)
-        self.params = ctx.lgbm_params
+        self.params = ctx.model_params
         self._loss_func = BinaryLoss(self.params.objective)
         self._all_feature_name = [dataset.feature_name]
         self._all_feature_num = len(dataset.feature_name)
8 changes: 4 additions & 4 deletions python/ppc_model/secure_lgbm/vertical/booster.py
@@ -33,8 +33,8 @@ def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
         self._test_weights = None
         self._test_praba = None
 
-        random.seed(ctx.lgbm_params.random_state)
-        np.random.seed(ctx.lgbm_params.random_state)
+        random.seed(ctx.model_params.random_state)
+        np.random.seed(ctx.model_params.random_state)
 
     def _build_tree(self, *args, **kwargs):

@@ -250,9 +250,9 @@ def load_model(self, file_path=None):
                 file_path, self.ctx.MODEL_DATA_FILE)
         if self.ctx.algorithm_type == AlgorithmType.Predict.name:
             self.ctx.remote_feature_bin_file = os.path.join(
-                self.ctx.lgbm_params.training_job_id, self.ctx.FEATURE_BIN_FILE)
+                self.ctx.model_params.training_job_id, self.ctx.FEATURE_BIN_FILE)
             self.ctx.remote_model_data_file = os.path.join(
-                self.ctx.lgbm_params.training_job_id, self.ctx.MODEL_DATA_FILE)
+                self.ctx.model_params.training_job_id, self.ctx.MODEL_DATA_FILE)
 
         ResultFileHandling._download_file(self.ctx.components.storage_client,
                                           self.ctx.feature_bin_file, self.ctx.remote_feature_bin_file)
2 changes: 1 addition & 1 deletion python/ppc_model/secure_lgbm/vertical/passive_party.py
@@ -15,7 +15,7 @@ class VerticalLGBMPassiveParty(VerticalBooster):
 
     def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
         super().__init__(ctx, dataset)
-        self.params = ctx.lgbm_params
+        self.params = ctx.model_params
         self.log = ctx.components.logger()
         self.log.info(
             f'task {self.ctx.task_id}: print all params: {self.params.get_all_params()}')
Empty file.
