Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update secure lr #28

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions python/ppc_common/ppc_crypto/ihc_cipher.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def __add__(self, other):
cipher_left = self.c_left + other.c_left
cipher_right = self.c_right + other.c_right
return IhcCiphertext(cipher_left, cipher_right)

def __mul__(self, num: int):
    """Return a new ciphertext with both components scaled by ``num``.

    Args:
        num: plain integer multiplier applied to ``c_left`` and ``c_right``.

    Returns:
        A new ``IhcCiphertext`` built from the two scaled components.
    """
    cipher_left = num * self.c_left
    cipher_right = num * self.c_right
    return IhcCiphertext(cipher_left, cipher_right)

def __eq__(self, other):
    """Compare two ciphertexts component-wise.

    Returns ``True`` when both ``c_left`` and ``c_right`` match those of
    ``other``. If ``other`` does not expose both attributes (for example
    ``None`` or an ``int``), ``NotImplemented`` is returned so Python falls
    back to its default comparison instead of raising ``AttributeError``.
    """
    # Keep the try body to the attribute reads only, so an error raised by
    # the component comparison itself is never swallowed.
    try:
        other_left = other.c_left
        other_right = other.c_right
    except AttributeError:
        return NotImplemented
    return self.c_left == other_left and self.c_right == other_right
Expand Down
2 changes: 1 addition & 1 deletion python/ppc_model/common/model_setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(self, model_dict):
"iv_thresh", 0.1, model_dict, False))
self.use_goss = common_func.get_config_value(
"use_goss", False, model_dict, False)
self.test_dataset_percentage = float(common_func.get_config_value(
self.test_size = float(common_func.get_config_value(
"test_dataset_percentage", 0.3, model_dict, False))
self.learning_rate = float(common_func.get_config_value(
"learning_rate", 0.1, model_dict, False))
Expand Down
2 changes: 2 additions & 0 deletions python/ppc_model/common/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class ModelTask(Enum):
FEATURE_ENGINEERING = "FEATURE_ENGINEERING"
XGB_TRAINING = "XGB_TRAINING"
XGB_PREDICTING = "XGB_PREDICTING"
LR_TRAINING = "LR_TRAINING"
LR_PREDICTING = "LR_PREDICTING"


class TaskStatus(Enum):
Expand Down
12 changes: 6 additions & 6 deletions python/ppc_model/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ def __init__(self, ctx: SecureLGBMContext, model_data=None, delimiter: str = ' '
self.selected_col_file = ctx.selected_col_file
self.is_label_holder = ctx.is_label_holder
self.algorithm_type = ctx.algorithm_type
self.test_size = ctx.lgbm_params.test_size
self.random_state = ctx.lgbm_params.random_state
self.eval_set_column = ctx.lgbm_params.eval_set_column
self.train_set_value = ctx.lgbm_params.train_set_value
self.eval_set_value = ctx.lgbm_params.eval_set_value
self.test_size = ctx.model_params.test_size
self.random_state = ctx.model_params.random_state
self.eval_set_column = ctx.model_params.eval_set_column
self.train_set_value = ctx.model_params.train_set_value
self.eval_set_value = ctx.model_params.eval_set_value

self.ctx = ctx
self.train_X = None
Expand Down Expand Up @@ -197,7 +197,7 @@ def _construct_dataset(self):
and not os.path.exists(self.selected_col_file):
try:
self.ctx.remote_selected_col_file = os.path.join(
self.ctx.lgbm_params.training_job_id, self.ctx.SELECTED_COL_FILE)
self.ctx.model_params.training_job_id, self.ctx.SELECTED_COL_FILE)
ResultFileHandling._download_file(self.ctx.components.storage_client,
self.selected_col_file, self.ctx.remote_selected_col_file)
self._dataset_fe_selected(self.selected_col_file, 'id')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
class FeatureBinning:
def __init__(self, ctx: Context):
self.ctx = ctx
self.params = ctx.lgbm_params
self.params = ctx.model_params
self.data = None
self.data_bin = None
self.data_split = None
Expand Down
10 changes: 5 additions & 5 deletions python/ppc_model/datasets/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def test_random_split_dataset(self):
}
}
task_info = SecureLGBMContext(args, self.components)
print(task_info.lgbm_params.get_all_params())
print(task_info.model_params.get_all_params())

# 模拟构造主动方数据集
dataset_with_y = SecureDataset(task_info, self.df_with_y)
Expand All @@ -99,7 +99,7 @@ def test_random_split_dataset(self):
}
}
task_info = SecureLGBMContext(args, self.components)
print(task_info.lgbm_params.get_all_params())
print(task_info.model_params.get_all_params())

# 模拟构造被动方数据集
dataset_without_y = SecureDataset(task_info, self.df_without_y)
Expand Down Expand Up @@ -128,7 +128,7 @@ def test_customized_split_dataset(self):
}
}
task_info = SecureLGBMContext(args, self.components)
print(task_info.lgbm_params.get_all_params())
print(task_info.model_params.get_all_params())

# 模拟构造主动方数据集
task_info.eval_column_file = self.eval_column_file
Expand Down Expand Up @@ -158,7 +158,7 @@ def test_predict_dataset(self):
'model_dict': {}
}
task_info = SecureLGBMContext(args, self.components)
print(task_info.lgbm_params.get_all_params())
print(task_info.model_params.get_all_params())

# 模拟构造主动方数据集
task_info.model_prepare_file = self.df_with_y_file
Expand All @@ -184,7 +184,7 @@ def test_iv_selected_dataset(self):
'model_dict': {}
}
task_info = SecureLGBMContext(args, self.components)
print(task_info.lgbm_params.get_all_params())
print(task_info.model_params.get_all_params())

# 模拟构造主动方数据集
task_info.model_prepare_file = self.df_with_y_file
Expand Down
18 changes: 17 additions & 1 deletion python/ppc_model/metrics/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Loss:

class BinaryLoss(Loss):

def __init__(self, objective: str) -> None:
def __init__(self, objective: str = None) -> None:
super().__init__()
self.objective = objective

Expand All @@ -30,3 +30,19 @@ def compute_loss(y_true: np.ndarray, y_pred: np.ndarray):
epsilon = 1e-15
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

@staticmethod
def dot_product(x, theta):
    """Return the linear score ``np.matmul(x, theta)``.

    Args:
        x: feature data, either a 1-D sample or a 2-D array of samples.
        theta: weights, 1-D or 2-D, with a leading dimension that matches
            the trailing dimension of ``x``.

    Returns:
        The matrix product of ``x`` and ``theta``.

    The previous body called ``x.reshape(...)`` and ``theta.reshape(...)``
    without assigning the results. ``ndarray.reshape`` returns a new array
    rather than modifying in place, so those calls had no effect and have
    been removed. ``np.matmul`` already handles 1-D operands itself (it
    temporarily promotes them and drops the added axis from the result),
    so the value returned to callers is unchanged.
    """
    return np.matmul(x, theta)

@staticmethod
def inference(g):
    """Return the approximate prediction ``0.125 * g`` for linear score ``g``.

    This linear approximation is used in place of the exact logistic
    function ``np.exp(g) / (np.exp(g) + 1)``.
    """
    # NOTE(review): unlike the exact logistic function, this yields 0 (not
    # 0.5) at g == 0 -- confirm callers expect the offset-free form.
    return 0.125 * g
3 changes: 3 additions & 0 deletions python/ppc_model/ppc_model_app.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Note: here can't be refactored by autopep
from ppc_model.secure_lgbm.secure_lgbm_training_engine import SecureLGBMTrainingEngine
from ppc_model.secure_lgbm.secure_lgbm_prediction_engine import SecureLGBMPredictionEngine
from ppc_model.secure_lr.secure_lr_training_engine import SecureLRTrainingEngine
from ppc_model.preprocessing.preprocessing_engine import PreprocessingEngine
from ppc_model.network.http.restx import api
from ppc_model.network.http.model_controller import ns2 as log_namespace
Expand Down Expand Up @@ -49,6 +50,8 @@ def register_task_handler():
ModelTask.XGB_TRAINING, SecureLGBMTrainingEngine.run)
task_manager.register_task_handler(
ModelTask.XGB_PREDICTING, SecureLGBMPredictionEngine.run)
task_manager.register_task_handler(
ModelTask.LR_TRAINING, SecureLRTrainingEngine.run)


def model_serve():
Expand Down
22 changes: 11 additions & 11 deletions python/ppc_model/secure_lgbm/secure_lgbm_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,28 +221,28 @@ def __init__(self,
else:
self.dataset_file_path = None

self.lgbm_params = SecureLGBMParams()
self.model_params = SecureLGBMParams()
model_setting = ModelSetting(args['model_dict'])
self.set_lgbm_params(model_setting)
self.set_model_params(model_setting)
if model_setting.train_features is not None and len(model_setting.train_features) > 0:
self.lgbm_params.train_feature = model_setting.train_features.split(
self.model_params.train_feature = model_setting.train_features.split(
',')
self.lgbm_params.n_estimators = model_setting.num_trees
self.lgbm_params.feature_rate = model_setting.colsample_bytree
self.lgbm_params.min_split_gain = model_setting.gamma
self.lgbm_params.random_state = model_setting.seed
self.model_params.n_estimators = model_setting.num_trees
self.model_params.feature_rate = model_setting.colsample_bytree
self.model_params.min_split_gain = model_setting.gamma
self.model_params.random_state = model_setting.seed

self.sync_file_list = {}
if self.algorithm_type == AlgorithmType.Train.name:
self.set_sync_file()

def set_lgbm_params(self, model_setting: ModelSetting):
def set_model_params(self, model_setting: ModelSetting):
"""设置lgbm参数"""
self.lgbm_params.set_model_setting(model_setting)
self.model_params.set_model_setting(model_setting)

def get_lgbm_params(self):
def get_model_params(self):
"""获取lgbm参数"""
return self.lgbm_params
return self.model_params

def set_sync_file(self):
self.sync_file_list['metrics_iteration'] = [self.metrics_iteration_file, self.remote_metrics_iteration_file]
Expand Down
38 changes: 19 additions & 19 deletions python/ppc_model/secure_lgbm/test/test_secure_lgbm_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class TestSecureLGBMContext(unittest.TestCase):
components.config_data = {'JOB_TEMP_DIR': '/tmp'}
components.mock_logger = MockLogger()

def test_get_lgbm_params(self):
def test_get_model_params(self):

args = {
'job_id': 'j-123',
Expand All @@ -26,15 +26,15 @@ def test_get_lgbm_params(self):
}

task_info = SecureLGBMContext(args, self.components)
lgbm_params = task_info.get_lgbm_params()
model_params = task_info.get_model_params()
# 打印LGBMModel默认参数
print(lgbm_params._get_params())
print(model_params._get_params())

# 默认自定义参数为空字典
assert lgbm_params.get_params() == {}
# assert lgbm_params.get_all_params() != lgbm_params._get_params()
assert model_params.get_params() == {}
# assert model_params.get_all_params() != model_params._get_params()

def test_set_lgbm_params(self):
def test_set_model_params(self):

args = {
'job_id': 'j-123',
Expand All @@ -49,28 +49,28 @@ def test_set_lgbm_params(self):
'objective': 'regression',
'n_estimators': 6,
'max_depth': 3,
'test_size': 0.2,
'test_dataset_percentage': 0.2,
'use_goss': 1
}
}

task_info = SecureLGBMContext(args, self.components)
lgbm_params = task_info.get_lgbm_params()
model_params = task_info.get_model_params()
# 打印SecureLGBMParams自定义参数
print(lgbm_params.get_params())
print(model_params.get_params())
# 打印SecureLGBMParams所有参数
print(lgbm_params.get_all_params())
print(model_params.get_all_params())

assert lgbm_params.get_params() == args['model_dict']
self.assertEqual(lgbm_params.get_all_params()[
'learning_rate'], lgbm_params._get_params()['learning_rate'])
self.assertEqual(lgbm_params.learning_rate,
lgbm_params._get_params()['learning_rate'])
self.assertEqual(lgbm_params.n_estimators,
# assert model_params.get_params() == args['model_dict']
self.assertEqual(model_params.get_all_params()[
'learning_rate'], model_params._get_params()['learning_rate'])
self.assertEqual(model_params.learning_rate,
model_params._get_params()['learning_rate'])
self.assertEqual(model_params.n_estimators,
args['model_dict']['n_estimators'])
self.assertEqual(lgbm_params.test_size,
args['model_dict']['test_size'])
self.assertEqual(lgbm_params.use_goss, args['model_dict']['use_goss'])
self.assertEqual(model_params.test_size,
args['model_dict']['test_dataset_percentage'])
self.assertEqual(model_params.use_goss, args['model_dict']['use_goss'])


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion python/ppc_model/secure_lgbm/vertical/active_party.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class VerticalLGBMActiveParty(VerticalBooster):

def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
super().__init__(ctx, dataset)
self.params = ctx.lgbm_params
self.params = ctx.model_params
self._loss_func = BinaryLoss(self.params.objective)
self._all_feature_name = [dataset.feature_name]
self._all_feature_num = len(dataset.feature_name)
Expand Down
8 changes: 4 additions & 4 deletions python/ppc_model/secure_lgbm/vertical/booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
self._test_weights = None
self._test_praba = None

random.seed(ctx.lgbm_params.random_state)
np.random.seed(ctx.lgbm_params.random_state)
random.seed(ctx.model_params.random_state)
np.random.seed(ctx.model_params.random_state)

def _build_tree(self, *args, **kwargs):

Expand Down Expand Up @@ -250,9 +250,9 @@ def load_model(self, file_path=None):
file_path, self.ctx.MODEL_DATA_FILE)
if self.ctx.algorithm_type == AlgorithmType.Predict.name:
self.ctx.remote_feature_bin_file = os.path.join(
self.ctx.lgbm_params.training_job_id, self.ctx.FEATURE_BIN_FILE)
self.ctx.model_params.training_job_id, self.ctx.FEATURE_BIN_FILE)
self.ctx.remote_model_data_file = os.path.join(
self.ctx.lgbm_params.training_job_id, self.ctx.MODEL_DATA_FILE)
self.ctx.model_params.training_job_id, self.ctx.MODEL_DATA_FILE)

ResultFileHandling._download_file(self.ctx.components.storage_client,
self.ctx.feature_bin_file, self.ctx.remote_feature_bin_file)
Expand Down
2 changes: 1 addition & 1 deletion python/ppc_model/secure_lgbm/vertical/passive_party.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class VerticalLGBMPassiveParty(VerticalBooster):

def __init__(self, ctx: SecureLGBMContext, dataset: SecureDataset) -> None:
super().__init__(ctx, dataset)
self.params = ctx.lgbm_params
self.params = ctx.model_params
self.log = ctx.components.logger()
self.log.info(
f'task {self.ctx.task_id}: print all params: {self.params.get_all_params()}')
Expand Down
Empty file.
Loading
Loading