-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rename ppc_dev to wedpr-ml-toolkit (#55)
* update secure lr * update model and predict * update ppc_dev * update model setting * Update booster.py * update wedpr_ml_toolkit
- Loading branch information
1 parent
abf582f
commit 456e83c
Showing
23 changed files
with
197 additions
and
149 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
python/ppc_dev/common/base_result.py → ...on/wedpr_ml_toolkit/common/base_result.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import pandas as pd | ||
import io | ||
|
||
from ppc_common.deps_services import storage_loader | ||
|
||
|
||
class HDFSApi:
    """Thin convenience wrapper around the project storage client for HDFS.

    Builds an HDFS-backed storage client from a single endpoint string and
    exposes upload/download helpers that speak Pandas DataFrames (CSV on the
    wire) or raw bytes.
    """

    def __init__(self, hdfs_endpoint):
        """Create the wrapper and its underlying storage client.

        :param hdfs_endpoint: HDFS endpoint URL used for both the
            HDFS_URL and HDFS_ENDPOINT configuration keys.
        """
        self.hdfs_endpoint = hdfs_endpoint
        # Both URL keys are populated from the same endpoint; storage_loader
        # picks the HDFS backend based on STORAGE_TYPE.
        storage_config = {
            'STORAGE_TYPE': 'HDFS',
            'HDFS_URL': self.hdfs_endpoint,
            'HDFS_ENDPOINT': self.hdfs_endpoint,
        }
        self.storage_client = storage_loader.load(storage_config, logger=None)

    def upload(self, dataframe, hdfs_path):
        """Upload a Pandas DataFrame to HDFS as CSV (no index column).

        :param dataframe: the Pandas DataFrame to upload
        :param hdfs_path: destination path on HDFS
        :return: None
        """
        buffer = io.StringIO()
        dataframe.to_csv(buffer, index=False)
        self.storage_client.save_data(buffer.getvalue(), hdfs_path)
        return

    def download(self, hdfs_path):
        """Download CSV data from HDFS and parse it into a Pandas DataFrame.

        :param hdfs_path: source file path on HDFS
        :return: Pandas DataFrame
        """
        # NOTE(review): assumes get_data returns bytes (fed to BytesIO) — confirm
        # against the storage client contract.
        raw = self.storage_client.get_data(hdfs_path)
        return pd.read_csv(io.BytesIO(raw))

    def download_byte(self, hdfs_path):
        """Download raw content from HDFS without parsing.

        :param hdfs_path: source file path on HDFS
        :return: the raw content as returned by the storage client
        """
        return self.storage_client.get_data(hdfs_path)
37 changes: 24 additions & 13 deletions
37
python/ppc_dev/job_exceuter/pws_client.py → ...dpr_ml_toolkit/job_exceuter/pws_client.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
4 changes: 2 additions & 2 deletions
4
python/ppc_dev/result/fe_result.py → python/wedpr_ml_toolkit/result/fe_result.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
python/ppc_dev/result/psi_result.py → python/wedpr_ml_toolkit/result/psi_result.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import os | ||
|
||
from wedpr_ml_toolkit.utils import utils | ||
|
||
|
||
class DataContext:
    """Groups one or more datasets that participate in a single job.

    On construction, any dataset that has no ``dataset_path`` yet is assigned
    a freshly generated id/path and (when a storage client is attached)
    uploaded to storage. The context (``ctx``) is taken from the first
    dataset, so at least one dataset must be supplied.
    """

    def __init__(self, *datasets):
        # NOTE(review): an empty call raises IndexError on datasets[0];
        # callers are expected to pass at least one dataset.
        self.datasets = list(datasets)
        self.ctx = self.datasets[0].ctx
        self._check_datasets()

    def _save_dataset(self, dataset):
        """Assign an id/path to a brand-new dataset and upload it.

        Datasets that already have a ``dataset_path`` are left untouched;
        the upload only happens when a storage client is attached.
        """
        if dataset.dataset_path is None:
            dataset.dataset_id = utils.make_id(utils.IdPrefixEnum.DATASET.value)
            dataset.dataset_path = os.path.join(
                dataset.storage_workspace, dataset.dataset_id)
            if dataset.storage_client is not None:
                dataset.storage_client.upload(
                    dataset.values, dataset.dataset_path)

    def _check_datasets(self):
        """Ensure every dataset in the context is persisted (see _save_dataset)."""
        for dataset in self.datasets:
            self._save_dataset(dataset)

    def to_psi_format(self, merge_filed, result_receiver_id_list):
        """Build the PSI job description for every dataset in the context.

        :param merge_filed: name of the id field to intersect on
            (parameter name keeps the original spelling for
            backward compatibility with keyword callers)
        :param result_receiver_id_list: agency ids that receive the PSI result
        :return: list of per-dataset PSI descriptors (dicts)
        """
        dataset_psi = []
        for dataset in self.datasets:
            # The service expects a lowercase string, not a JSON boolean.
            if dataset.agency.agency_id in result_receiver_id_list:
                result_receiver = "true"
            else:
                result_receiver = "false"
            dataset_psi.append({
                "idFields": [merge_filed],
                "dataset": {"owner": dataset.ctx.user_name,
                            "ownerAgency": dataset.agency.agency_id,
                            "path": dataset.dataset_path,
                            "storageTypeStr": "HDFS",
                            "datasetID": dataset.dataset_id},
                "receiveResult": result_receiver})
        return dataset_psi

    def to_model_format(self):
        """Return the storage paths of all datasets, in context order."""
        return [dataset.dataset_path for dataset in self.datasets]

    # Backward-compatible alias: the method was originally published under
    # this misspelled name; existing callers continue to work.
    def to_model_formort(self):
        return self.to_model_format()
Oops, something went wrong.