diff --git a/python/ppc_dev/job_exceuter/hdfs_client.py b/python/ppc_dev/job_exceuter/hdfs_client.py deleted file mode 100644 index ca77360a..00000000 --- a/python/ppc_dev/job_exceuter/hdfs_client.py +++ /dev/null @@ -1,53 +0,0 @@ -import requests -import pandas as pd -import io - - -class HDFSApi: - def __init__(self, base_url): - self.base_url = base_url - - def upload(self, dataframe, hdfs_path): - """ - 上传Pandas DataFrame到HDFS - :param dataframe: 要上传的Pandas DataFrame - :param hdfs_path: HDFS目标路径 - :return: 响应信息 - """ - # 将DataFrame转换为CSV格式 - csv_buffer = io.StringIO() - dataframe.to_csv(csv_buffer, index=False) - - # 发送PUT请求上传CSV数据 - response = requests.put( - f"{self.base_url}/upload?path={hdfs_path}", - data=csv_buffer.getvalue(), - headers={'Content-Type': 'text/csv'} - ) - return response.json() - - def download(self, hdfs_path): - """ - 从HDFS下载数据并返回为Pandas DataFrame - :param hdfs_path: HDFS文件路径 - :return: Pandas DataFrame - """ - response = requests.get(f"{self.base_url}/download?path={hdfs_path}") - if response.status_code == 200: - # 读取CSV数据并转换为DataFrame - dataframe = pd.read_csv(io.StringIO(response.text)) - return dataframe - else: - raise Exception(f"下载失败: {response.json()}") - - def download_data(self, hdfs_path): - """ - 从HDFS下载数据并返回为Pandas DataFrame - :param hdfs_path: HDFS文件路径 - :return: text - """ - response = requests.get(f"{self.base_url}/download?path={hdfs_path}") - if response.status_code == 200: - return response.text - else: - raise Exception(f"下载失败: {response.json()}") diff --git a/python/ppc_dev/wedpr_data/data_context.py b/python/ppc_dev/wedpr_data/data_context.py deleted file mode 100644 index 177a828f..00000000 --- a/python/ppc_dev/wedpr_data/data_context.py +++ /dev/null @@ -1,35 +0,0 @@ -import os - -from ppc_dev.utils import utils - - -class DataContext: - - def __init__(self, *datasets): - self.datasets = list(datasets) - self.ctx = self.datasets[0].ctx - - self._check_datasets() - - def _save_dataset(self, dataset): - if 
dataset.dataset_path is None: - dataset.dataset_id = utils.make_id(utils.IdPrefixEnum.DATASET.value) - dataset.dataset_path = os.path.join(dataset.ctx.workspace, dataset.dataset_id) - if self.storage_client is not None: - self.storage_client.upload(self.values, self.dataset_path) - - def _check_datasets(self): - for dataset in self.datasets: - self._save_dataset(dataset) - - def to_psi_format(self): - dataset_psi = [] - for dataset in self.datasets: - dataset_psi.append(dataset.dataset_path) - return dataset_psi - - def to_model_formort(self): - dataset_model = [] - for dataset in self.datasets: - dataset_model.append(dataset.dataset_path) - return dataset_model diff --git a/python/ppc_dev/__init__.py b/python/wedpr_ml_toolkit/__init__.py similarity index 100% rename from python/ppc_dev/__init__.py rename to python/wedpr_ml_toolkit/__init__.py diff --git a/python/ppc_dev/common/__init__.py b/python/wedpr_ml_toolkit/common/__init__.py similarity index 100% rename from python/ppc_dev/common/__init__.py rename to python/wedpr_ml_toolkit/common/__init__.py diff --git a/python/ppc_dev/common/base_context.py b/python/wedpr_ml_toolkit/common/base_context.py similarity index 81% rename from python/ppc_dev/common/base_context.py rename to python/wedpr_ml_toolkit/common/base_context.py index 1496f381..cd2cdb66 100644 --- a/python/ppc_dev/common/base_context.py +++ b/python/wedpr_ml_toolkit/common/base_context.py @@ -10,4 +10,4 @@ def __init__(self, project_id, user_name, pws_endpoint=None, hdfs_endpoint=None, self.pws_endpoint = pws_endpoint self.hdfs_endpoint = hdfs_endpoint self.token = token - self.workspace = os.path.join(self.project_id, self.user_name) + self.workspace = './milestone2' diff --git a/python/ppc_dev/common/base_result.py b/python/wedpr_ml_toolkit/common/base_result.py similarity index 58% rename from python/ppc_dev/common/base_result.py rename to python/wedpr_ml_toolkit/common/base_result.py index ace5f8e3..88bb3f8f 100644 --- 
class HDFSApi:
    """DataFrame-oriented wrapper around the project's HDFS storage client.

    DataFrames are exchanged with HDFS as CSV text that carries a header row
    and no index column.
    """

    def __init__(self, hdfs_endpoint):
        """Create the underlying storage client.

        :param hdfs_endpoint: HDFS endpoint; it is handed to the storage
            loader as both ``HDFS_URL`` and ``HDFS_ENDPOINT``.
        """
        self.hdfs_endpoint = hdfs_endpoint

        config_data = {
            'STORAGE_TYPE': 'HDFS',
            'HDFS_URL': self.hdfs_endpoint,
            'HDFS_ENDPOINT': self.hdfs_endpoint,
        }
        self.storage_client = storage_loader.load(config_data, logger=None)

    def upload(self, dataframe, hdfs_path):
        """Serialize ``dataframe`` to CSV and store it at ``hdfs_path``.

        :param dataframe: pandas DataFrame to upload
        :param hdfs_path: destination path on HDFS
        :return: None
        """
        # to_csv() without a target returns the CSV text directly.
        csv_text = dataframe.to_csv(index=False)
        self.storage_client.save_data(csv_text, hdfs_path)

    def download(self, hdfs_path):
        """Fetch the CSV stored at ``hdfs_path`` and parse it.

        :param hdfs_path: source path on HDFS
        :return: pandas DataFrame parsed from the stored CSV
        """
        content = self.storage_client.get_data(hdfs_path)
        # Accept both raw bytes and already-decoded text: wrapping a str in
        # BytesIO would raise TypeError.
        if isinstance(content, str):
            buffer = io.StringIO(content)
        else:
            buffer = io.BytesIO(content)
        return pd.read_csv(buffer)

    def download_byte(self, hdfs_path):
        """Fetch the object stored at ``hdfs_path`` without parsing it.

        :param hdfs_path: source path on HDFS
        :return: the payload exactly as the storage client returns it
        """
        return self.storage_client.get_data(hdfs_path)
# Path of the job-submission endpoint, appended to the PWS base endpoint.
PWS_URL = '/api/wedpr/v3/project/submitJob'


class PWSApi:
    """Client that submits jobs to the PWS job-submission endpoint."""

    def __init__(self, endpoint, token,
                 polling_interval_s: int = 5, max_retries: int = 5, retry_delay_s: int = 5):
        """Store connection settings.

        :param endpoint: PWS base URL (scheme, host and port)
        :param token: credential sent in the ``Authorization`` header
        :param polling_interval_s: stored polling interval, in seconds
        :param max_retries: stored retry limit
        :param retry_delay_s: stored delay between retries, in seconds
        """
        self.pws_url = endpoint + PWS_URL
        self.token = token
        self.polling_interval_s = polling_interval_s
        self.max_retries = max_retries
        self.retry_delay_s = retry_delay_s
        self._completed_status = 'COMPLETED'
        self._failed_status = 'FAILED'

    def _build_headers(self):
        """Return the HTTP headers for a submission request."""
        # Use the token supplied by the caller; a credential must never be
        # hard-coded in source.
        return {
            "Authorization": self.token,
            "content-type": "application/json",
        }

    @staticmethod
    def _build_payload(params):
        """Map the flat ``params`` dict onto the submitJob request body."""
        return {
            "job": {
                "jobType": params['jobType'],
                "projectName": params['projectName'],
                "param": params['param'],
            },
            "taskParties": params['taskParties'],
            "datasetList": params['datasetList'],
        }

    def run(self, params):
        """Submit a job described by ``params``.

        :param params: dict with the keys ``jobType``, ``projectName``,
            ``param``, ``taskParties`` and ``datasetList``
        :return: None (status polling is currently disabled)
        :raises KeyError: if a required key is missing; raised before any
            request is sent
        :raises Exception: if the service answers with a non-200 status
        """
        payload = self._build_payload(params)
        response = requests.request("POST", self.pws_url, json=payload,
                                    headers=self._build_headers())
        if response.status_code != 200:
            # An error body is not guaranteed to be JSON; fall back to the raw
            # text so the real failure is not masked by a decode error.
            try:
                detail = response.json()
            except ValueError:
                detail = response.text
            raise Exception(f"创建任务失败: {detail}")
        return None
        # return self._poll_task_status(response.data, self.token)

    # NOTE(review): _poll_task_status and any further methods continue beyond
    # the visible hunk; they are intentionally not reproduced or changed here.
wedpr_ml_toolkit.job_exceuter.hdfs_client import HDFSApi class ModelResult(BaseResult): diff --git a/python/ppc_dev/result/psi_result.py b/python/wedpr_ml_toolkit/result/psi_result.py similarity index 84% rename from python/ppc_dev/result/psi_result.py rename to python/wedpr_ml_toolkit/result/psi_result.py index dae03f58..3b7da74c 100644 --- a/python/ppc_dev/result/psi_result.py +++ b/python/wedpr_ml_toolkit/result/psi_result.py @@ -1,7 +1,7 @@ import os -from ppc_dev.wedpr_data.data_context import DataContext -from ppc_dev.common.base_result import BaseResult +from wedpr_ml_toolkit.wedpr_data.data_context import DataContext +from wedpr_ml_toolkit.common.base_result import BaseResult class PSIResult(BaseResult): diff --git a/python/ppc_dev/test/__init__.py b/python/wedpr_ml_toolkit/test/__init__.py similarity index 100% rename from python/ppc_dev/test/__init__.py rename to python/wedpr_ml_toolkit/test/__init__.py diff --git a/python/ppc_dev/test/test_dev.py b/python/wedpr_ml_toolkit/test/test_dev.py similarity index 52% rename from python/ppc_dev/test/test_dev.py rename to python/wedpr_ml_toolkit/test/test_dev.py index 03bad07a..9ccc864f 100644 --- a/python/ppc_dev/test/test_dev.py +++ b/python/wedpr_ml_toolkit/test/test_dev.py @@ -3,26 +3,26 @@ import pandas as pd from sklearn import metrics -from ppc_dev.common.base_context import BaseContext -from ppc_dev.utils.agency import Agency -from ppc_dev.wedpr_data.wedpr_data import WedprData -from ppc_dev.wedpr_data.data_context import DataContext -from ppc_dev.wedpr_session.wedpr_session import WedprSession +from wedpr_ml_toolkit.common.base_context import BaseContext +from wedpr_ml_toolkit.utils.agency import Agency +from wedpr_ml_toolkit.wedpr_data.wedpr_data import WedprData +from wedpr_ml_toolkit.wedpr_data.data_context import DataContext +from wedpr_ml_toolkit.wedpr_session.wedpr_session import WedprSession # 从jupyter环境中获取project_id等信息 # create workspace # 相同项目/刷新专家模式project_id固定 project_id = 'p-123' -user = 
'admin' -my_agency='WeBank' -pws_endpoint = '0.0.0.0:0000' -hdfs_endpoint = '0.0.0.0:0001' +user = 'flyhuang1' +my_agency='sgd' +pws_endpoint = 'http://139.159.202.235:8005' # http +hdfs_endpoint = 'http://192.168.0.18:50070' # client token = 'abc...' # 自定义合作方机构 -partner_agency1='SG' +partner_agency1='webank' partner_agency2='TX' # 初始化project ctx 信息 @@ -36,23 +36,28 @@ # pd.Dataframe df = pd.DataFrame({ 'id': np.arange(0, 100), # id列,顺序整数 + 'y': np.random.randint(0, 2, size=100), **{f'x{i}': np.random.rand(100) for i in range(1, 11)} # x1到x10列,随机数 }) -dataset1 = WedprData(ctx, values=df, agency=agency1) + +dataset1 = WedprData(ctx, values=df, agency=agency1, is_label_holder=True) dataset1.storage_client = None -dataset1.save_values(path='./project_id/user/data/d-101') +dataset1.save_values(path='d-101') # './milestone2\\sgd\\flyhuang1\\share\\d-101' + # hdfs_path -dataset2 = WedprData(ctx, dataset_path='./data_path/d-123', agency=agency2, is_label_holder=True) +dataset2 = WedprData(ctx, dataset_path='/user/ppc/milestone2/webank/flyhuang/d-9606695119693829', agency=agency2) dataset2.storage_client = None -dataset2.load_values() - -# 支持更新dataset的values数据 -df2 = pd.DataFrame({ - 'id': np.arange(0, 100), # id列,顺序整数 - 'y': np.random.randint(0, 2, size=100), - **{f'x{i}': np.random.rand(100) for i in range(1, 11)} # x1到x10列,随机数 -}) -dataset2.update_values(values=df2) +# dataset2.load_values() +if dataset2.storage_client is None: + # 支持更新dataset的values数据 + df2 = pd.DataFrame({ + 'id': np.arange(0, 100), # id列,顺序整数 + **{f'z{i}': np.random.rand(100) for i in range(1, 11)} # x1到x10列,随机数 + }) + dataset2.update_values(values=df2) +if dataset1.storage_client is not None: + dataset1.update_values(path='/user/ppc/milestone2/sgd/flyhuang1/d-9606704699156485') + dataset1.load_values() # 构建 dataset context dataset = DataContext(dataset1, dataset2) diff --git a/python/ppc_dev/utils/__init__.py b/python/wedpr_ml_toolkit/utils/__init__.py similarity index 100% rename from 
class DataContext:
    """Group of datasets that take part in one job.

    On construction every dataset that has no ``dataset_path`` yet is given
    an id and a path below its storage workspace and, when it has a storage
    client, is uploaded there.
    """

    def __init__(self, *datasets):
        """Collect ``datasets`` and make sure each one has a storage path.

        :param datasets: one or more dataset objects; the context of the
            first one becomes ``self.ctx``
        :raises IndexError: if no dataset is given
        """
        if not datasets:
            raise IndexError("DataContext requires at least one dataset")
        self.datasets = list(datasets)
        self.ctx = self.datasets[0].ctx

        self._check_datasets()

    def _save_dataset(self, dataset):
        """Assign an id and path to ``dataset`` if it has no path, then upload it."""
        if dataset.dataset_path is not None:
            return
        # HDFS paths always use '/', so join with posixpath instead of the
        # host's os.path (which would produce backslashes on Windows).
        import posixpath

        dataset.dataset_id = utils.make_id(utils.IdPrefixEnum.DATASET.value)
        dataset.dataset_path = posixpath.join(dataset.storage_workspace, dataset.dataset_id)
        if dataset.storage_client is not None:
            dataset.storage_client.upload(dataset.values, dataset.dataset_path)

    def _check_datasets(self):
        """Run ``_save_dataset`` over every dataset in the context."""
        for dataset in self.datasets:
            self._save_dataset(dataset)

    def to_psi_format(self, merge_filed, result_receiver_id_list):
        """Describe every dataset as a PSI job entry.

        :param merge_filed: name of the id column used for the intersection
        :param result_receiver_id_list: agency ids that receive the result
        :return: list of dicts, one per dataset, in dataset order
        """
        return [
            {
                "idFields": [merge_filed],
                "dataset": {
                    "owner": dataset.ctx.user_name,
                    "ownerAgency": dataset.agency.agency_id,
                    "path": dataset.dataset_path,
                    "storageTypeStr": "HDFS",
                    "datasetID": dataset.dataset_id,
                },
                # The flag is emitted as the strings "true"/"false".
                "receiveResult": (
                    "true" if dataset.agency.agency_id in result_receiver_id_list else "false"
                ),
            }
            for dataset in self.datasets
        ]

    def to_model_formort(self):
        """Return the storage path of every dataset, in dataset order."""
        return [dataset.dataset_path for dataset in self.datasets]
def task(self, params: dict = None):
    """Validate the agencies, submit ``params`` and return the job id.

    :param params: job parameters forwarded to the executor; defaults to an
        empty dict
    :return: the ``job_id`` of the executor's response, or None when the
        executor returns no response object
    """
    self.check_agencies()

    # Avoid a shared mutable default argument.
    job_response = self.excute.run({} if params is None else params)

    # The executor may return None (status polling is disabled there);
    # dereferencing .job_id on it would raise AttributeError.
    if job_response is None:
        return None
    return job_response.job_id


def get_agencies(self):
    """Refresh the participant, dataset-id and task-party lists.

    All three lists are rebuilt from ``self.dataset.datasets`` so that
    calling this method again does not duplicate task parties.

    :return: the list of participant agency ids (also stored on ``self``),
        so callers that assign the result keep working
    """
    datasets = self.dataset.datasets
    self.participant_id_list = [item.agency.agency_id for item in datasets]
    self.dataset_id_list = [item.dataset_id for item in datasets]
    self.task_parties = [
        {'userName': item.ctx.user_name, 'agency': item.agency.agency_id}
        for item in datasets
    ]
    return self.participant_id_list


def get_label_holder_agency(self):
    """Record which agency holds the label and the label column name.

    When several datasets are flagged as label holder, the last one wins.

    :return: the label holder's agency id, or None if no dataset is flagged
    """
    label_holder_agency = None
    label_columns = None
    for item in self.dataset.datasets:
        if item.is_label_holder:
            label_holder_agency = item.agency.agency_id
            label_columns = 'y'
    self.label_holder_agency = label_holder_agency
    self.label_columns = label_columns
    return label_holder_agency