From c795ec6861a501c0de38495afc49b88bc897dd14 Mon Sep 17 00:00:00 2001 From: Re-st Date: Mon, 27 Nov 2023 04:51:37 +0000 Subject: [PATCH] Formatted with black --- API/candidate.py | 4 +--- API/elected.py | 6 ++---- analysis/age/__init__.py | 2 ++ analysis/age/hist_groups.py | 27 ++++++++++++++++++--------- analysis/age/main.py | 18 +++++++++++------- scrap/group_head.py | 4 ++-- scrap/utils/runner.py | 3 ++- 7 files changed, 38 insertions(+), 26 deletions(-) diff --git a/API/candidate.py b/API/candidate.py index 8bbb027..c755610 100644 --- a/API/candidate.py +++ b/API/candidate.py @@ -91,6 +91,4 @@ def fetch_all_data( if args.save_method == "excel": save_to_excel(data_list, sgTypecode, is_elected=False) elif args.save_method == "mongo": - save_to_mongo( - data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode] - ) + save_to_mongo(data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode]) diff --git a/API/elected.py b/API/elected.py index 1b4a534..7fe299e 100644 --- a/API/elected.py +++ b/API/elected.py @@ -54,7 +54,7 @@ def fetch_all_data( sgIds: List[str], sgTypecodes: str, drop_columns: List[str] ) -> List[dict]: data_list = [] - for sgTypecode in sgTypecodes.split(","): + for sgTypecode in sgTypecodes.split(","): for sgId in sgIds: data_list.extend(fetch_data(sgId, sgTypecode, drop_columns=drop_columns)) @@ -90,6 +90,4 @@ def fetch_all_data( if args.save_method == "excel": save_to_excel(data_list, sgTypecode, is_elected=True) elif args.save_method == "mongo": - save_to_mongo( - data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode] - ) + save_to_mongo(data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode]) diff --git a/analysis/age/__init__.py b/analysis/age/__init__.py index d84ff4a..ac61850 100644 --- a/analysis/age/__init__.py +++ b/analysis/age/__init__.py @@ -1,6 +1,8 @@ """ 공공데이터포털 API로 수집한 데이터를 분석하기 위한 패키지입니다. """ + + class BasicArgument: def __init__(self, **kwargs): self.__dict__.update(kwargs) diff --git a/analysis/age/hist_groups.py b/analysis/age/hist_groups.py index 2a01d59..94a5cda 100644 --- a/analysis/age/hist_groups.py +++ b/analysis/age/hist_groups.py @@ -9,6 +9,7 @@ from db.client import client from analysis.age import BasicArgument + def plot_young_and_old(youngest_cluster, oldest_cluster): try: sns.histplot( @@ -184,7 +185,7 @@ def cluster(df_original, n_clst, basedic): """구역별 그룹을 만듭니다. df_original: 데이터프레임 n_clst: 그룹 수 - basedic: 기본 정보가 담긴 딕셔너리 + basedic: 기본 정보가 담긴 딕셔너리 """ distdb = client["district"] statdb = client["stats"] @@ -230,7 +231,9 @@ def cluster(df_original, n_clst, basedic): for i in range(n_clst): clst_data = df_clst[df_clst["cluster_label"] == i] # print(f"Cluster {i} in {area}: {clst_data['age'].min()} - {clst_data['age'].max()}") - cluster_center_age = round(clst_data["age"].mean(), 2) # 나이를 소수점 2자리까지 반올림 + cluster_center_age = round( + clst_data["age"].mean(), 2 + ) # 나이를 소수점 2자리까지 반올림 clst_age_mean.append(cluster_center_age) clst_of_young = 0 clst_of_old = n_clst - 1 @@ -243,7 +246,9 @@ def cluster(df_original, n_clst, basedic): # 지역의 가장 젊은, 나이든 그룹을 찾습니다 yb_clst = df_clst[df_clst["cluster_label"] == clst_of_young] ob_clst = df_clst[df_clst["cluster_label"] == clst_of_old] - print(f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}") + print( + f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}" + ) print(f"Oldest in {area}: {ob_clst['age'].min()} - {ob_clst['age'].max()}") # 그룹의 성비를 계산합니다. young_group_sexratio = ( @@ -266,7 +271,9 @@ def cluster(df_original, n_clst, basedic): "minAge": int(age), "maxAge": int(age) + 1, "count": df_clst[df_clst["age"] == age].shape[0], - "ageGroup": int(df_clst.loc[df_clst["age"] == age].iloc[0]["cluster_label"]) + "ageGroup": int( + df_clst.loc[df_clst["age"] == age].iloc[0]["cluster_label"] + ), } for age in df_clst["age"].unique() ] @@ -312,9 +319,9 @@ def cluster(df_original, n_clst, basedic): else: localname = df_clst["wiwName"].iloc[0] print("sdName is ", metroname, "wiwName is", localname) - localId = localIds.find_one({"sdName": metroname, "wiwName": localname})[ - "localId" - ] + localId = localIds.find_one( + {"sdName": metroname, "wiwName": localname} + )["localId"] dic = basedic.__dict__.copy() dic["metroId"] = metroId insert_data_to_mongo( @@ -326,11 +333,13 @@ def cluster(df_original, n_clst, basedic): localId=localId, ) - print(f"Number of data points per cluster for {area}, method {basedic.method}") + print( + f"Number of data points per cluster for {area}, method {basedic.method}" + ) for cluster_label in range(n_clst): closest_data_count = sum(df_clst["cluster_label"] == cluster_label) print( f"Cluster {cluster_label}: Age {clst_age_mean[cluster_label]}, {closest_data_count} closest data points" ) print(f"Youngest in {youngest_age[0]}: {youngest_age[1]}") - print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}") \ No newline at end of file + print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}") diff --git a/analysis/age/main.py b/analysis/age/main.py index 61a43ff..5368248 100644 --- a/analysis/age/main.py +++ b/analysis/age/main.py @@ -23,6 +23,7 @@ "기초의원비례대표": "local_councilor", } + def main(N=5, folder_name="To_be_filled"): ## TO-DO: excel말고 mongodb에서 받아오도록 합니다. ## 이 링크에 구현될 save_to_mongo함수 참고 : https://github.com/NewWays-TechForImpactKAIST/API-scrap-and-analysis//blob/bd817e9a15086d313d9615b2515a81e0dbd73850/API/utils.py#L34 @@ -33,9 +34,9 @@ def main(N=5, folder_name="To_be_filled"): level = 1 if cluster_by == "sdName" else 2 datadir = os.path.join(BASE_DIR, "_data", folder_name) # for d in os.listdir(datadir): - # xlsx 파일을 읽어옵니다. - # if not d.endswith(".xlsx"): - # continue + # xlsx 파일을 읽어옵니다. + # if not d.endswith(".xlsx"): + # continue # df = pd.read_excel(os.path.join(datadir, d)) # d = "[당선][시도의원].xlsx" d = "[당선][구시군의회의원].xlsx" @@ -57,12 +58,15 @@ def main(N=5, folder_name="To_be_filled"): if "후보" in d else ValueError("엑셀파일 이름에 '당선'이든지 '후보'가 있어야 합니다.") ) - councilorType = councilordict[d.split('[')[-1].split(']')[0]] + councilorType = councilordict[d.split("[")[-1].split("]")[0]] for method in ["kmeans", "equal"]: - basedic = BasicArgument(councilorType=councilorType, is_elected=is_elected, level=level, method=method) - cluster( - df, N, basedic + basedic = BasicArgument( + councilorType=councilorType, + is_elected=is_elected, + level=level, + method=method, ) + cluster(df, N, basedic) ## 2. 광역의회 diff --git a/scrap/group_head.py b/scrap/group_head.py index 5fac463..3c50fb7 100644 --- a/scrap/group_head.py +++ b/scrap/group_head.py @@ -65,13 +65,13 @@ def scrap_group_leaders( browser.get(url) results = dict() - for (area, councilor) in metro_heads: + for area, councilor in metro_heads: results[area] = ScrapResult( council_id=area, council_type=CouncilType.METRO_LEADER, councilors=councilor, ) - for (local_area_name, councilor) in local_heads: + for local_area_name, councilor in local_heads: print(local_area_name) results[local_area_name] = ScrapResult( council_id=local_area_name, diff --git a/scrap/utils/runner.py b/scrap/utils/runner.py index 2a4daac..01cb2a1 100644 --- a/scrap/utils/runner.py +++ b/scrap/utils/runner.py @@ -265,7 +265,8 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]: if __name__ == "__main__": parser = argparse.ArgumentParser(description="지방의회 / 광역의회 / 국회 / 단체장 스크랩 스크립트 실행") - parser.add_argument( "-w", + parser.add_argument( + "-w", "--where", help="스크랩할 의회 종류 (지방의회: 'local', 광역의회: 'metro', 국회: 'national', 단체장: 'leaders')", choices=["local", "metro", "national", "leaders"],