Skip to content

Commit

Permalink
Formatted with black
Browse files Browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 27, 2023
1 parent b6bc27f commit c795ec6
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 26 deletions.
4 changes: 1 addition & 3 deletions API/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,4 @@ def fetch_all_data(
if args.save_method == "excel":
save_to_excel(data_list, sgTypecode, is_elected=False)
elif args.save_method == "mongo":
save_to_mongo(
data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode]
)
save_to_mongo(data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode])
6 changes: 2 additions & 4 deletions API/elected.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def fetch_all_data(
sgIds: List[str], sgTypecodes: str, drop_columns: List[str]
) -> List[dict]:
data_list = []
for sgTypecode in sgTypecodes.split(","):
for sgTypecode in sgTypecodes.split(","):
for sgId in sgIds:
data_list.extend(fetch_data(sgId, sgTypecode, drop_columns=drop_columns))

Expand Down Expand Up @@ -90,6 +90,4 @@ def fetch_all_data(
if args.save_method == "excel":
save_to_excel(data_list, sgTypecode, is_elected=True)
elif args.save_method == "mongo":
save_to_mongo(
data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode]
)
save_to_mongo(data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode])
2 changes: 2 additions & 0 deletions analysis/age/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
공공데이터포털 API로 수집한 데이터를 분석하기 위한 패키지입니다.
"""


class BasicArgument:
    """Lightweight bag of named analysis options.

    Every keyword argument passed to the constructor becomes an instance
    attribute of the same name, so callers can read options either as
    attributes (``arg.method``) or through ``arg.__dict__``.
    """

    def __init__(self, **kwargs):
        """Store each keyword argument as an instance attribute."""
        # Writing straight into the instance dict keeps ``__dict__`` equal to
        # exactly the keyword arguments that were supplied.
        self.__dict__.update(kwargs)

    def __repr__(self):
        """Return ``BasicArgument(name=value, ...)`` for readable debug output."""
        fields = ", ".join(
            f"{name}={value!r}" for name, value in self.__dict__.items()
        )
        return f"{type(self).__name__}({fields})"
27 changes: 18 additions & 9 deletions analysis/age/hist_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from db.client import client
from analysis.age import BasicArgument


def plot_young_and_old(youngest_cluster, oldest_cluster):
try:
sns.histplot(
Expand Down Expand Up @@ -184,7 +185,7 @@ def cluster(df_original, n_clst, basedic):
"""구역별 그룹을 만듭니다.
df_original: 데이터프레임
n_clst: 그룹 수
basedic: 기본 정보가 담긴 딕셔너리
basedic: 기본 정보가 담긴 딕셔너리
"""
distdb = client["district"]
statdb = client["stats"]
Expand Down Expand Up @@ -230,7 +231,9 @@ def cluster(df_original, n_clst, basedic):
for i in range(n_clst):
clst_data = df_clst[df_clst["cluster_label"] == i]
# print(f"Cluster {i} in {area}: {clst_data['age'].min()} - {clst_data['age'].max()}")
cluster_center_age = round(clst_data["age"].mean(), 2) # 나이를 소수점 2자리까지 반올림
cluster_center_age = round(
clst_data["age"].mean(), 2
) # 나이를 소수점 2자리까지 반올림
clst_age_mean.append(cluster_center_age)
clst_of_young = 0
clst_of_old = n_clst - 1
Expand All @@ -243,7 +246,9 @@ def cluster(df_original, n_clst, basedic):
# 지역의 가장 젊은, 나이든 그룹을 찾습니다
yb_clst = df_clst[df_clst["cluster_label"] == clst_of_young]
ob_clst = df_clst[df_clst["cluster_label"] == clst_of_old]
print(f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}")
print(
f"Youngest in {area}: {yb_clst['age'].min()} - {yb_clst['age'].max()}"
)
print(f"Oldest in {area}: {ob_clst['age'].min()} - {ob_clst['age'].max()}")
# 그룹의 성비를 계산합니다.
young_group_sexratio = (
Expand All @@ -266,7 +271,9 @@ def cluster(df_original, n_clst, basedic):
"minAge": int(age),
"maxAge": int(age) + 1,
"count": df_clst[df_clst["age"] == age].shape[0],
"ageGroup": int(df_clst.loc[df_clst["age"] == age].iloc[0]["cluster_label"])
"ageGroup": int(
df_clst.loc[df_clst["age"] == age].iloc[0]["cluster_label"]
),
}
for age in df_clst["age"].unique()
]
Expand Down Expand Up @@ -312,9 +319,9 @@ def cluster(df_original, n_clst, basedic):
else:
localname = df_clst["wiwName"].iloc[0]
print("sdName is ", metroname, "wiwName is", localname)
localId = localIds.find_one({"sdName": metroname, "wiwName": localname})[
"localId"
]
localId = localIds.find_one(
{"sdName": metroname, "wiwName": localname}
)["localId"]
dic = basedic.__dict__.copy()
dic["metroId"] = metroId
insert_data_to_mongo(
Expand All @@ -326,11 +333,13 @@ def cluster(df_original, n_clst, basedic):
localId=localId,
)

print(f"Number of data points per cluster for {area}, method {basedic.method}")
print(
f"Number of data points per cluster for {area}, method {basedic.method}"
)
for cluster_label in range(n_clst):
closest_data_count = sum(df_clst["cluster_label"] == cluster_label)
print(
f"Cluster {cluster_label}: Age {clst_age_mean[cluster_label]}, {closest_data_count} closest data points"
)
print(f"Youngest in {youngest_age[0]}: {youngest_age[1]}")
print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}")
print(f"Oldest in {oldest_age[0]}: {oldest_age[1]}")
18 changes: 11 additions & 7 deletions analysis/age/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"기초의원비례대표": "local_councilor",
}


def main(N=5, folder_name="To_be_filled"):
## TO-DO: excel말고 mongodb에서 받아오도록 합니다.
## 이 링크에 구현될 save_to_mongo함수 참고 : https://github.com/NewWays-TechForImpactKAIST/API-scrap-and-analysis/blob/bd817e9a15086d313d9615b2515a81e0dbd73850/API/utils.py#L34
Expand All @@ -33,9 +34,9 @@ def main(N=5, folder_name="To_be_filled"):
level = 1 if cluster_by == "sdName" else 2
datadir = os.path.join(BASE_DIR, "_data", folder_name)
# for d in os.listdir(datadir):
# xlsx 파일을 읽어옵니다.
# if not d.endswith(".xlsx"):
# continue
# xlsx 파일을 읽어옵니다.
# if not d.endswith(".xlsx"):
# continue
# df = pd.read_excel(os.path.join(datadir, d))
# d = "[당선][시도의원].xlsx"
d = "[당선][구시군의회의원].xlsx"
Expand All @@ -57,12 +58,15 @@ def main(N=5, folder_name="To_be_filled"):
if "후보" in d
else ValueError("엑셀파일 이름에 '당선'이든지 '후보'가 있어야 합니다.")
)
councilorType = councilordict[d.split('[')[-1].split(']')[0]]
councilorType = councilordict[d.split("[")[-1].split("]")[0]]
for method in ["kmeans", "equal"]:
basedic = BasicArgument(councilorType=councilorType, is_elected=is_elected, level=level, method=method)
cluster(
df, N, basedic
basedic = BasicArgument(
councilorType=councilorType,
is_elected=is_elected,
level=level,
method=method,
)
cluster(df, N, basedic)
## 2. 광역의회


Expand Down
4 changes: 2 additions & 2 deletions scrap/group_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ def scrap_group_leaders(

browser.get(url)
results = dict()
for (area, councilor) in metro_heads:
for area, councilor in metro_heads:
results[area] = ScrapResult(
council_id=area,
council_type=CouncilType.METRO_LEADER,
councilors=councilor,
)
for (local_area_name, councilor) in local_heads:
for local_area_name, councilor in local_heads:
print(local_area_name)
results[local_area_name] = ScrapResult(
council_id=local_area_name,
Expand Down
3 changes: 2 additions & 1 deletion scrap/utils/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="지방의회 / 광역의회 / 국회 / 단체장 스크랩 스크립트 실행")
parser.add_argument( "-w",
parser.add_argument(
"-w",
"--where",
help="스크랩할 의회 종류 (지방의회: 'local', 광역의회: 'metro', 국회: 'national', 단체장: 'leaders')",
choices=["local", "metro", "national", "leaders"],
Expand Down

0 comments on commit c795ec6

Please sign in to comment.