Skip to content

Commit

Permalink
[scrap] 후보 스크랩 다수 sgcode가능토록 수정 [analysis] 후보도 mongoDB에 업로드 완료
Browse files Browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 27, 2023
1 parent 2eb4fd8 commit 640a7d3
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 23 deletions.
5 changes: 5 additions & 0 deletions API/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,17 @@
"4": CouncilType.LOCAL_LEADER,
"5": CouncilType.METROPOLITAN_COUNCIL,
"6": CouncilType.LOCAL_COUNCIL,
"7": CouncilType.NATIONAL_COUNCIL,
"8": CouncilType.METROPOLITAN_COUNCIL,
"9": CouncilType.LOCAL_COUNCIL,
}
# Lookup table from a type-code string to the candidate-side CouncilType member.
# Codes "7", "8" and "9" resolve to the same members as "2", "5" and "6"
# respectively.
# NOTE(review): presumably 7-9 are the proportional-representation variants of
# those elections and the keys are the sgTypecode values sent to the API —
# confirm against the election API's code table.
CANDIDATE_TYPECODE_TYPE = {
"2": CouncilType.NATIONAL_COUNCIL_CAND,
# Codes "3" and "4" (leader candidates) are currently disabled.
# "3": CouncilType.METRO_LEADER_CAND,
# "4": CouncilType.LOCAL_LEADER_CAND,
"5": CouncilType.METROPOLITAN_COUNCIL_CAND,
"6": CouncilType.LOCAL_COUNCIL_CAND,
"7": CouncilType.NATIONAL_COUNCIL_CAND,
"8": CouncilType.METROPOLITAN_COUNCIL_CAND,
"9": CouncilType.LOCAL_COUNCIL_CAND,
}
7 changes: 4 additions & 3 deletions API/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,12 @@ def fetch_data(


def fetch_all_data(
    sgIds: List[str], sgTypecodes: str, drop_columns: List[str]
) -> List[dict]:
    """Fetch and concatenate records for every (type code, election id) pair.

    Args:
        sgIds: Election ids to query.
        sgTypecodes: One or more type codes separated by commas, e.g. ``"5,8"``.
            Whitespace around each code is ignored and empty segments are
            skipped, so ``"5, 8"`` and ``"5,8,"`` behave like ``"5,8"``.
        drop_columns: Forwarded unchanged to ``fetch_data`` as ``drop_columns``.

    Returns:
        One flat list holding the results of every ``fetch_data`` call,
        ordered by type code first and by election id second.
    """
    # Normalise the comma-separated codes once, before any request is made.
    typecodes = [code.strip() for code in sgTypecodes.split(",")]

    data_list = []
    for sgTypecode in typecodes:
        if not sgTypecode:
            # "".split(",") yields [""]; never query with a blank type code.
            continue
        for sgId in sgIds:
            data_list.extend(fetch_data(sgId, sgTypecode, drop_columns=drop_columns))

    return data_list

Expand Down
32 changes: 12 additions & 20 deletions analysis/age/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,15 @@
"기초의원비례대표": "local_councilor",
}


def main(N=5, folder_name="To_be_filled"):
def run(cluster_by, filenames, N=5, folder_name="To_be_filled"):
## TO-DO: excel말고 mongodb에서 받아오도록 합니다.
## 이 링크에 구현될 save_to_mongo함수 참고 : https://github.com/NewWays-TechForImpactKAIST/API-scrap-and-analysis//blob/bd817e9a15086d313d9615b2515a81e0dbd73850/API/utils.py#L34
## 1. 지역의회
# cluster_by = input("구역을 나눌 기준을 입력해주세요 (sdName 즉 시/도 또는 wiwName 즉 기초단체단위): ")
cluster_by = "wiwName"
assert cluster_by in ["sdName", "wiwName"]
level = 1 if cluster_by == "sdName" else 2
datadir = os.path.join(BASE_DIR, "_data", folder_name)
# for d in os.listdir(datadir):
# xlsx 파일을 읽어옵니다.
# if not d.endswith(".xlsx"):
# continue
# df = pd.read_excel(os.path.join(datadir, d))
# d = "[당선][시도의원].xlsx"
d = "[당선][구시군의회의원].xlsx"
df_1 = pd.read_excel(os.path.join(datadir, d))
# d = "[당선][광역의원비례대표].xlsx"
d = "[당선][기초의원비례대표].xlsx"
df_2 = pd.read_excel(os.path.join(datadir, d))
df = pd.concat([df_1, df_2])
# 필요한 열만 추출합니다.
df = pd.DataFrame()
for d in filenames:
df_new = pd.read_excel(os.path.join(datadir, d))
df = pd.concat([df, df_new])
if level == 1:
df = df[["sgId", "sdName", "name", "age", "gender"]]
else:
Expand All @@ -67,7 +53,13 @@ def main(N=5, folder_name="To_be_filled"):
method=method,
)
cluster(df, N, basedic)
## 2. 광역의회
def main(N=5):
    """Run the age analysis for every supported council / clustering combination.

    Each ``run`` call receives a clustering column (``"sdName"`` or
    ``"wiwName"``) and the pair of Excel files to merge: the regular list and
    its proportional-representation list, for both elected members ([당선])
    and candidates ([후보]).

    Args:
        N: Forwarded to every ``run`` call as its ``N`` argument. It used to
            be accepted here but never passed on, so ``run`` always fell back
            to its own default.
    """
    run("sdName", ["[당선][시도의원].xlsx", "[당선][광역의원비례대표].xlsx"], N=N)
    run("sdName", ["[후보][시도의원].xlsx", "[후보][광역의원비례대표].xlsx"], N=N)
    run("sdName", ["[당선][구시군의회의원].xlsx", "[당선][기초의원비례대표].xlsx"], N=N)
    run("sdName", ["[후보][구시군의회의원].xlsx", "[후보][기초의원비례대표].xlsx"], N=N)
    run("wiwName", ["[당선][구시군의회의원].xlsx", "[당선][기초의원비례대표].xlsx"], N=N)
    run("wiwName", ["[후보][구시군의회의원].xlsx", "[후보][기초의원비례대표].xlsx"], N=N)


# Runs every analysis as soon as this module is executed or imported.
# NOTE(review): there is no `if __name__ == "__main__":` guard — confirm that
# importing this module without running the analyses is never needed.
main()

0 comments on commit 640a7d3

Please sign in to comment.