Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into feat-scraping-webhook
Browse files Browse the repository at this point in the history
  • Loading branch information
keonly committed Nov 27, 2023
2 parents 025ebd8 + 72a535d commit 537d797
Show file tree
Hide file tree
Showing 12 changed files with 531 additions and 278 deletions.
3 changes: 3 additions & 0 deletions API/MongoDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Councilor:
job: str
eduId: int
edu: str
year: str

@classmethod
def from_dict(cls, data: dict):
Expand All @@ -31,6 +32,7 @@ def from_dict(cls, data: dict):
job=data.get("job"),
eduId=int(data.get("eduId")),
edu=data.get("edu"),
year=data.get("year"),
)

def to_dict(self):
Expand All @@ -47,4 +49,5 @@ def to_dict(self):
"job": self.job,
"eduId": self.eduId,
"edu": self.edu,
"year": self.year,
}
7 changes: 7 additions & 0 deletions API/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,18 @@
"4": CouncilType.LOCAL_LEADER,
"5": CouncilType.METROPOLITAN_COUNCIL,
"6": CouncilType.LOCAL_COUNCIL,
"7": CouncilType.NATIONAL_COUNCIL_GLOBAL,
"8": CouncilType.METROPOLITAN_COUNCIL,
"9": CouncilType.LOCAL_COUNCIL,
}

# Lookup table from an sgTypecode string to the CouncilType member used for
# candidate records of that election type.
# API/candidate.py indexes this table with each requested sgTypecode and
# passes the result to save_to_mongo as its `where` argument.
# Codes "8" and "9" map to the same CouncilType members as "5" and "6".
CANDIDATE_TYPECODE_TYPE = {
"2": CouncilType.NATIONAL_COUNCIL_CAND,
# Codes "3" and "4" (leader candidates) are currently disabled.
# "3": CouncilType.METRO_LEADER_CAND,
# "4": CouncilType.LOCAL_LEADER_CAND,
"5": CouncilType.METROPOLITAN_COUNCIL_CAND,
"6": CouncilType.LOCAL_COUNCIL_CAND,
"7": CouncilType.NATIONAL_COUNCIL_GLOBAL_CAND,
"8": CouncilType.METROPOLITAN_COUNCIL_CAND,
"9": CouncilType.LOCAL_COUNCIL_CAND,
}
25 changes: 13 additions & 12 deletions API/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,19 @@ def fetch_data(


def fetch_all_data(
    sgIds: List[str], sgTypecodes: str, drop_columns: List[str]
) -> List[dict]:
    """Fetch candidate records for every (sgTypecode, sgId) combination.

    Args:
        sgIds: Election IDs to query.
        sgTypecodes: One or more sgTypecode values separated by ','.
        drop_columns: Column names forwarded unchanged to ``fetch_data``.

    Returns:
        One flat list holding the entries of every ``fetch_data`` call,
        grouped by type code first and by election ID second.
    """
    data_list = []
    # Type codes form the outer loop so that all records of one election
    # type stay contiguous in the result.
    for sgTypecode in sgTypecodes.split(","):
        for sgId in sgIds:
            data_list.extend(fetch_data(sgId, sgTypecode, drop_columns=drop_columns))

    return data_list


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="공공데이터포털 API로부터 후보자 정보를 가져옵니다.")
parser.add_argument("sgTypecode", type=str, help="원하는 sgTypecode 하나를 입력하세요")
parser.add_argument("sgTypecodes", type=str, help="원하는 sgTypecode를 ','로 구분하여 입력하세요")
parser.add_argument("sgIds", type=str, help="원하는 sgId를 ','로 구분하여 입력하세요")
parser.add_argument(
"--drop-columns",
Expand All @@ -84,11 +85,11 @@ def fetch_all_data(
sgIds = args.sgIds.split(",")
drop_columns = args.drop_columns.split(",") if args.drop_columns else []

data_list = fetch_all_data(sgIds, args.sgTypecode, drop_columns=drop_columns)

if args.save_method == "excel":
save_to_excel(data_list, args.sgTypecode, is_elected=False)
elif args.save_method == "mongo":
save_to_mongo(
data_list, args.sgTypecode, CANDIDATE_TYPECODE_TYPE[args.sgTypecode]
)
data_list = fetch_all_data(sgIds, args.sgTypecodes, drop_columns=drop_columns)
for sgTypecode in args.sgTypecodes.split(","):
if sgTypecode not in SG_TYPECODE:
raise ValueError(f"Invalid sgTypecode: {sgTypecode}")
if args.save_method == "excel":
save_to_excel(data_list, sgTypecode, is_elected=False)
elif args.save_method == "mongo":
save_to_mongo(data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode])
25 changes: 14 additions & 11 deletions API/elected.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def fetch_data(
data_list = []
for item in root.findall(".//item"):
data_entry = {child.tag: child.text for child in item}
data_entry["year"] = sgId[:4]

for column in drop_columns:
data_entry.pop(column)
Expand All @@ -51,18 +52,19 @@ def fetch_data(


def fetch_all_data(
    sgIds: List[str], sgTypecodes: str, drop_columns: List[str]
) -> List[dict]:
    """Fetch elected-official records for every (sgTypecode, sgId) combination.

    Args:
        sgIds: Election IDs to query.
        sgTypecodes: One or more sgTypecode values separated by ','.
        drop_columns: Column names forwarded unchanged to ``fetch_data``.

    Returns:
        One flat list holding the entries of every ``fetch_data`` call,
        grouped by type code first and by election ID second.
    """
    data_list = []
    # Type codes form the outer loop so that all records of one election
    # type stay contiguous in the result.
    for sgTypecode in sgTypecodes.split(","):
        for sgId in sgIds:
            data_list.extend(fetch_data(sgId, sgTypecode, drop_columns=drop_columns))

    return data_list


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="공공데이터포털 API로부터 당선자 정보를 가져옵니다.")
parser.add_argument("sgTypecode", type=str, help="원하는 sgTypecode 하나를 입력하세요")
parser.add_argument("sgTypecodes", type=str, help="원하는 sgTypecode를 ','로 구분하여 입력하세요")
parser.add_argument("sgIds", type=str, help="원하는 sgId를 ','로 구분하여 입력하세요")
parser.add_argument(
"--drop-columns",
Expand All @@ -82,10 +84,11 @@ def fetch_all_data(
sgIds = args.sgIds.split(",")
drop_columns = args.drop_columns.split(",") if args.drop_columns else []

data_list = fetch_all_data(sgIds, args.sgTypecode, drop_columns=drop_columns)
if args.save_method == "excel":
save_to_excel(data_list, args.sgTypecode, is_elected=True)
elif args.save_method == "mongo":
save_to_mongo(
data_list, args.sgTypecode, ELECTED_TYPECODE_TYPE[args.sgTypecode]
)
data_list = fetch_all_data(sgIds, args.sgTypecodes, drop_columns=drop_columns)
for sgTypecode in args.sgTypecodes.split(","):
if sgTypecode not in SG_TYPECODE:
raise ValueError(f"Invalid sgTypecode: {sgTypecode}")
if args.save_method == "excel":
save_to_excel(data_list, sgTypecode, is_elected=True)
elif args.save_method == "mongo":
save_to_mongo(data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode])
45 changes: 44 additions & 1 deletion API/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@ def save_to_mongo(data: List[dict], sgTypecode: str, where: str) -> None:
main_collection = db[where]

# TODO: Support other types of councils
if sgTypecode in ["6", "9"]:
if sgTypecode in ["8", "5", "2", "6", "9"]:
for entry in data:
entry["wiwName"] = change_local_name(entry["sdName"], entry["wiwName"])
district_id = get_district_id(entry["sdName"], entry["wiwName"])

if district_id:
Expand All @@ -59,6 +60,17 @@ def save_to_mongo(data: List[dict], sgTypecode: str, where: str) -> None:
print(
f"Warning: '{entry['sdName']} {entry['wiwName']}'에 해당하는 지역 ID가 존재하지 않습니다."
)
elif sgTypecode in ["7"]:
for entry in data:
main_collection.update_one(
{
"name": entry["name"],
"localId": 0,
"metroId": 0,
},
{"$set": Councilor.from_dict(entry).to_dict()},
upsert=True,
)
else:
raise NotImplementedError("현재 구시군의회의원(6) 및 기초의원비례대표(9)만 구현되어 있습니다.")

Expand Down Expand Up @@ -95,3 +107,34 @@ def getLocalMetroMap() -> Dict[str, str]:
}
for item in result
}


def change_local_name(sdName, wiwName):
    """Normalize an electoral district name to the name of its local council.

    Two rules are applied, in this order:

    1. A region listed in ``change_city_name`` is replaced by the name
       stored there, e.g. '당진군' (until 2011) -> '당진시' (from 2012).
    2. A name made of a city ('시') followed by a district ('구') is cut
       right after the city, e.g. '용인시수지구' (electoral unit) ->
       '용인시' (council unit).

    Args:
        sdName: Province / metropolitan-city name; only used as the first
            half of the ``change_city_name`` key.
        wiwName: Lower-level region name to normalize. ``None`` and ``""``
            are returned unchanged instead of raising.

    Returns:
        The normalized name, or ``wiwName`` itself when no rule applies.
    """
    renamed = change_city_name.get((sdName, wiwName))
    if renamed is not None:
        return renamed

    # Nothing to inspect for a missing or empty name.
    if not wiwName:
        return wiwName

    # Search from index 1: a leading '시' belongs to the city's own name
    # (e.g. '시흥시'), so it must not be taken for the city suffix.
    si_index = wiwName.find("시", 1)
    # Only truncate when a district actually follows the city; names such as
    # '구리시' contain both characters but have no trailing district.
    if si_index != -1 and "구" in wiwName[si_index + 1 :]:
        return wiwName[: si_index + 1]
    return wiwName


# (sdName, wiwName) pairs whose region was renamed or merged, mapped to the
# name that change_local_name returns for them.
change_city_name = {
    ("충청남도", "당진군"): "당진시",
    ("경상남도", "마산시"): "창원시",
    ("경상남도", "진해시"): "창원시",
    ("경기도", "여주군"): "여주시",
    ("충청북도", "청원군"): "청주시",
    ("인천광역시", "남구"): "미추홀구",
}

# Single-entry lookup table from '연기군' to '세종특별자치시'.
# NOTE(review): the name suggests a level-2 -> level-1 region conversion, and
# nothing in the visible code reads this table yet -- confirm the intended use.
change_lvl2to1 = {"연기군": "세종특별자치시"}
5 changes: 5 additions & 0 deletions analysis/age/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
공공데이터포털 API로 수집한 데이터를 분석하기 위한 패키지입니다.
"""


class BasicArgument:
    """Lightweight attribute container built from keyword arguments.

    Every keyword passed to the constructor becomes an instance attribute of
    the same name, e.g. ``BasicArgument(year=2022).year == 2022``.
    """

    def __init__(self, **kwargs):
        # Store the keywords directly as instance attributes.
        self.__dict__.update(kwargs)

    def __repr__(self):
        """Return a constructor-like string listing every stored attribute."""
        fields = ", ".join(
            f"{key}={value!r}" for key, value in self.__dict__.items()
        )
        return f"{type(self).__name__}({fields})"
Loading

0 comments on commit 537d797

Please sign in to comment.