From 6988f1813781e14e7f2528cb6f50b685bb83ae13 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 07:51:06 +0900 Subject: [PATCH 01/19] add: metro council type at types --- scrap/utils/types.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scrap/utils/types.py b/scrap/utils/types.py index 00a6bef..99c3559 100644 --- a/scrap/utils/types.py +++ b/scrap/utils/types.py @@ -14,6 +14,10 @@ class CouncilType(str, Enum): """ 기초의회 """ + METRO_COUNCIL = "metropolitan_council" + """ + 광역의회 + """ def __str__(self): """ JSON으로 직렬화하기 위해 문자열로 변환하는 함수를 오버라이드합니다. From 76cd5aa04aaa8f7dbbe393e2d508c691ffa97719 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 07:51:35 +0900 Subject: [PATCH 02/19] Refactor: Make folders for daejeon --- scrap/local_councils/__init__.py | 2 +- scrap/local_councils/{ => daejeon}/daejeon.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename scrap/local_councils/{ => daejeon}/daejeon.py (100%) diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py index 6080943..e0d370c 100644 --- a/scrap/local_councils/__init__.py +++ b/scrap/local_councils/__init__.py @@ -2,5 +2,5 @@ 각 기초의회들의 크롤링 코드를 모아놓은 패키지입니다. 광역자치단체 별로 폴더를 만들어서 관리합니다. """ -from .daejeon import * +from .daejeon.daejeon import * from .ulsan import * \ No newline at end of file diff --git a/scrap/local_councils/daejeon.py b/scrap/local_councils/daejeon/daejeon.py similarity index 100% rename from scrap/local_councils/daejeon.py rename to scrap/local_councils/daejeon/daejeon.py From 6638ee0186b2cd890f3529d14ecc7756c16d3d99 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 07:58:47 +0900 Subject: [PATCH 03/19] Add: Andong --- .../local_councils/gyeongsangbuk/__init__.py | 4 +++ scrap/local_councils/gyeongsangbuk/andong.py | 35 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 scrap/local_councils/gyeongsangbuk/__init__.py create mode 100644 scrap/local_councils/gyeongsangbuk/andong.py diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py new file mode 100644 index 0000000..e922545 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/__init__.py @@ -0,0 +1,4 @@ +""" +경상북도 기초의회들의 크롤링 코드를 모아둔 사이트입니다. 
+""" +from .andong import scrap_andong \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/andong.py b/scrap/local_councils/gyeongsangbuk/andong.py new file mode 100644 index 0000000..78b1e1b --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/andong.py @@ -0,0 +1,35 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_andong(url = 'https://council.andong.go.kr/kr/member/name.do') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_='profile'): + name_tag = profile.find("div", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="andong", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_andong()) \ No newline at end of file From 3c7cda22758d163a6b2d298bb84d2fe63133c14c Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 08:04:27 +0900 Subject: [PATCH 04/19] Add: Pohang Council --- scrap/local_councils/gyeongsangbuk/andong.py | 2 +- scrap/local_councils/gyeongsangbuk/pohang.py | 35 ++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 scrap/local_councils/gyeongsangbuk/pohang.py diff --git a/scrap/local_councils/gyeongsangbuk/andong.py b/scrap/local_councils/gyeongsangbuk/andong.py index 78b1e1b..335af0a 100644 --- a/scrap/local_councils/gyeongsangbuk/andong.py +++ b/scrap/local_councils/gyeongsangbuk/andong.py @@ -16,7 +16,7 @@ def scrap_andong(url = 'https://council.andong.go.kr/kr/member/name.do') -> Scra councilors: List[Councilor] = [] for profile in soup.find_all('div', class_='profile'): - name_tag = profile.find("div", class_="name") + name_tag = profile.find("em", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" party = "정당 정보 없음" diff --git a/scrap/local_councils/gyeongsangbuk/pohang.py b/scrap/local_councils/gyeongsangbuk/pohang.py new file mode 100644 index 0000000..2d4a088 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/pohang.py @@ -0,0 +1,35 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_pohang(url = 'https://council.pohang.go.kr/content/member/memberName.html') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_='profile'): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="andong", + 
council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_andong()) \ No newline at end of file From 8afb14ad959a0f68a7c99c239a603efd562e11f2 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 08:37:00 +0900 Subject: [PATCH 05/19] Add: Pohang and gyeongju --- main.py | 4 ++ scrap/local_councils/__init__.py | 3 +- .../local_councils/gyeongsangbuk/__init__.py | 4 +- scrap/local_councils/gyeongsangbuk/andong.py | 1 + .../local_councils/gyeongsangbuk/gyeongju.py | 37 +++++++++++++++++++ scrap/local_councils/gyeongsangbuk/pohang.py | 7 ++-- scrap/local_councils/ulsan.py | 2 +- 7 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 main.py create mode 100644 scrap/local_councils/gyeongsangbuk/gyeongju.py diff --git a/main.py b/main.py new file mode 100644 index 0000000..fe397fc --- /dev/null +++ b/main.py @@ -0,0 +1,4 @@ +from scrap.local_councils.gyeongsangbuk import scrap_andong, scrap_pohang, scrap_gyeongju + +if __name__ == '__main__': + print(scrap_gyeongju()) \ No newline at end of file diff --git a/scrap/local_councils/__init__.py b/scrap/local_councils/__init__.py index e0d370c..54caaed 100644 --- a/scrap/local_councils/__init__.py +++ b/scrap/local_councils/__init__.py @@ -3,4 +3,5 @@ 광역자치단체 별로 폴더를 만들어서 관리합니다. """ from .daejeon.daejeon import * -from .ulsan import * \ No newline at end of file +from .ulsan import * +from .daejeon import * \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py index e922545..b4f027a 100644 --- a/scrap/local_councils/gyeongsangbuk/__init__.py +++ b/scrap/local_councils/gyeongsangbuk/__init__.py @@ -1,4 +1,6 @@ """ 경상북도 기초의회들의 크롤링 코드를 모아둔 사이트입니다. 
""" -from .andong import scrap_andong \ No newline at end of file +from .andong import scrap_andong +from .pohang import scrap_pohang +from .gyeongju import scrap_gyeongju \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/andong.py b/scrap/local_councils/gyeongsangbuk/andong.py index 335af0a..f9f7f23 100644 --- a/scrap/local_councils/gyeongsangbuk/andong.py +++ b/scrap/local_councils/gyeongsangbuk/andong.py @@ -3,6 +3,7 @@ from typing import List from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.requests import get_soup + import re def scrap_andong(url = 'https://council.andong.go.kr/kr/member/name.do') -> ScrapResult: diff --git a/scrap/local_councils/gyeongsangbuk/gyeongju.py b/scrap/local_councils/gyeongsangbuk/gyeongju.py new file mode 100644 index 0000000..c8c4dc2 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/gyeongju.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re +import requests + +def scrap_gyeongju(url = 'https://council.gyeongju.go.kr/kr/member/name.do') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://council.gyeongju.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result['name'] if result['name'] else "이름 정보 없음" + party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="gyeongju", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_gyeongju()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/pohang.py b/scrap/local_councils/gyeongsangbuk/pohang.py index 2d4a088..699df22 100644 --- a/scrap/local_councils/gyeongsangbuk/pohang.py +++ b/scrap/local_councils/gyeongsangbuk/pohang.py @@ -14,8 +14,9 @@ def scrap_pohang(url = 'https://council.pohang.go.kr/content/member/memberName.h soup = get_soup(url, verify=False) councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='mlist')[0] - for profile in soup.find_all('div', class_='profile'): + for profile in mlist.find_all('li'): name_tag = profile.find("dd", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -26,10 +27,10 @@ def scrap_pohang(url = 'https://council.pohang.go.kr/content/member/memberName.h councilors.append(Councilor(name=name, party=party)) return ScrapResult( - council_id="andong", + council_id="pohang", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors ) if __name__ == '__main__': - print(scrap_andong()) \ No newline at end of file + print(scrap_pohang()) \ No newline at end of file diff --git a/scrap/local_councils/ulsan.py b/scrap/local_councils/ulsan.py index f2a2219..63daa5a 100644 --- a/scrap/local_councils/ulsan.py +++ b/scrap/local_councils/ulsan.py @@ -1,5 +1,5 @@ from urllib.parse import urlparse - +import sys from typing import List from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.requests import get_soup From e810c54c88433459a27e980f59c4021841239c20 Mon Sep 17 
00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 08:53:28 +0900 Subject: [PATCH 06/19] Add: Add gimcheon --- .../local_councils/gyeongsangbuk/__init__.py | 3 +- .../local_councils/gyeongsangbuk/gimcheon.py | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 scrap/local_councils/gyeongsangbuk/gimcheon.py diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py index b4f027a..49f1e79 100644 --- a/scrap/local_councils/gyeongsangbuk/__init__.py +++ b/scrap/local_councils/gyeongsangbuk/__init__.py @@ -3,4 +3,5 @@ """ from .andong import scrap_andong from .pohang import scrap_pohang -from .gyeongju import scrap_gyeongju \ No newline at end of file +from .gyeongju import scrap_gyeongju +from .gimcheon import scrap_gimcheon \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/gimcheon.py b/scrap/local_councils/gyeongsangbuk/gimcheon.py new file mode 100644 index 0000000..4f26f50 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/gimcheon.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_gimcheon(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: + '''김천시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='memberList')[0] + + for profile in mlist.find_all('li', recursive=False): + name_tag = profile.find("h4") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="gimcheon", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_gimcheon()) \ No newline at end of file From 0ef0ccc3bceb5d474bf32b7dffd3ec86f4b2504c Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 08:56:02 +0900 Subject: [PATCH 07/19] Add: Add Gumi Council --- scrap/local_councils/gyeongsangbuk/gumi.py | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 scrap/local_councils/gyeongsangbuk/gumi.py diff --git a/scrap/local_councils/gyeongsangbuk/gumi.py b/scrap/local_councils/gyeongsangbuk/gumi.py new file mode 100644 index 0000000..d20f66d --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/gumi.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_gumi(url = 'https://gumici.or.kr/content/member/memberName.html') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='mlist')[0] + + for profile in mlist.find_all('li'): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = 
profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="gumi", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_gumi()) \ No newline at end of file From 4332445d6025ed9f0f1847302ceeaaf44dad7213 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 09:02:19 +0900 Subject: [PATCH 08/19] Add: add sangju --- scrap/local_councils/gyeongsangbuk/sangju.py | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 scrap/local_councils/gyeongsangbuk/sangju.py diff --git a/scrap/local_councils/gyeongsangbuk/sangju.py b/scrap/local_councils/gyeongsangbuk/sangju.py new file mode 100644 index 0000000..6d0326a --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/sangju.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + +import re + +def scrap_sangju(url = 'https://www.sangjucouncil.go.kr/kr/member/name.do') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_='profile'): + name_tag = profile.find("div", class_="name").find("strong") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="sangju", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_sangju()) \ No newline at end of file From 85e0d36712f28fcd33b3d60c72d51450778e0590 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 09:05:37 +0900 Subject: [PATCH 09/19] Add: moongyeong --- .../gyeongsangbuk/moongyeong.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 scrap/local_councils/gyeongsangbuk/moongyeong.py diff --git a/scrap/local_councils/gyeongsangbuk/moongyeong.py b/scrap/local_councils/gyeongsangbuk/moongyeong.py new file mode 100644 index 0000000..a231b05 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/moongyeong.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re +import requests + +def scrap_moongyeong(url = 'https://council.gbmg.go.kr/kr/member/name.do') -> ScrapResult: + '''문경시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://council.gbmg.go.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result['name'] if result['name'] else "이름 정보 없음" + party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + + 
councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="moongyeong", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_moongyeong()) \ No newline at end of file From ba6099234f93bd64c84fee3637cbb8f7d49746f5 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Sun, 1 Oct 2023 09:11:46 +0900 Subject: [PATCH 10/19] Add: Yaecheon --- .../local_councils/gyeongsangbuk/__init__.py | 5 ++- .../local_councils/gyeongsangbuk/yaecheon.py | 38 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 scrap/local_councils/gyeongsangbuk/yaecheon.py diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py index 49f1e79..2ae24c7 100644 --- a/scrap/local_councils/gyeongsangbuk/__init__.py +++ b/scrap/local_councils/gyeongsangbuk/__init__.py @@ -4,4 +4,7 @@ from .andong import scrap_andong from .pohang import scrap_pohang from .gyeongju import scrap_gyeongju -from .gimcheon import scrap_gimcheon \ No newline at end of file +from .gimcheon import scrap_gimcheon +from .sangju import scrap_sangju +from .moongyeong import scrap_moongyeong +from .yaecheon import scrap_yaecheon \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangbuk/yaecheon.py b/scrap/local_councils/gyeongsangbuk/yaecheon.py new file mode 100644 index 0000000..8527ec0 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/yaecheon.py @@ -0,0 +1,38 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +import re + +def scrap_yaecheon(url = 'https://www.ycgcl.kr/kr/member/name.do') -> ScrapResult: + '''예천시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_="profile"): + data_uid = profile.find("a", class_="btn_profile")["data-uid"] + + if data_uid: + url = f"https://www.ycgcl.kr/common/async/member/{data_uid}.do" + result = requests.get(url).json() + name = result['name'] if result['name'] else "이름 정보 없음" + party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="yaecheon", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_yaecheon()) \ No newline at end of file From 17ba4ca9766026b96de38abadda4d9bdd13a3921 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Wed, 8 Nov 2023 20:28:11 +0900 Subject: [PATCH 11/19] Add: More Councilors --- scrap/local_councils/junnam/danyang.py | 37 +++++++++++++++++++++++++ scrap/local_councils/junnam/gangjin.py | 37 +++++++++++++++++++++++++ scrap/local_councils/junnam/goheung.py | 37 +++++++++++++++++++++++++ scrap/local_councils/junnam/hapchun.py | 37 +++++++++++++++++++++++++ scrap/local_councils/junnam/henam.py | 36 ++++++++++++++++++++++++ scrap/local_councils/junnam/sanchung.py | 37 +++++++++++++++++++++++++ scrap/local_councils/junnam/wando.py | 31 +++++++++++++++++++++ test.py | 6 ++++ 8 files changed, 258 insertions(+) create mode 100644 scrap/local_councils/junnam/danyang.py create mode 100644 scrap/local_councils/junnam/gangjin.py create mode 
100644 scrap/local_councils/junnam/goheung.py create mode 100644 scrap/local_councils/junnam/hapchun.py create mode 100644 scrap/local_councils/junnam/henam.py create mode 100644 scrap/local_councils/junnam/sanchung.py create mode 100644 scrap/local_councils/junnam/wando.py create mode 100644 test.py diff --git a/scrap/local_councils/junnam/danyang.py b/scrap/local_councils/junnam/danyang.py new file mode 100644 index 0000000..34e7a38 --- /dev/null +++ b/scrap/local_councils/junnam/danyang.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_damyang(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: + '''담양군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='memlist')[0] + + for profile in mlist.find_all('li', recursive=False): + info = profile.find('ul', class_='info') + name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + + li = info.find("li", class_="item MP") + party = "정당 정보 없음" + party_dd = li.find_all("dd")[1] + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="damyang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_damyang()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/gangjin.py b/scrap/local_councils/junnam/gangjin.py new file mode 100644 index 0000000..b4719f3 --- /dev/null +++ b/scrap/local_councils/junnam/gangjin.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_gangjin(url = 'https://www.gangjincl.go.kr/index.do?PID=010') -> ScrapResult: + '''강진군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='memlist')[0] + + for profile in mlist.find_all('li', recursive=False): + info = profile.find('ul', class_='info') + name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + + li = info.find_all("li", recursive=False)[6] + party = "정당 정보 없음" + party_dd = li.find("dd") + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="damyang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_gangjin()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/goheung.py b/scrap/local_councils/junnam/goheung.py new file mode 100644 index 0000000..4751184 --- /dev/null +++ b/scrap/local_councils/junnam/goheung.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_goheung(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: + '''고흥군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 
객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='memlist')[0] + + for profile in mlist.find_all('li', recursive=False): + info = profile.find('ul', class_='info') + name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + + li = info.find("li", class_="item MP") + party = "정당 정보 없음" + party_dd = li.find_all("dd")[1] + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="damyang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_damyang()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/hapchun.py b/scrap/local_councils/junnam/hapchun.py new file mode 100644 index 0000000..9af2186 --- /dev/null +++ b/scrap/local_councils/junnam/hapchun.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_hapchun(url = 'https://www.hccl.go.kr/source/korean/member/active.jsp') -> ScrapResult: + '''합천군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('dl', class_='member') + + for profile in mlist: + info = profile.find_all('li') + if info: + name = info[0].get_text(strip=True) if info[0].get_text(strip=True) else "이름 정보 없음" + + party = "정당 정보 없음" + party_dd = info[3].get_text(strip=True).replace("소속정당 : ", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="hapchun", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_hapchun()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/henam.py b/scrap/local_councils/junnam/henam.py new file mode 100644 index 0000000..27011b1 --- /dev/null +++ b/scrap/local_councils/junnam/henam.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_henam(url = 'http://council.haenam.go.kr/kr/member/active.do') -> ScrapResult: + '''해남 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='memberList')[0] + + for profile in mlist.find_all('li', recursive=False): + name_tag = profile.find("h4") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="gimcheon", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_henam()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/sanchung.py b/scrap/local_councils/junnam/sanchung.py new file mode 100644 index 0000000..deeab16 --- /dev/null +++ b/scrap/local_councils/junnam/sanchung.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from 
typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_sanchung(url = 'https://www.sancheong.go.kr/council/selectPersonalAssembly.do?key=2224&assemCate=8') -> ScrapResult: + '''산청군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find('ul', class_='comment_list') + lis = mlist.find_all('li', recursive=False) + for profile in lis: + print(profile) + info = profile.find_all('li') + name = profile.find("span", class_="name").get_text(strip=True) if profile.find("span", class_="name").get_text(strip=True) else "이름 정보 없음" + party = "정당 정보 없음" + + party_dd = info[3].get_text(strip=True).replace("소속정당", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="hapchun", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_sanchung()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/wando.py b/scrap/local_councils/junnam/wando.py new file mode 100644 index 0000000..5dc3806 --- /dev/null +++ b/scrap/local_councils/junnam/wando.py @@ -0,0 +1,31 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_wando(url = 'http://www.wdcc.or.kr:8088/common/selectCouncilMemberList.json?searchCsDaesoo=9') -> ScrapResult: + '''완도군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + councilors: List[Councilor] = [] + + result = requests.get(url) + result_json = result.json() + for profile in result_json['list']: + name = profile['cmNm'] + party = profile['mpParty'] + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="wando", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_wando()) \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..2a0f446 --- /dev/null +++ b/test.py @@ -0,0 +1,6 @@ +name = "한양" + +def change(a): + name = a +change("고려") +print(name) \ No newline at end of file From 22071a982c64ac8bd74011a52ca4fa05cd16d7d5 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Mon, 13 Nov 2023 20:39:32 +0900 Subject: [PATCH 12/19] Add: Junnam --- scrap/local_councils/junnam/hamppyeong.py | 40 +++++++++++++++++++++++ scrap/local_councils/junnam/muan.py | 36 ++++++++++++++++++++ scrap/local_councils/junnam/yeonggwang.py | 36 ++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 scrap/local_councils/junnam/hamppyeong.py create mode 100644 scrap/local_councils/junnam/muan.py create mode 100644 scrap/local_councils/junnam/yeonggwang.py diff --git a/scrap/local_councils/junnam/hamppyeong.py b/scrap/local_councils/junnam/hamppyeong.py new file mode 100644 index 0000000..741ee9d --- /dev/null +++ b/scrap/local_councils/junnam/hamppyeong.py @@ -0,0 +1,40 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_hamppyeong(url = 
'https://www.hpcouncil.go.kr/main/incumbentCouncillor.do?PID=0201&item=01') -> ScrapResult: + '''무안 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('div', id='subContent')[0] + + total_div = mlist.find_all("div", class_="infosubcontent") + total_div.append(mlist.find_all("div", class_="infosubcontent2")) + for profile in total_div: + if not profile: + continue + info = profile.find('div', class_='infosub_detail') + name = info.find("li", class_="infosubmem_name" ).get_text(strip=False)[:3] if info.find("li", class_="infosubmem_name" ).get_text(strip=True) else "이름 정보 없음" + + party_dd = info.find("ul", class_="infosub").find_all('li')[1] + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True).replace("소속정당 : ", "") + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="yeonggwang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_hamppyeong()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/muan.py b/scrap/local_councils/junnam/muan.py new file mode 100644 index 0000000..9684f4d --- /dev/null +++ b/scrap/local_councils/junnam/muan.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_muan(url = 'http://www.muan.or.kr/main/incumbentCouncillor.do?PID=0201') -> ScrapResult: + '''무안 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='formerCouncillor')[0] + + for profile in mlist.find_all('li', recursive=False): + info = profile.find('div', class_='profileInfo') + name = info.find("div", class_="infosubmem_name").get_text(strip=True) if info.find("div", class_="infosubmem_name").get_text(strip=True) else "이름 정보 없음" + + party_dd = info.find("div", class_="infoContents") + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="muan", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_muan()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/yeonggwang.py b/scrap/local_councils/junnam/yeonggwang.py new file mode 100644 index 0000000..efe39f4 --- /dev/null +++ b/scrap/local_councils/junnam/yeonggwang.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_yeonggwang(url = 'https://www.ygcouncil.go.kr/bbs/content.php?co_id=councilors_curr#aside') -> ScrapResult: + '''무안 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('div', class_='councilors_curr2_wrap')[0] + + for profile in mlist.find_all('div',class_="subcon_body_txt", recursive=False): + info = profile.find('div', class_='ygmember_txt') + name = info.find("h4").get_text(strip=True).split(" ")[0] if info.find("h4").get_text(strip=True) else "이름 정보 
없음" + + party_dd = info.find("p", class_="party_highlight") + party = "정당 정보 없음" + if party_dd: + party = party_dd.get_text(strip=True).replace("정당 : ", "") + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="yeonggwang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_yeonggwang()) \ No newline at end of file From 00910d10aa60adfab4e0ab45944732d88cc62c4f Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Mon, 13 Nov 2023 20:52:26 +0900 Subject: [PATCH 13/19] Add: Gyeongsangnam --- .../local_councils/gyeongsangnam/changwon.py | 36 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/goseong.py | 36 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/jinju.py | 36 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/sacheon.py | 36 +++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 scrap/local_councils/gyeongsangnam/changwon.py create mode 100644 scrap/local_councils/gyeongsangnam/goseong.py create mode 100644 scrap/local_councils/gyeongsangnam/jinju.py create mode 100644 scrap/local_councils/gyeongsangnam/sacheon.py diff --git a/scrap/local_councils/gyeongsangnam/changwon.py b/scrap/local_councils/gyeongsangnam/changwon.py new file mode 100644 index 0000000..4d6f812 --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/changwon.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_changwon(url = 'https://gumici.or.kr/content/member/memberName.html') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('ul', class_='mlist')[0] + + for profile in mlist.find_all('li'): + name_tag = profile.find("dd", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("span", string="정") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="changwon", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_changwon()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangnam/goseong.py b/scrap/local_councils/gyeongsangnam/goseong.py new file mode 100644 index 0000000..4b2081b --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/goseong.py @@ -0,0 +1,36 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_goseong(url="https://council.goseong.go.kr/kr/member/active.do") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. 
+ + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="goseong", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_goseong()) diff --git a/scrap/local_councils/gyeongsangnam/jinju.py b/scrap/local_councils/gyeongsangnam/jinju.py new file mode 100644 index 0000000..c55f0f9 --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/jinju.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + +import re + +def scrap_jinju(url = 'https://www.jinjucl.com/kr/member/name.do') -> ScrapResult: + '''대전시 동구 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all('div', class_='profile'): + name_tag = profile.find("div", class_="name").find("strong") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당") + if party_info: + party = party_info.find_next("span").find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="jinju", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_jinju()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangnam/sacheon.py b/scrap/local_councils/gyeongsangnam/sacheon.py new file mode 100644 index 0000000..c0807bc --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/sacheon.py @@ -0,0 +1,36 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_sacheon(url="https://council.sacheon.go.kr/kr/member/active.do") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. 
+ + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="sacheon", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_sacheon()) From 5e779022f0bd955859ca407ca774e2bcc07df1ea Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Wed, 15 Nov 2023 03:31:49 +0900 Subject: [PATCH 14/19] Add: More councilors --- .../gyeongsangnam/changnyeong.py | 37 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/geoje.py | 37 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/hanam.py | 36 ++++++++++++++++++ .../{junnam => gyeongsangnam}/hapchun.py | 0 scrap/local_councils/gyeongsangnam/miryang.py | 35 ++++++++++++++++++ .../{junnam => gyeongsangnam}/sanchung.py | 0 .../local_councils/gyeongsangnam/uiryeong.py | 37 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/yangsan.py | 34 +++++++++++++++++ 8 files changed, 216 insertions(+) create mode 100644 scrap/local_councils/gyeongsangnam/changnyeong.py create mode 100644 scrap/local_councils/gyeongsangnam/geoje.py create mode 100644 scrap/local_councils/gyeongsangnam/hanam.py rename scrap/local_councils/{junnam => gyeongsangnam}/hapchun.py (100%) create mode 100644 scrap/local_councils/gyeongsangnam/miryang.py rename scrap/local_councils/{junnam => gyeongsangnam}/sanchung.py (100%) create mode 100644 scrap/local_councils/gyeongsangnam/uiryeong.py create mode 100644 scrap/local_councils/gyeongsangnam/yangsan.py diff --git a/scrap/local_councils/gyeongsangnam/changnyeong.py b/scrap/local_councils/gyeongsangnam/changnyeong.py new file mode 100644 index 0000000..d7e1761 --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/changnyeong.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_changnyeong(url = 'https://www.cngc.go.kr/kr/member/active') -> ScrapResult: + '''창녕군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('div', class_='card_area') + + for profile in mlist: + info = profile.find_all('li') + if info: + name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정 당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="geoje", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_changnyeong()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangnam/geoje.py b/scrap/local_councils/gyeongsangnam/geoje.py new file mode 100644 index 0000000..e1cd85e --- /dev/null +++ 
b/scrap/local_councils/gyeongsangnam/geoje.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_geoje(url = 'https://www.gjcl.go.kr/source/korean/member/active.html') -> ScrapResult: + '''거제시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('dl') + + for profile in mlist: + info = profile.find_all('li') + if info: + name = profile.find("dt").get_text(strip=True) if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="geoje", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_geoje()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangnam/hanam.py b/scrap/local_councils/gyeongsangnam/hanam.py new file mode 100644 index 0000000..5e7be5a --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/hanam.py @@ -0,0 +1,36 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_hanam(url = 'https://www.haman.go.kr/04646/04669.web') -> ScrapResult: + '''합천군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('div', class_='column') + + for profile in mlist: + name = profile.find("h2").get_text(strip=True).split("\n")[0] if profile.find("h2").get_text(strip=True) else "이름 정보 없음" + info = profile.find_all('li') + if info: + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정당", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="hanam", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_hanam()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/hapchun.py b/scrap/local_councils/gyeongsangnam/hapchun.py similarity index 100% rename from scrap/local_councils/junnam/hapchun.py rename to scrap/local_councils/gyeongsangnam/hapchun.py diff --git a/scrap/local_councils/gyeongsangnam/miryang.py b/scrap/local_councils/gyeongsangnam/miryang.py new file mode 100644 index 0000000..a59d5f8 --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/miryang.py @@ -0,0 +1,35 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + +def scrap_miryang(url = 'https://council.miryang.go.kr/web/EgovCouncilManList.do?menuNo=14010100') -> ScrapResult: + '''밀양시 의회 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + + for profile in soup.find_all('div', class_="council_box"): + name_tag = profile.find("span", string="이름").find_next("span").get_text(strip=True) + name = name_tag if name_tag else "이름 정보 없음" + + party = 
"정당 정보 없음" + party_info = profile.find("span", string="소속정당").find_next("span").get_text(strip=True) + if party_info: + party = party_info + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="miryang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_miryang()) \ No newline at end of file diff --git a/scrap/local_councils/junnam/sanchung.py b/scrap/local_councils/gyeongsangnam/sanchung.py similarity index 100% rename from scrap/local_councils/junnam/sanchung.py rename to scrap/local_councils/gyeongsangnam/sanchung.py diff --git a/scrap/local_councils/gyeongsangnam/uiryeong.py b/scrap/local_councils/gyeongsangnam/uiryeong.py new file mode 100644 index 0000000..71bd42c --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/uiryeong.py @@ -0,0 +1,37 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_uiryeong(url="https://www.uiryeong.go.kr/board/list.uiryeong?boardId=BBS_0000169&menuCd=DOM_000000502001000000&contentsSid=1040") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. + + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("li", class_="assemList"): + name_tag = profile.find("p", class_="assemName") + name = name_tag.get_text(strip=True).split(" ")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("ul", class_="assemCate") + party_info = party_info.find("li") + if party_info: + party = party_info.get_text(strip=True) + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="goseong", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_uiryeong()) diff --git a/scrap/local_councils/gyeongsangnam/yangsan.py b/scrap/local_councils/gyeongsangnam/yangsan.py new file mode 100644 index 0000000..829d5fa --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/yangsan.py @@ -0,0 +1,34 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_yangsan(url = 'https://www.yscouncil.go.kr/kr/member/active') -> ScrapResult: + '''양산시 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + for profile in soup.find_all('div', class_="member"): + name_tag = profile.find("strong", class_="name") + name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("strong", string="정 당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="yangsan", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_yangsan()) \ No newline at end of file From ecd686cd9dc0026335cfd23c62d26ec0dcb06638 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Wed, 15 Nov 2023 11:17:55 +0900 Subject: [PATCH 15/19] Add: More 
--- scrap/local_councils/gyeongsangnam/gimhae.py | 37 +++++++++++++++++++ scrap/local_councils/gyeongsangnam/hamyang.py | 36 ++++++++++++++++++ scrap/local_councils/gyeongsangnam/namhae.py | 34 +++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 scrap/local_councils/gyeongsangnam/gimhae.py create mode 100644 scrap/local_councils/gyeongsangnam/hamyang.py create mode 100644 scrap/local_councils/gyeongsangnam/namhae.py diff --git a/scrap/local_councils/gyeongsangnam/gimhae.py b/scrap/local_councils/gyeongsangnam/gimhae.py new file mode 100644 index 0000000..d14294d --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/gimhae.py @@ -0,0 +1,37 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import requests + +def scrap_gimhae(url = 'https://council.gimhae.go.kr/kr/member/active') -> ScrapResult: + '''창녕군 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + mlist = soup.find_all('div', class_='card_area') + + for profile in mlist: + info = profile.find_all('li') + if info: + name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + + party = "정당 정보 없음" + party_dd = info[2].get_text(strip=True).replace("정 당 :", "") + if party_dd: + party = party_dd + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="gimhae", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_gimhae()) \ No newline at end of file diff --git a/scrap/local_councils/gyeongsangnam/hamyang.py b/scrap/local_councils/gyeongsangnam/hamyang.py new file mode 100644 index 0000000..c980973 --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/hamyang.py @@ -0,0 +1,36 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_hamyang(url="https://council.hygn.go.kr/kr/member/active.do") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. 
+ + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="hamyang", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_hamyang()) diff --git a/scrap/local_councils/gyeongsangnam/namhae.py b/scrap/local_councils/gyeongsangnam/namhae.py new file mode 100644 index 0000000..526469b --- /dev/null +++ b/scrap/local_councils/gyeongsangnam/namhae.py @@ -0,0 +1,34 @@ +from urllib.parse import urlparse + +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup +import re + +def scrap_namhae(url = 'https://council.namhae.go.kr/source/korean/member/active.html') -> ScrapResult: + '''남해 페이지에서 의원 상세약력 스크랩 + + :param url: 의원 목록 사이트 url + :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 + ''' + + soup = get_soup(url, verify=False, encoding="euc-kr") + councilors: List[Councilor] = [] + for profile in soup.find_all('div', class_="profile"): + name_tag = profile.find("li", class_="name") + name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find_all("li")[3] + if party_info: + party = party_info.get_text(strip=True).replace("소속정당 : ", "") + councilors.append(Councilor(name=name, party=party)) + + return ScrapResult( + council_id="yangsan", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors + ) + +if __name__ == '__main__': + print(scrap_namhae()) \ No newline at end of file From 46a674466ca19e1bcc01ed7a12334ea54c35f965 Mon Sep 17 00:00:00 2001 From: happycastle114 Date: Wed, 15 Nov 2023 02:20:16 +0000 Subject: [PATCH 16/19] Formatted with black --- main.py | 10 ++++-- .../local_councils/gyeongsangbuk/__init__.py | 2 +- scrap/local_councils/gyeongsangbuk/andong.py | 18 ++++++----- .../local_councils/gyeongsangbuk/gimcheon.py | 20 ++++++------ scrap/local_councils/gyeongsangbuk/gumi.py | 24 +++++++------- .../local_councils/gyeongsangbuk/gyeongju.py | 26 ++++++++------- .../gyeongsangbuk/moongyeong.py | 24 +++++++------- scrap/local_councils/gyeongsangbuk/pohang.py | 22 +++++++------ scrap/local_councils/gyeongsangbuk/sangju.py | 20 +++++++----- .../local_councils/gyeongsangbuk/yaecheon.py | 24 +++++++------- .../gyeongsangnam/changnyeong.py | 26 +++++++++------ .../local_councils/gyeongsangnam/changwon.py | 22 +++++++------ scrap/local_councils/gyeongsangnam/geoje.py | 28 ++++++++++------ scrap/local_councils/gyeongsangnam/gimhae.py | 26 +++++++++------ scrap/local_councils/gyeongsangnam/goseong.py | 4 ++- scrap/local_councils/gyeongsangnam/hanam.py | 26 +++++++++------ scrap/local_councils/gyeongsangnam/hapchun.py | 28 ++++++++++------ scrap/local_councils/gyeongsangnam/jinju.py | 18 ++++++----- scrap/local_councils/gyeongsangnam/miryang.py | 29 ++++++++++------- scrap/local_councils/gyeongsangnam/namhae.py | 20 +++++++----- scrap/local_councils/gyeongsangnam/sacheon.py | 4 ++- .../local_councils/gyeongsangnam/sanchung.py 
| 32 ++++++++++++------- .../local_councils/gyeongsangnam/uiryeong.py | 4 ++- scrap/local_councils/gyeongsangnam/yangsan.py | 18 ++++++----- scrap/local_councils/junnam/danyang.py | 28 +++++++++------- scrap/local_councils/junnam/gangjin.py | 28 +++++++++------- scrap/local_councils/junnam/goheung.py | 28 +++++++++------- scrap/local_councils/junnam/hamppyeong.py | 30 ++++++++++------- scrap/local_councils/junnam/henam.py | 20 ++++++------ scrap/local_councils/junnam/muan.py | 32 +++++++++++-------- scrap/local_councils/junnam/wando.py | 22 +++++++------ scrap/local_councils/junnam/yeonggwang.py | 30 ++++++++++------- scrap/utils/types.py | 8 ++++- test.py | 5 ++- 34 files changed, 427 insertions(+), 279 deletions(-) diff --git a/main.py b/main.py index fe397fc..0fff00a 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,8 @@ -from scrap.local_councils.gyeongsangbuk import scrap_andong, scrap_pohang, scrap_gyeongju +from scrap.local_councils.gyeongsangbuk import ( + scrap_andong, + scrap_pohang, + scrap_gyeongju, +) -if __name__ == '__main__': - print(scrap_gyeongju()) \ No newline at end of file +if __name__ == "__main__": + print(scrap_gyeongju()) diff --git a/scrap/local_councils/gyeongsangbuk/__init__.py b/scrap/local_councils/gyeongsangbuk/__init__.py index 2ae24c7..9204778 100644 --- a/scrap/local_councils/gyeongsangbuk/__init__.py +++ b/scrap/local_councils/gyeongsangbuk/__init__.py @@ -7,4 +7,4 @@ from .gimcheon import scrap_gimcheon from .sangju import scrap_sangju from .moongyeong import scrap_moongyeong -from .yaecheon import scrap_yaecheon \ No newline at end of file +from .yaecheon import scrap_yaecheon diff --git a/scrap/local_councils/gyeongsangbuk/andong.py b/scrap/local_councils/gyeongsangbuk/andong.py index f9f7f23..3a1601d 100644 --- a/scrap/local_councils/gyeongsangbuk/andong.py +++ b/scrap/local_councils/gyeongsangbuk/andong.py @@ -6,17 +6,18 @@ import re -def scrap_andong(url = 'https://council.andong.go.kr/kr/member/name.do') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def scrap_andong(url="https://council.andong.go.kr/kr/member/name.do") -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_='profile'): + for profile in soup.find_all("div", class_="profile"): name_tag = profile.find("em", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +30,9 @@ def scrap_andong(url = 'https://council.andong.go.kr/kr/member/name.do') -> Scra return ScrapResult( council_id="andong", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_andong()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_andong()) diff --git a/scrap/local_councils/gyeongsangbuk/gimcheon.py b/scrap/local_councils/gyeongsangbuk/gimcheon.py index 4f26f50..13a0cc1 100644 --- a/scrap/local_councils/gyeongsangbuk/gimcheon.py +++ b/scrap/local_councils/gyeongsangbuk/gimcheon.py @@ -5,18 +5,19 @@ from scrap.utils.requests import get_soup import re -def scrap_gimcheon(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: - '''김천시 페이지에서 의원 상세약력 스크랩 + +def scrap_gimcheon(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: + """김천시 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, 
verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='memberList')[0] + mlist = soup.find_all("ul", class_="memberList")[0] - for profile in mlist.find_all('li', recursive=False): + for profile in mlist.find_all("li", recursive=False): name_tag = profile.find("h4") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +30,9 @@ def scrap_gimcheon(url = 'https://council.gc.go.kr/kr/member/active.do') -> Scra return ScrapResult( council_id="gimcheon", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_gimcheon()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_gimcheon()) diff --git a/scrap/local_councils/gyeongsangbuk/gumi.py b/scrap/local_councils/gyeongsangbuk/gumi.py index d20f66d..1275ae7 100644 --- a/scrap/local_councils/gyeongsangbuk/gumi.py +++ b/scrap/local_councils/gyeongsangbuk/gumi.py @@ -5,18 +5,21 @@ from scrap.utils.requests import get_soup import re -def scrap_gumi(url = 'https://gumici.or.kr/content/member/memberName.html') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def scrap_gumi( + url="https://gumici.or.kr/content/member/memberName.html", +) -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='mlist')[0] + mlist = soup.find_all("ul", class_="mlist")[0] - for profile in mlist.find_all('li'): + for profile in mlist.find_all("li"): name_tag = profile.find("dd", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -27,10 +30,9 @@ def scrap_gumi(url = 'https://gumici.or.kr/content/member/memberName.html') -> S councilors.append(Councilor(name=name, party=party)) return ScrapResult( - council_id="gumi", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + council_id="gumi", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors ) -if __name__ == '__main__': - print(scrap_gumi()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_gumi()) diff --git a/scrap/local_councils/gyeongsangbuk/gyeongju.py b/scrap/local_councils/gyeongsangbuk/gyeongju.py index c8c4dc2..b98b606 100644 --- a/scrap/local_councils/gyeongsangbuk/gyeongju.py +++ b/scrap/local_councils/gyeongsangbuk/gyeongju.py @@ -6,32 +6,36 @@ import re import requests -def scrap_gyeongju(url = 'https://council.gyeongju.go.kr/kr/member/name.do') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def scrap_gyeongju( + url="https://council.gyeongju.go.kr/kr/member/name.do", +) -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_="profile"): + for profile in soup.find_all("div", class_="profile"): data_uid = profile.find("a", class_="btn_profile")["data-uid"] - + if data_uid: url = f"https://council.gyeongju.go.kr/common/async/member/{data_uid}.do" result = requests.get(url).json() - name = result['name'] if result['name'] else "이름 정보 없음" - party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" councilors.append(Councilor(name=name, party=party)) return ScrapResult( council_id="gyeongju", 
council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_gyeongju()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_gyeongju()) diff --git a/scrap/local_councils/gyeongsangbuk/moongyeong.py b/scrap/local_councils/gyeongsangbuk/moongyeong.py index a231b05..8c5b4f4 100644 --- a/scrap/local_councils/gyeongsangbuk/moongyeong.py +++ b/scrap/local_councils/gyeongsangbuk/moongyeong.py @@ -6,32 +6,34 @@ import re import requests -def scrap_moongyeong(url = 'https://council.gbmg.go.kr/kr/member/name.do') -> ScrapResult: - '''문경시 페이지에서 의원 상세약력 스크랩 + +def scrap_moongyeong(url="https://council.gbmg.go.kr/kr/member/name.do") -> ScrapResult: + """문경시 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_="profile"): + for profile in soup.find_all("div", class_="profile"): data_uid = profile.find("a", class_="btn_profile")["data-uid"] - + if data_uid: url = f"https://council.gbmg.go.kr/common/async/member/{data_uid}.do" result = requests.get(url).json() - name = result['name'] if result['name'] else "이름 정보 없음" - party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" councilors.append(Councilor(name=name, party=party)) return ScrapResult( council_id="moongyeong", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_moongyeong()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_moongyeong()) diff --git a/scrap/local_councils/gyeongsangbuk/pohang.py b/scrap/local_councils/gyeongsangbuk/pohang.py index 699df22..5b052d0 100644 --- a/scrap/local_councils/gyeongsangbuk/pohang.py +++ b/scrap/local_councils/gyeongsangbuk/pohang.py @@ -5,18 +5,21 @@ from scrap.utils.requests import get_soup import re -def scrap_pohang(url = 'https://council.pohang.go.kr/content/member/memberName.html') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def scrap_pohang( + url="https://council.pohang.go.kr/content/member/memberName.html", +) -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='mlist')[0] + mlist = soup.find_all("ul", class_="mlist")[0] - for profile in mlist.find_all('li'): + for profile in mlist.find_all("li"): name_tag = profile.find("dd", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +32,9 @@ def scrap_pohang(url = 'https://council.pohang.go.kr/content/member/memberName.h return ScrapResult( council_id="pohang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_pohang()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_pohang()) diff --git a/scrap/local_councils/gyeongsangbuk/sangju.py b/scrap/local_councils/gyeongsangbuk/sangju.py index 6d0326a..531ba5c 100644 --- a/scrap/local_councils/gyeongsangbuk/sangju.py +++ b/scrap/local_councils/gyeongsangbuk/sangju.py @@ -6,17 +6,20 @@ import re -def scrap_sangju(url = 'https://www.sangjucouncil.go.kr/kr/member/name.do') -> ScrapResult: - '''대전시 동구 
페이지에서 의원 상세약력 스크랩 + +def scrap_sangju( + url="https://www.sangjucouncil.go.kr/kr/member/name.do", +) -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_='profile'): + for profile in soup.find_all("div", class_="profile"): name_tag = profile.find("div", class_="name").find("strong") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +32,9 @@ def scrap_sangju(url = 'https://www.sangjucouncil.go.kr/kr/member/name.do') -> S return ScrapResult( council_id="sangju", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_sangju()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_sangju()) diff --git a/scrap/local_councils/gyeongsangbuk/yaecheon.py b/scrap/local_councils/gyeongsangbuk/yaecheon.py index 8527ec0..3e31bee 100644 --- a/scrap/local_councils/gyeongsangbuk/yaecheon.py +++ b/scrap/local_councils/gyeongsangbuk/yaecheon.py @@ -7,32 +7,34 @@ import re -def scrap_yaecheon(url = 'https://www.ycgcl.kr/kr/member/name.do') -> ScrapResult: - '''예천시 페이지에서 의원 상세약력 스크랩 + +def scrap_yaecheon(url="https://www.ycgcl.kr/kr/member/name.do") -> ScrapResult: + """예천시 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_="profile"): + for profile in soup.find_all("div", class_="profile"): data_uid = profile.find("a", class_="btn_profile")["data-uid"] - + if data_uid: url = f"https://www.ycgcl.kr/common/async/member/{data_uid}.do" result = requests.get(url).json() - name = result['name'] if result['name'] else "이름 정보 없음" - party = result['party_nm'] if result['party_nm'] else "정당 정보 없음" + name = result["name"] if result["name"] else "이름 정보 없음" + party = result["party_nm"] if result["party_nm"] else "정당 정보 없음" councilors.append(Councilor(name=name, party=party)) return ScrapResult( council_id="yaecheon", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_yaecheon()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_yaecheon()) diff --git a/scrap/local_councils/gyeongsangnam/changnyeong.py b/scrap/local_councils/gyeongsangnam/changnyeong.py index d7e1761..8f09573 100644 --- a/scrap/local_councils/gyeongsangnam/changnyeong.py +++ b/scrap/local_councils/gyeongsangnam/changnyeong.py @@ -5,21 +5,26 @@ from scrap.utils.requests import get_soup import requests -def scrap_changnyeong(url = 'https://www.cngc.go.kr/kr/member/active') -> ScrapResult: - '''창녕군 페이지에서 의원 상세약력 스크랩 + +def scrap_changnyeong(url="https://www.cngc.go.kr/kr/member/active") -> ScrapResult: + """창녕군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('div', class_='card_area') + mlist = soup.find_all("div", class_="card_area") for profile in mlist: - info = profile.find_all('li') + info = profile.find_all("li") if info: - name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + name = ( + profile.find("dt").get_text(strip=True).split("(")[0] + if 
profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) party = "정당 정보 없음" party_dd = info[2].get_text(strip=True).replace("정 당 :", "") @@ -30,8 +35,9 @@ def scrap_changnyeong(url = 'https://www.cngc.go.kr/kr/member/active') -> ScrapR return ScrapResult( council_id="geoje", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_changnyeong()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_changnyeong()) diff --git a/scrap/local_councils/gyeongsangnam/changwon.py b/scrap/local_councils/gyeongsangnam/changwon.py index 4d6f812..5273341 100644 --- a/scrap/local_councils/gyeongsangnam/changwon.py +++ b/scrap/local_councils/gyeongsangnam/changwon.py @@ -5,18 +5,21 @@ from scrap.utils.requests import get_soup import re -def scrap_changwon(url = 'https://gumici.or.kr/content/member/memberName.html') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def scrap_changwon( + url="https://gumici.or.kr/content/member/memberName.html", +) -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='mlist')[0] + mlist = soup.find_all("ul", class_="mlist")[0] - for profile in mlist.find_all('li'): + for profile in mlist.find_all("li"): name_tag = profile.find("dd", class_="name") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +32,9 @@ def scrap_changwon(url = 'https://gumici.or.kr/content/member/memberName.html') return ScrapResult( council_id="changwon", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_changwon()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_changwon()) diff --git a/scrap/local_councils/gyeongsangnam/geoje.py b/scrap/local_councils/gyeongsangnam/geoje.py index e1cd85e..d12f06c 100644 --- a/scrap/local_councils/gyeongsangnam/geoje.py +++ b/scrap/local_councils/gyeongsangnam/geoje.py @@ -5,21 +5,28 @@ from scrap.utils.requests import get_soup import requests -def scrap_geoje(url = 'https://www.gjcl.go.kr/source/korean/member/active.html') -> ScrapResult: - '''거제시 페이지에서 의원 상세약력 스크랩 + +def scrap_geoje( + url="https://www.gjcl.go.kr/source/korean/member/active.html", +) -> ScrapResult: + """거제시 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('dl') + mlist = soup.find_all("dl") for profile in mlist: - info = profile.find_all('li') + info = profile.find_all("li") if info: - name = profile.find("dt").get_text(strip=True) if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + name = ( + profile.find("dt").get_text(strip=True) + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) party = "정당 정보 없음" party_dd = info[2].get_text(strip=True).replace("정당 :", "") @@ -30,8 +37,9 @@ def scrap_geoje(url = 'https://www.gjcl.go.kr/source/korean/member/active.html') return ScrapResult( council_id="geoje", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_geoje()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_geoje()) diff --git a/scrap/local_councils/gyeongsangnam/gimhae.py b/scrap/local_councils/gyeongsangnam/gimhae.py index 
d14294d..74d2b27 100644 --- a/scrap/local_councils/gyeongsangnam/gimhae.py +++ b/scrap/local_councils/gyeongsangnam/gimhae.py @@ -5,21 +5,26 @@ from scrap.utils.requests import get_soup import requests -def scrap_gimhae(url = 'https://council.gimhae.go.kr/kr/member/active') -> ScrapResult: - '''창녕군 페이지에서 의원 상세약력 스크랩 + +def scrap_gimhae(url="https://council.gimhae.go.kr/kr/member/active") -> ScrapResult: + """창녕군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('div', class_='card_area') + mlist = soup.find_all("div", class_="card_area") for profile in mlist: - info = profile.find_all('li') + info = profile.find_all("li") if info: - name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음" + name = ( + profile.find("dt").get_text(strip=True).split("(")[0] + if profile.find("dt").get_text(strip=True) + else "이름 정보 없음" + ) party = "정당 정보 없음" party_dd = info[2].get_text(strip=True).replace("정 당 :", "") @@ -30,8 +35,9 @@ def scrap_gimhae(url = 'https://council.gimhae.go.kr/kr/member/active') -> Scrap return ScrapResult( council_id="gimhae", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_gimhae()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_gimhae()) diff --git a/scrap/local_councils/gyeongsangnam/goseong.py b/scrap/local_councils/gyeongsangnam/goseong.py index 4b2081b..1eff641 100644 --- a/scrap/local_councils/gyeongsangnam/goseong.py +++ b/scrap/local_councils/gyeongsangnam/goseong.py @@ -3,7 +3,9 @@ from scrap.utils.requests import get_soup -def scrap_goseong(url="https://council.goseong.go.kr/kr/member/active.do") -> ScrapResult: +def scrap_goseong( + url="https://council.goseong.go.kr/kr/member/active.do", +) -> ScrapResult: """ Scrap councilors’ details from Yongsan-gu District Council of Seoul page. 
diff --git a/scrap/local_councils/gyeongsangnam/hanam.py b/scrap/local_councils/gyeongsangnam/hanam.py index 5e7be5a..238881b 100644 --- a/scrap/local_councils/gyeongsangnam/hanam.py +++ b/scrap/local_councils/gyeongsangnam/hanam.py @@ -5,20 +5,25 @@ from scrap.utils.requests import get_soup import requests -def scrap_hanam(url = 'https://www.haman.go.kr/04646/04669.web') -> ScrapResult: - '''합천군 페이지에서 의원 상세약력 스크랩 + +def scrap_hanam(url="https://www.haman.go.kr/04646/04669.web") -> ScrapResult: + """합천군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('div', class_='column') + mlist = soup.find_all("div", class_="column") for profile in mlist: - name = profile.find("h2").get_text(strip=True).split("\n")[0] if profile.find("h2").get_text(strip=True) else "이름 정보 없음" - info = profile.find_all('li') + name = ( + profile.find("h2").get_text(strip=True).split("\n")[0] + if profile.find("h2").get_text(strip=True) + else "이름 정보 없음" + ) + info = profile.find_all("li") if info: party = "정당 정보 없음" party_dd = info[2].get_text(strip=True).replace("정당", "") @@ -29,8 +34,9 @@ def scrap_hanam(url = 'https://www.haman.go.kr/04646/04669.web') -> ScrapResult: return ScrapResult( council_id="hanam", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_hanam()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_hanam()) diff --git a/scrap/local_councils/gyeongsangnam/hapchun.py b/scrap/local_councils/gyeongsangnam/hapchun.py index 9af2186..47cb577 100644 --- a/scrap/local_councils/gyeongsangnam/hapchun.py +++ b/scrap/local_councils/gyeongsangnam/hapchun.py @@ -5,21 +5,28 @@ from scrap.utils.requests import get_soup import requests -def scrap_hapchun(url = 'https://www.hccl.go.kr/source/korean/member/active.jsp') -> ScrapResult: - '''합천군 페이지에서 의원 상세약력 스크랩 + +def scrap_hapchun( + url="https://www.hccl.go.kr/source/korean/member/active.jsp", +) -> ScrapResult: + """합천군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('dl', class_='member') + mlist = soup.find_all("dl", class_="member") for profile in mlist: - info = profile.find_all('li') + info = profile.find_all("li") if info: - name = info[0].get_text(strip=True) if info[0].get_text(strip=True) else "이름 정보 없음" + name = ( + info[0].get_text(strip=True) + if info[0].get_text(strip=True) + else "이름 정보 없음" + ) party = "정당 정보 없음" party_dd = info[3].get_text(strip=True).replace("소속정당 : ", "") @@ -30,8 +37,9 @@ def scrap_hapchun(url = 'https://www.hccl.go.kr/source/korean/member/active.jsp' return ScrapResult( council_id="hapchun", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_hapchun()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_hapchun()) diff --git a/scrap/local_councils/gyeongsangnam/jinju.py b/scrap/local_councils/gyeongsangnam/jinju.py index c55f0f9..0b1d6fe 100644 --- a/scrap/local_councils/gyeongsangnam/jinju.py +++ b/scrap/local_councils/gyeongsangnam/jinju.py @@ -6,17 +6,18 @@ import re -def scrap_jinju(url = 'https://www.jinjucl.com/kr/member/name.do') -> ScrapResult: - '''대전시 동구 페이지에서 의원 상세약력 스크랩 + +def 
scrap_jinju(url="https://www.jinjucl.com/kr/member/name.do") -> ScrapResult: + """대전시 동구 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_='profile'): + for profile in soup.find_all("div", class_="profile"): name_tag = profile.find("div", class_="name").find("strong") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +30,9 @@ def scrap_jinju(url = 'https://www.jinjucl.com/kr/member/name.do') -> ScrapResul return ScrapResult( council_id="jinju", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_jinju()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_jinju()) diff --git a/scrap/local_councils/gyeongsangnam/miryang.py b/scrap/local_councils/gyeongsangnam/miryang.py index a59d5f8..d61211f 100644 --- a/scrap/local_councils/gyeongsangnam/miryang.py +++ b/scrap/local_councils/gyeongsangnam/miryang.py @@ -4,23 +4,29 @@ from scrap.utils.types import CouncilType, Councilor, ScrapResult from scrap.utils.requests import get_soup -def scrap_miryang(url = 'https://council.miryang.go.kr/web/EgovCouncilManList.do?menuNo=14010100') -> ScrapResult: - '''밀양시 의회 페이지에서 의원 상세약력 스크랩 + +def scrap_miryang( + url="https://council.miryang.go.kr/web/EgovCouncilManList.do?menuNo=14010100", +) -> ScrapResult: + """밀양시 의회 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - - for profile in soup.find_all('div', class_="council_box"): - name_tag = profile.find("span", string="이름").find_next("span").get_text(strip=True) + for profile in soup.find_all("div", class_="council_box"): + name_tag = ( + profile.find("span", string="이름").find_next("span").get_text(strip=True) + ) name = name_tag if name_tag else "이름 정보 없음" party = "정당 정보 없음" - party_info = profile.find("span", string="소속정당").find_next("span").get_text(strip=True) + party_info = ( + profile.find("span", string="소속정당").find_next("span").get_text(strip=True) + ) if party_info: party = party_info councilors.append(Councilor(name=name, party=party)) @@ -28,8 +34,9 @@ def scrap_miryang(url = 'https://council.miryang.go.kr/web/EgovCouncilManList.do return ScrapResult( council_id="miryang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_miryang()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_miryang()) diff --git a/scrap/local_councils/gyeongsangnam/namhae.py b/scrap/local_councils/gyeongsangnam/namhae.py index 526469b..4755ffa 100644 --- a/scrap/local_councils/gyeongsangnam/namhae.py +++ b/scrap/local_councils/gyeongsangnam/namhae.py @@ -5,16 +5,19 @@ from scrap.utils.requests import get_soup import re -def scrap_namhae(url = 'https://council.namhae.go.kr/source/korean/member/active.html') -> ScrapResult: - '''남해 페이지에서 의원 상세약력 스크랩 + +def scrap_namhae( + url="https://council.namhae.go.kr/source/korean/member/active.html", +) -> ScrapResult: + """남해 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False, encoding="euc-kr") councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_="profile"): + for profile in soup.find_all("div", class_="profile"): 
name_tag = profile.find("li", class_="name") name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" @@ -27,8 +30,9 @@ def scrap_namhae(url = 'https://council.namhae.go.kr/source/korean/member/active return ScrapResult( council_id="yangsan", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_namhae()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_namhae()) diff --git a/scrap/local_councils/gyeongsangnam/sacheon.py b/scrap/local_councils/gyeongsangnam/sacheon.py index c0807bc..91e8e55 100644 --- a/scrap/local_councils/gyeongsangnam/sacheon.py +++ b/scrap/local_councils/gyeongsangnam/sacheon.py @@ -3,7 +3,9 @@ from scrap.utils.requests import get_soup -def scrap_sacheon(url="https://council.sacheon.go.kr/kr/member/active.do") -> ScrapResult: +def scrap_sacheon( + url="https://council.sacheon.go.kr/kr/member/active.do", +) -> ScrapResult: """ Scrap councilors’ details from Yongsan-gu District Council of Seoul page. diff --git a/scrap/local_councils/gyeongsangnam/sanchung.py b/scrap/local_councils/gyeongsangnam/sanchung.py index deeab16..04b87c8 100644 --- a/scrap/local_councils/gyeongsangnam/sanchung.py +++ b/scrap/local_councils/gyeongsangnam/sanchung.py @@ -5,23 +5,30 @@ from scrap.utils.requests import get_soup import requests -def scrap_sanchung(url = 'https://www.sancheong.go.kr/council/selectPersonalAssembly.do?key=2224&assemCate=8') -> ScrapResult: - '''산청군 페이지에서 의원 상세약력 스크랩 + +def scrap_sanchung( + url="https://www.sancheong.go.kr/council/selectPersonalAssembly.do?key=2224&assemCate=8", +) -> ScrapResult: + """산청군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find('ul', class_='comment_list') - lis = mlist.find_all('li', recursive=False) + mlist = soup.find("ul", class_="comment_list") + lis = mlist.find_all("li", recursive=False) for profile in lis: print(profile) - info = profile.find_all('li') - name = profile.find("span", class_="name").get_text(strip=True) if profile.find("span", class_="name").get_text(strip=True) else "이름 정보 없음" + info = profile.find_all("li") + name = ( + profile.find("span", class_="name").get_text(strip=True) + if profile.find("span", class_="name").get_text(strip=True) + else "이름 정보 없음" + ) party = "정당 정보 없음" - + party_dd = info[3].get_text(strip=True).replace("소속정당", "") if party_dd: party = party_dd @@ -30,8 +37,9 @@ def scrap_sanchung(url = 'https://www.sancheong.go.kr/council/selectPersonalAsse return ScrapResult( council_id="hapchun", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_sanchung()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_sanchung()) diff --git a/scrap/local_councils/gyeongsangnam/uiryeong.py b/scrap/local_councils/gyeongsangnam/uiryeong.py index 71bd42c..5a7d23e 100644 --- a/scrap/local_councils/gyeongsangnam/uiryeong.py +++ b/scrap/local_councils/gyeongsangnam/uiryeong.py @@ -3,7 +3,9 @@ from scrap.utils.requests import get_soup -def scrap_uiryeong(url="https://www.uiryeong.go.kr/board/list.uiryeong?boardId=BBS_0000169&menuCd=DOM_000000502001000000&contentsSid=1040") -> ScrapResult: +def scrap_uiryeong( + url="https://www.uiryeong.go.kr/board/list.uiryeong?boardId=BBS_0000169&menuCd=DOM_000000502001000000&contentsSid=1040", +) -> ScrapResult: """ Scrap 
councilors’ details from Yongsan-gu District Council of Seoul page. diff --git a/scrap/local_councils/gyeongsangnam/yangsan.py b/scrap/local_councils/gyeongsangnam/yangsan.py index 829d5fa..c5964ab 100644 --- a/scrap/local_councils/gyeongsangnam/yangsan.py +++ b/scrap/local_councils/gyeongsangnam/yangsan.py @@ -5,16 +5,17 @@ from scrap.utils.requests import get_soup import re -def scrap_yangsan(url = 'https://www.yscouncil.go.kr/kr/member/active') -> ScrapResult: - '''양산시 페이지에서 의원 상세약력 스크랩 + +def scrap_yangsan(url="https://www.yscouncil.go.kr/kr/member/active") -> ScrapResult: + """양산시 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - for profile in soup.find_all('div', class_="member"): + for profile in soup.find_all("div", class_="member"): name_tag = profile.find("strong", class_="name") name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음" @@ -27,8 +28,9 @@ def scrap_yangsan(url = 'https://www.yscouncil.go.kr/kr/member/active') -> Scrap return ScrapResult( council_id="yangsan", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_yangsan()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_yangsan()) diff --git a/scrap/local_councils/junnam/danyang.py b/scrap/local_councils/junnam/danyang.py index 34e7a38..4377791 100644 --- a/scrap/local_councils/junnam/danyang.py +++ b/scrap/local_councils/junnam/danyang.py @@ -5,20 +5,25 @@ from scrap.utils.requests import get_soup import re -def scrap_damyang(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: - '''담양군 페이지에서 의원 상세약력 스크랩 + +def scrap_damyang(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: + """담양군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='memlist')[0] + mlist = soup.find_all("ul", class_="memlist")[0] - for profile in mlist.find_all('li', recursive=False): - info = profile.find('ul', class_='info') - name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + for profile in mlist.find_all("li", recursive=False): + info = profile.find("ul", class_="info") + name = ( + info.find("h5").get_text(strip=True) + if info.find("h5").get_text(strip=True) + else "이름 정보 없음" + ) li = info.find("li", class_="item MP") party = "정당 정보 없음" @@ -30,8 +35,9 @@ def scrap_damyang(url = 'https://council.gc.go.kr/kr/member/active.do') -> Scrap return ScrapResult( council_id="damyang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_damyang()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_damyang()) diff --git a/scrap/local_councils/junnam/gangjin.py b/scrap/local_councils/junnam/gangjin.py index b4719f3..b7ff1aa 100644 --- a/scrap/local_councils/junnam/gangjin.py +++ b/scrap/local_councils/junnam/gangjin.py @@ -5,20 +5,25 @@ from scrap.utils.requests import get_soup import re -def scrap_gangjin(url = 'https://www.gangjincl.go.kr/index.do?PID=010') -> ScrapResult: - '''강진군 페이지에서 의원 상세약력 스크랩 + +def scrap_gangjin(url="https://www.gangjincl.go.kr/index.do?PID=010") -> ScrapResult: + """강진군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 
ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='memlist')[0] + mlist = soup.find_all("ul", class_="memlist")[0] - for profile in mlist.find_all('li', recursive=False): - info = profile.find('ul', class_='info') - name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + for profile in mlist.find_all("li", recursive=False): + info = profile.find("ul", class_="info") + name = ( + info.find("h5").get_text(strip=True) + if info.find("h5").get_text(strip=True) + else "이름 정보 없음" + ) li = info.find_all("li", recursive=False)[6] party = "정당 정보 없음" @@ -30,8 +35,9 @@ def scrap_gangjin(url = 'https://www.gangjincl.go.kr/index.do?PID=010') -> Scrap return ScrapResult( council_id="damyang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_gangjin()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_gangjin()) diff --git a/scrap/local_councils/junnam/goheung.py b/scrap/local_councils/junnam/goheung.py index 4751184..6f19a48 100644 --- a/scrap/local_councils/junnam/goheung.py +++ b/scrap/local_councils/junnam/goheung.py @@ -5,20 +5,25 @@ from scrap.utils.requests import get_soup import re -def scrap_goheung(url = 'https://council.gc.go.kr/kr/member/active.do') -> ScrapResult: - '''고흥군 페이지에서 의원 상세약력 스크랩 + +def scrap_goheung(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult: + """고흥군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='memlist')[0] + mlist = soup.find_all("ul", class_="memlist")[0] - for profile in mlist.find_all('li', recursive=False): - info = profile.find('ul', class_='info') - name = info.find("h5").get_text(strip=True) if info.find("h5").get_text(strip=True) else "이름 정보 없음" + for profile in mlist.find_all("li", recursive=False): + info = profile.find("ul", class_="info") + name = ( + info.find("h5").get_text(strip=True) + if info.find("h5").get_text(strip=True) + else "이름 정보 없음" + ) li = info.find("li", class_="item MP") party = "정당 정보 없음" @@ -30,8 +35,9 @@ def scrap_goheung(url = 'https://council.gc.go.kr/kr/member/active.do') -> Scrap return ScrapResult( council_id="damyang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_damyang()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_damyang()) diff --git a/scrap/local_councils/junnam/hamppyeong.py b/scrap/local_councils/junnam/hamppyeong.py index 741ee9d..b8a6241 100644 --- a/scrap/local_councils/junnam/hamppyeong.py +++ b/scrap/local_councils/junnam/hamppyeong.py @@ -5,26 +5,33 @@ from scrap.utils.requests import get_soup import re -def scrap_hamppyeong(url = 'https://www.hpcouncil.go.kr/main/incumbentCouncillor.do?PID=0201&item=01') -> ScrapResult: - '''무안 페이지에서 의원 상세약력 스크랩 + +def scrap_hamppyeong( + url="https://www.hpcouncil.go.kr/main/incumbentCouncillor.do?PID=0201&item=01", +) -> ScrapResult: + """무안 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('div', id='subContent')[0] + mlist = soup.find_all("div", id="subContent")[0] total_div = mlist.find_all("div", 
class_="infosubcontent") total_div.append(mlist.find_all("div", class_="infosubcontent2")) for profile in total_div: if not profile: continue - info = profile.find('div', class_='infosub_detail') - name = info.find("li", class_="infosubmem_name" ).get_text(strip=False)[:3] if info.find("li", class_="infosubmem_name" ).get_text(strip=True) else "이름 정보 없음" + info = profile.find("div", class_="infosub_detail") + name = ( + info.find("li", class_="infosubmem_name").get_text(strip=False)[:3] + if info.find("li", class_="infosubmem_name").get_text(strip=True) + else "이름 정보 없음" + ) - party_dd = info.find("ul", class_="infosub").find_all('li')[1] + party_dd = info.find("ul", class_="infosub").find_all("li")[1] party = "정당 정보 없음" if party_dd: party = party_dd.get_text(strip=True).replace("소속정당 : ", "") @@ -33,8 +40,9 @@ def scrap_hamppyeong(url = 'https://www.hpcouncil.go.kr/main/incumbentCouncillor return ScrapResult( council_id="yeonggwang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_hamppyeong()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_hamppyeong()) diff --git a/scrap/local_councils/junnam/henam.py b/scrap/local_councils/junnam/henam.py index 27011b1..b344bc7 100644 --- a/scrap/local_councils/junnam/henam.py +++ b/scrap/local_councils/junnam/henam.py @@ -5,18 +5,19 @@ from scrap.utils.requests import get_soup import re -def scrap_henam(url = 'http://council.haenam.go.kr/kr/member/active.do') -> ScrapResult: - '''해남 페이지에서 의원 상세약력 스크랩 + +def scrap_henam(url="http://council.haenam.go.kr/kr/member/active.do") -> ScrapResult: + """해남 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='memberList')[0] + mlist = soup.find_all("ul", class_="memberList")[0] - for profile in mlist.find_all('li', recursive=False): + for profile in mlist.find_all("li", recursive=False): name_tag = profile.find("h4") name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" @@ -29,8 +30,9 @@ def scrap_henam(url = 'http://council.haenam.go.kr/kr/member/active.do') -> Scra return ScrapResult( council_id="gimcheon", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_henam()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_henam()) diff --git a/scrap/local_councils/junnam/muan.py b/scrap/local_councils/junnam/muan.py index 9684f4d..cb4f175 100644 --- a/scrap/local_councils/junnam/muan.py +++ b/scrap/local_councils/junnam/muan.py @@ -5,20 +5,27 @@ from scrap.utils.requests import get_soup import re -def scrap_muan(url = 'http://www.muan.or.kr/main/incumbentCouncillor.do?PID=0201') -> ScrapResult: - '''무안 페이지에서 의원 상세약력 스크랩 + +def scrap_muan( + url="http://www.muan.or.kr/main/incumbentCouncillor.do?PID=0201", +) -> ScrapResult: + """무안 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('ul', class_='formerCouncillor')[0] + mlist = soup.find_all("ul", class_="formerCouncillor")[0] - for profile in mlist.find_all('li', recursive=False): - info = profile.find('div', class_='profileInfo') - name = info.find("div", class_="infosubmem_name").get_text(strip=True) if info.find("div", 
class_="infosubmem_name").get_text(strip=True) else "이름 정보 없음" + for profile in mlist.find_all("li", recursive=False): + info = profile.find("div", class_="profileInfo") + name = ( + info.find("div", class_="infosubmem_name").get_text(strip=True) + if info.find("div", class_="infosubmem_name").get_text(strip=True) + else "이름 정보 없음" + ) party_dd = info.find("div", class_="infoContents") party = "정당 정보 없음" @@ -27,10 +34,9 @@ def scrap_muan(url = 'http://www.muan.or.kr/main/incumbentCouncillor.do?PID=0201 councilors.append(Councilor(name=name, party=party)) return ScrapResult( - council_id="muan", - council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + council_id="muan", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors ) -if __name__ == '__main__': - print(scrap_muan()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_muan()) diff --git a/scrap/local_councils/junnam/wando.py b/scrap/local_councils/junnam/wando.py index 5dc3806..dc90a26 100644 --- a/scrap/local_councils/junnam/wando.py +++ b/scrap/local_councils/junnam/wando.py @@ -5,27 +5,31 @@ from scrap.utils.requests import get_soup import requests -def scrap_wando(url = 'http://www.wdcc.or.kr:8088/common/selectCouncilMemberList.json?searchCsDaesoo=9') -> ScrapResult: - '''완도군 페이지에서 의원 상세약력 스크랩 + +def scrap_wando( + url="http://www.wdcc.or.kr:8088/common/selectCouncilMemberList.json?searchCsDaesoo=9", +) -> ScrapResult: + """완도군 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' + """ councilors: List[Councilor] = [] result = requests.get(url) result_json = result.json() - for profile in result_json['list']: - name = profile['cmNm'] - party = profile['mpParty'] + for profile in result_json["list"]: + name = profile["cmNm"] + party = profile["mpParty"] councilors.append(Councilor(name=name, party=party)) return ScrapResult( council_id="wando", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_wando()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_wando()) diff --git a/scrap/local_councils/junnam/yeonggwang.py b/scrap/local_councils/junnam/yeonggwang.py index efe39f4..e4e38c0 100644 --- a/scrap/local_councils/junnam/yeonggwang.py +++ b/scrap/local_councils/junnam/yeonggwang.py @@ -5,20 +5,27 @@ from scrap.utils.requests import get_soup import re -def scrap_yeonggwang(url = 'https://www.ygcouncil.go.kr/bbs/content.php?co_id=councilors_curr#aside') -> ScrapResult: - '''무안 페이지에서 의원 상세약력 스크랩 + +def scrap_yeonggwang( + url="https://www.ygcouncil.go.kr/bbs/content.php?co_id=councilors_curr#aside", +) -> ScrapResult: + """무안 페이지에서 의원 상세약력 스크랩 :param url: 의원 목록 사이트 url :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체 - ''' - + """ + soup = get_soup(url, verify=False) councilors: List[Councilor] = [] - mlist = soup.find_all('div', class_='councilors_curr2_wrap')[0] + mlist = soup.find_all("div", class_="councilors_curr2_wrap")[0] - for profile in mlist.find_all('div',class_="subcon_body_txt", recursive=False): - info = profile.find('div', class_='ygmember_txt') - name = info.find("h4").get_text(strip=True).split(" ")[0] if info.find("h4").get_text(strip=True) else "이름 정보 없음" + for profile in mlist.find_all("div", class_="subcon_body_txt", recursive=False): + info = profile.find("div", class_="ygmember_txt") + name = ( + info.find("h4").get_text(strip=True).split(" ")[0] + if info.find("h4").get_text(strip=True) + else "이름 정보 없음" + ) party_dd = info.find("p", 
class_="party_highlight") party = "정당 정보 없음" @@ -29,8 +36,9 @@ def scrap_yeonggwang(url = 'https://www.ygcouncil.go.kr/bbs/content.php?co_id=co return ScrapResult( council_id="yeonggwang", council_type=CouncilType.LOCAL_COUNCIL, - councilors=councilors + councilors=councilors, ) -if __name__ == '__main__': - print(scrap_yeonggwang()) \ No newline at end of file + +if __name__ == "__main__": + print(scrap_yeonggwang()) diff --git a/scrap/utils/types.py b/scrap/utils/types.py index ef0d30c..124e839 100644 --- a/scrap/utils/types.py +++ b/scrap/utils/types.py @@ -5,11 +5,14 @@ from typing import Optional, List from dataclasses import dataclass from enum import Enum + + class CouncilType(str, Enum): """ 의회의 종류를 나타내는 열거형입니다. """ - LOCAL_COUNCIL = "local_council" + + LOCAL_COUNCIL = "local_council" """ 기초의회 """ @@ -17,11 +20,14 @@ class CouncilType(str, Enum): """ 광역의회 """ + def __str__(self): """ JSON으로 직렬화하기 위해 문자열로 변환하는 함수를 오버라이드합니다. """ return str(self.value) + + from db.types import CouncilType, Councilor diff --git a/test.py b/test.py index 2a0f446..a3c2a3c 100644 --- a/test.py +++ b/test.py @@ -1,6 +1,9 @@ name = "한양" + def change(a): name = a + + change("고려") -print(name) \ No newline at end of file +print(name) From 73eb3de2929a8560ebcab83fc5a4244fb89d03ca Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Wed, 15 Nov 2023 11:38:44 +0900 Subject: [PATCH 17/19] add more council --- scrap/local_councils/basic.py | 2 ++ scrap/local_councils/daejeon/daejeon.py | 2 ++ .../local_councils/gyeongsangbuk/cheongdo.py | 36 +++++++++++++++++++ .../local_councils/gyeongsangbuk/goryeong.py | 36 +++++++++++++++++++ scrap/utils/types.py | 26 +++++++------- 5 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 scrap/local_councils/gyeongsangbuk/cheongdo.py create mode 100644 scrap/local_councils/gyeongsangbuk/goryeong.py diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py index a394eae..97ae9aa 100644 --- a/scrap/local_councils/basic.py +++ b/scrap/local_councils/basic.py @@ -2,6 +2,8 @@ import re import requests import copy +from scrap.utils.utils import getPartyList +from scrap.utils.types import ScrapBasicArgument regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE) # Case-insensitive party_keywords = getPartyList() diff --git a/scrap/local_councils/daejeon/daejeon.py b/scrap/local_councils/daejeon/daejeon.py index eb9c2f2..3ccc05d 100644 --- a/scrap/local_councils/daejeon/daejeon.py +++ b/scrap/local_councils/daejeon/daejeon.py @@ -1,4 +1,6 @@ from scrap.local_councils import * +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup def scrap_65(url, cid) -> ScrapResult: diff --git a/scrap/local_councils/gyeongsangbuk/cheongdo.py b/scrap/local_councils/gyeongsangbuk/cheongdo.py new file mode 100644 index 0000000..97fa97c --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/cheongdo.py @@ -0,0 +1,36 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_cheongdo(url="https://www.cheongdocl.go.kr/kr/member/active.do") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. 
+ + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="소속정당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ScrapResult( + council_id="cheongdo", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_cheongdo()) diff --git a/scrap/local_councils/gyeongsangbuk/goryeong.py b/scrap/local_councils/gyeongsangbuk/goryeong.py new file mode 100644 index 0000000..8218287 --- /dev/null +++ b/scrap/local_councils/gyeongsangbuk/goryeong.py @@ -0,0 +1,36 @@ +from typing import List +from scrap.utils.types import CouncilType, Councilor, ScrapResult +from scrap.utils.requests import get_soup + + +def scrap_goryeong(url="https://council.goryeong.go.kr/kr/member/active.do") -> ScrapResult: + """ + Scrap councilors’ details from Yongsan-gu District Council of Seoul page. + + :param url: Yongsan-gu District Council members' list site url + :return: Councilors’ name and party data in ScrapResult object + """ + + soup = get_soup(url, verify=False) + councilors: List[Councilor] = [] + + for profile in soup.find_all("div", class_="profile"): + name_tag = profile.find("em", class_="name") + name = name_tag.get_text(strip=True).split("\r")[0] if name_tag else "이름 정보 없음" + + party = "정당 정보 없음" + party_info = profile.find("em", string="정 당 : ") + if party_info: + party = party_info.find_next("span").get_text(strip=True) + + councilors.append(Councilor(name=name, jdName=party)) + + return ScrapResult( + council_id="goryeong", + council_type=CouncilType.LOCAL_COUNCIL, + councilors=councilors, + ) + + +if __name__ == "__main__": + print(scrap_goryeong()) diff --git a/scrap/utils/types.py b/scrap/utils/types.py index ef0d30c..27a64c4 100644 --- a/scrap/utils/types.py +++ b/scrap/utils/types.py @@ -52,19 +52,19 @@ class ScrapBasicArgument: def __init__( self, - pf_elt: str | None = None, - pf_cls: str | None = None, - pf_memlistelt: str | None = None, - pf_memlistcls: str | None = None, - name_elt: str | None = None, - name_cls: str | None = None, - name_wrapelt: str | None = None, - name_wrapcls: str | None = None, - pty_elt: str | None = None, - pty_cls: str | None = None, - pty_wrapelt: str | None = None, - pty_wrapcls: str | None = None, - pty_wraptxt: str | None = None, + pf_elt: str = None, + pf_cls: str = None, + pf_memlistelt: str = None, + pf_memlistcls: str = None, + name_elt: str = None, + name_cls: str = None, + name_wrapelt: str = None, + name_wrapcls: str = None, + pty_elt: str = None, + pty_cls: str = None, + pty_wrapelt: str = None, + pty_wrapcls: str = None, + pty_wraptxt: str = None, ): """ ScrapBasicArgument 클래스의 생성자입니다. 
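The scrap/utils/types.py hunk above drops the "str | None" unions from the ScrapBasicArgument constructor, presumably so the module also runs on Python versions older than 3.10, where the "X | Y" annotation syntax is unavailable at runtime. A bare "pf_elt: str = None" default executes fine but is not an accurate type hint; a minimal sketch of the equivalent pre-3.10-friendly spelling with typing.Optional follows (the class name and the reduced parameter list are illustrative only, not the project's actual API):

    from typing import Optional

    class ScrapBasicArgumentSketch:
        """Illustrative variant: None defaults kept type-correct via Optional."""

        def __init__(
            self,
            pf_elt: Optional[str] = None,    # tag name of each profile block
            pf_cls: Optional[str] = None,    # CSS class of each profile block
            name_elt: Optional[str] = None,  # tag holding the councilor name
            name_cls: Optional[str] = None,  # CSS class of the name tag
            pty_elt: Optional[str] = None,   # tag holding the party name
            pty_cls: Optional[str] = None,   # CSS class of the party tag
        ):
            # None simply means the selector is unused for a given council site.
            self.pf_elt = pf_elt
            self.pf_cls = pf_cls
            self.name_elt = name_elt
            self.name_cls = name_cls
            self.pty_elt = pty_elt
            self.pty_cls = pty_cls
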
From 32c967a3ffae3bd6782b745b8bc4eaf4f4f4bc96 Mon Sep 17 00:00:00 2001
From: happycastle <41810556+happycastle114@users.noreply.github.com>
Date: Wed, 15 Nov 2023 15:24:20 +0900
Subject: [PATCH 18/19] Add: More

---
 scrap/local_councils/gyeongsangbuk/chilgok.py | 37 ++++++++++++++++++
 scrap/local_councils/gyeongsangbuk/uiseong.py | 38 +++++++++++++++++++
 scrap/local_councils/gyeongsangbuk/uljin.py   | 38 +++++++++++++++++++
 scrap/local_councils/gyeongsangbuk/yungduk.py | 37 ++++++++++++++++++
 4 files changed, 150 insertions(+)
 create mode 100644 scrap/local_councils/gyeongsangbuk/chilgok.py
 create mode 100644 scrap/local_councils/gyeongsangbuk/uiseong.py
 create mode 100644 scrap/local_councils/gyeongsangbuk/uljin.py
 create mode 100644 scrap/local_councils/gyeongsangbuk/yungduk.py

diff --git a/scrap/local_councils/gyeongsangbuk/chilgok.py b/scrap/local_councils/gyeongsangbuk/chilgok.py
new file mode 100644
index 0000000..46cb2e2
--- /dev/null
+++ b/scrap/local_councils/gyeongsangbuk/chilgok.py
@@ -0,0 +1,37 @@
+from urllib.parse import urlparse
+
+from typing import List
+from scrap.utils.types import CouncilType, Councilor, ScrapResult
+from scrap.utils.requests import get_soup
+import requests
+
+def scrap_chilgok(url = 'https://council.chilgok.go.kr/content/member/member.html') -> ScrapResult:
+    '''칠곡군 페이지에서 의원 상세약력 스크랩
+
+    :param url: 의원 목록 사이트 url
+    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
+    '''
+    
+    soup = get_soup(url, verify=False)
+    councilors: List[Councilor] = []
+    mlist = soup.find_all('ul', class_='memberUl')[0]
+
+    for profile in mlist.find_all("li", recursive=False):
+        info = profile.find_all('dd')
+        if info:
+            name = profile.find("dd", class_="name").get_text(strip=True)if profile.find("dd", class_="name").get_text(strip=True) else "이름 정보 없음"
+
+            party = "정당 정보 없음"
+            party_dd = info[3].get_text(strip=True).replace("정당 : ", "")
+            if party_dd:
+                party = party_dd
+            councilors.append(Councilor(name=name, jdName=party))
+
+    return ScrapResult(
+        council_id="chilgok",
+        council_type=CouncilType.LOCAL_COUNCIL,
+        councilors=councilors
+    )
+
+if __name__ == '__main__':
+    print(scrap_chilgok())
\ No newline at end of file
diff --git a/scrap/local_councils/gyeongsangbuk/uiseong.py b/scrap/local_councils/gyeongsangbuk/uiseong.py
new file mode 100644
index 0000000..4e510e0
--- /dev/null
+++ b/scrap/local_councils/gyeongsangbuk/uiseong.py
@@ -0,0 +1,38 @@
+from urllib.parse import urlparse
+
+from typing import List
+from scrap.utils.types import CouncilType, Councilor, ScrapResult
+from scrap.utils.requests import get_soup
+import requests
+
+import re
+
+def scrap_uiseong(url = 'http://www.cus.go.kr/kr/member/name.do') -> ScrapResult:
+    '''의성군 페이지에서 의원 상세약력 스크랩
+
+    :param url: 의원 목록 사이트 url
+    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
+    '''
+    
+    soup = get_soup(url, verify=False)
+    councilors: List[Councilor] = []
+
+    for profile in soup.find_all('div', class_="profile"):
+        data_uid = profile.find("a", class_="btn_profile")["data-uid"]
+        
+        if data_uid:
+            url = f"http://www.cus.go.kr/common/async/member/{data_uid}.do"
+            result = requests.get(url).json()
+            name = result['name'] if result['name'] else "이름 정보 없음"
+            party = result['party_nm'] if result['party_nm'] else "정당 정보 없음"
+
+            councilors.append(Councilor(name=name, jdName=party))
+
+    return ScrapResult(
+        council_id="uiseong",
+        council_type=CouncilType.LOCAL_COUNCIL,
+        councilors=councilors
+    )
+
+if __name__ == '__main__':
+    print(scrap_uiseong())
\ No newline at end of file
diff --git a/scrap/local_councils/gyeongsangbuk/uljin.py b/scrap/local_councils/gyeongsangbuk/uljin.py
new file mode 100644
index 0000000..8f66a20
--- /dev/null
+++ b/scrap/local_councils/gyeongsangbuk/uljin.py
@@ -0,0 +1,38 @@
+from urllib.parse import urlparse
+
+from typing import List
+from scrap.utils.types import CouncilType, Councilor, ScrapResult
+from scrap.utils.requests import get_soup
+import requests
+
+import re
+
+def scrap_uljin(url = 'https://council.uljin.go.kr/kr/member/name.do') -> ScrapResult:
+    '''울진군 페이지에서 의원 상세약력 스크랩
+
+    :param url: 의원 목록 사이트 url
+    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
+    '''
+    
+    soup = get_soup(url, verify=False)
+    councilors: List[Councilor] = []
+
+    for profile in soup.find_all('div', class_="profile"):
+        data_uid = profile.find("a", class_="btn_profile")["data-uid"]
+        
+        if data_uid:
+            url = f"http://council.uljin.go.kr/common/async/member/{data_uid}.do"
+            result = requests.get(url).json()
+            name = result['name'] if result['name'] else "이름 정보 없음"
+            party = result['party_nm'] if result['party_nm'] else "정당 정보 없음"
+
+            councilors.append(Councilor(name=name, jdName=party))
+
+    return ScrapResult(
+        council_id="uljin",
+        council_type=CouncilType.LOCAL_COUNCIL,
+        councilors=councilors
+    )
+
+if __name__ == '__main__':
+    print(scrap_uljin())
\ No newline at end of file
diff --git a/scrap/local_councils/gyeongsangbuk/yungduk.py b/scrap/local_councils/gyeongsangbuk/yungduk.py
new file mode 100644
index 0000000..c7d591e
--- /dev/null
+++ b/scrap/local_councils/gyeongsangbuk/yungduk.py
@@ -0,0 +1,37 @@
+from urllib.parse import urlparse
+
+from typing import List
+from scrap.utils.types import CouncilType, Councilor, ScrapResult
+from scrap.utils.requests import get_soup
+import requests
+
+def scrap_yungduk(url = 'https://council.yd.go.kr/kr/member/active') -> ScrapResult:
+    '''영덕군 페이지에서 의원 상세약력 스크랩
+
+    :param url: 의원 목록 사이트 url
+    :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
+    '''
+    
+    soup = get_soup(url, verify=False)
+    councilors: List[Councilor] = []
+    mlist = soup.find_all('div', class_='card_area')
+
+    for profile in mlist:
+        info = profile.find_all('li')
+        if info:
+            name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음"
+
+            party = "정당 정보 없음"
+            party_dd = info[3].get_text(strip=True).replace("정당: ", "")
+            if party_dd:
+                party = party_dd
+            councilors.append(Councilor(name=name, jdName=party))
+
+    return ScrapResult(
+        council_id="yungduk",
+        council_type=CouncilType.LOCAL_COUNCIL,
+        councilors=councilors
+    )
+
+if __name__ == '__main__':
+    print(scrap_yungduk())
\ No newline at end of file
From 01d0a2ada74226d257c9a2d18199ba0775a705d0 Mon Sep 17 00:00:00 2001
From: happycastle114
Date: Wed, 15 Nov 2023 06:25:17 +0000
Subject: [PATCH 19/19] Formatted with black

---
 .../local_councils/gyeongsangbuk/cheongdo.py  |  4 ++-
 scrap/local_councils/gyeongsangbuk/chilgok.py | 28 ++++++++++++-------
 .../local_councils/gyeongsangbuk/goryeong.py  |  4 ++-
 scrap/local_councils/gyeongsangbuk/uiseong.py | 24 ++++++++--------
 scrap/local_councils/gyeongsangbuk/uljin.py   | 24 ++++++++--------
 scrap/local_councils/gyeongsangbuk/yungduk.py | 26 ++++++++++-------
 scrap/utils/types.py                          | 12 ++++----
 7 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/scrap/local_councils/gyeongsangbuk/cheongdo.py b/scrap/local_councils/gyeongsangbuk/cheongdo.py
index 97fa97c..f6bcc04 100644
--- a/scrap/local_councils/gyeongsangbuk/cheongdo.py
+++ b/scrap/local_councils/gyeongsangbuk/cheongdo.py
@@ -3,7 +3,9 @@
 from scrap.utils.requests import get_soup
 
 
-def scrap_cheongdo(url="https://www.cheongdocl.go.kr/kr/member/active.do") -> ScrapResult:
+def scrap_cheongdo(
+    url="https://www.cheongdocl.go.kr/kr/member/active.do",
+) -> ScrapResult:
     """
     Scrap councilors’ details from Yongsan-gu District Council of Seoul page.
 
diff --git a/scrap/local_councils/gyeongsangbuk/chilgok.py b/scrap/local_councils/gyeongsangbuk/chilgok.py
index 46cb2e2..9d7c11f 100644
--- a/scrap/local_councils/gyeongsangbuk/chilgok.py
+++ b/scrap/local_councils/gyeongsangbuk/chilgok.py
@@ -5,21 +5,28 @@
 from scrap.utils.requests import get_soup
 import requests
 
-def scrap_chilgok(url = 'https://council.chilgok.go.kr/content/member/member.html') -> ScrapResult:
-    '''칠곡군 페이지에서 의원 상세약력 스크랩
+
+def scrap_chilgok(
+    url="https://council.chilgok.go.kr/content/member/member.html",
+) -> ScrapResult:
+    """칠곡군 페이지에서 의원 상세약력 스크랩
 
     :param url: 의원 목록 사이트 url
     :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    '''
-    
+    """
+
     soup = get_soup(url, verify=False)
     councilors: List[Councilor] = []
-    mlist = soup.find_all('ul', class_='memberUl')[0]
+    mlist = soup.find_all("ul", class_="memberUl")[0]
 
     for profile in mlist.find_all("li", recursive=False):
-        info = profile.find_all('dd')
+        info = profile.find_all("dd")
         if info:
-            name = profile.find("dd", class_="name").get_text(strip=True)if profile.find("dd", class_="name").get_text(strip=True) else "이름 정보 없음"
+            name = (
+                profile.find("dd", class_="name").get_text(strip=True)
+                if profile.find("dd", class_="name").get_text(strip=True)
+                else "이름 정보 없음"
+            )
 
             party = "정당 정보 없음"
             party_dd = info[3].get_text(strip=True).replace("정당 : ", "")
@@ -30,8 +37,9 @@ def scrap_chilgok(url = 'https://council.chilgok.go.kr/content/member/member.htm
     return ScrapResult(
         council_id="chilgok",
         council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors
+        councilors=councilors,
     )
 
-if __name__ == '__main__':
-    print(scrap_chilgok())
\ No newline at end of file
+
+if __name__ == "__main__":
+    print(scrap_chilgok())
diff --git a/scrap/local_councils/gyeongsangbuk/goryeong.py b/scrap/local_councils/gyeongsangbuk/goryeong.py
index 8218287..e25c642 100644
--- a/scrap/local_councils/gyeongsangbuk/goryeong.py
+++ b/scrap/local_councils/gyeongsangbuk/goryeong.py
@@ -3,7 +3,9 @@
 from scrap.utils.requests import get_soup
 
 
-def scrap_goryeong(url="https://council.goryeong.go.kr/kr/member/active.do") -> ScrapResult:
+def scrap_goryeong(
+    url="https://council.goryeong.go.kr/kr/member/active.do",
+) -> ScrapResult:
     """
     Scrap councilors’ details from Yongsan-gu District Council of Seoul page.
 
diff --git a/scrap/local_councils/gyeongsangbuk/uiseong.py b/scrap/local_councils/gyeongsangbuk/uiseong.py
index 4e510e0..c835f59 100644
--- a/scrap/local_councils/gyeongsangbuk/uiseong.py
+++ b/scrap/local_councils/gyeongsangbuk/uiseong.py
@@ -7,32 +7,34 @@
 
 import re
 
-def scrap_uiseong(url = 'http://www.cus.go.kr/kr/member/name.do') -> ScrapResult:
-    '''의성군 페이지에서 의원 상세약력 스크랩
+
+def scrap_uiseong(url="http://www.cus.go.kr/kr/member/name.do") -> ScrapResult:
+    """의성군 페이지에서 의원 상세약력 스크랩
 
     :param url: 의원 목록 사이트 url
     :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    '''
-    
+    """
+
     soup = get_soup(url, verify=False)
     councilors: List[Councilor] = []
 
-    for profile in soup.find_all('div', class_="profile"):
+    for profile in soup.find_all("div", class_="profile"):
         data_uid = profile.find("a", class_="btn_profile")["data-uid"]
-        
+
         if data_uid:
             url = f"http://www.cus.go.kr/common/async/member/{data_uid}.do"
             result = requests.get(url).json()
-            name = result['name'] if result['name'] else "이름 정보 없음"
-            party = result['party_nm'] if result['party_nm'] else "정당 정보 없음"
+            name = result["name"] if result["name"] else "이름 정보 없음"
+            party = result["party_nm"] if result["party_nm"] else "정당 정보 없음"
 
             councilors.append(Councilor(name=name, jdName=party))
 
     return ScrapResult(
         council_id="uiseong",
         council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors
+        councilors=councilors,
     )
 
-if __name__ == '__main__':
-    print(scrap_uiseong())
\ No newline at end of file
+
+if __name__ == "__main__":
+    print(scrap_uiseong())
diff --git a/scrap/local_councils/gyeongsangbuk/uljin.py b/scrap/local_councils/gyeongsangbuk/uljin.py
index 8f66a20..6304f7d 100644
--- a/scrap/local_councils/gyeongsangbuk/uljin.py
+++ b/scrap/local_councils/gyeongsangbuk/uljin.py
@@ -7,32 +7,34 @@
 
 import re
 
-def scrap_uljin(url = 'https://council.uljin.go.kr/kr/member/name.do') -> ScrapResult:
-    '''울진군 페이지에서 의원 상세약력 스크랩
+
+def scrap_uljin(url="https://council.uljin.go.kr/kr/member/name.do") -> ScrapResult:
+    """울진군 페이지에서 의원 상세약력 스크랩
 
     :param url: 의원 목록 사이트 url
     :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    '''
-    
+    """
+
     soup = get_soup(url, verify=False)
     councilors: List[Councilor] = []
 
-    for profile in soup.find_all('div', class_="profile"):
+    for profile in soup.find_all("div", class_="profile"):
         data_uid = profile.find("a", class_="btn_profile")["data-uid"]
-        
+
         if data_uid:
             url = f"http://council.uljin.go.kr/common/async/member/{data_uid}.do"
             result = requests.get(url).json()
-            name = result['name'] if result['name'] else "이름 정보 없음"
-            party = result['party_nm'] if result['party_nm'] else "정당 정보 없음"
+            name = result["name"] if result["name"] else "이름 정보 없음"
+            party = result["party_nm"] if result["party_nm"] else "정당 정보 없음"
 
             councilors.append(Councilor(name=name, jdName=party))
 
     return ScrapResult(
         council_id="uljin",
         council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors
+        councilors=councilors,
     )
 
-if __name__ == '__main__':
-    print(scrap_uljin())
\ No newline at end of file
+
+if __name__ == "__main__":
+    print(scrap_uljin())
diff --git a/scrap/local_councils/gyeongsangbuk/yungduk.py b/scrap/local_councils/gyeongsangbuk/yungduk.py
index c7d591e..91eba2b 100644
--- a/scrap/local_councils/gyeongsangbuk/yungduk.py
+++ b/scrap/local_councils/gyeongsangbuk/yungduk.py
@@ -5,21 +5,26 @@
 from scrap.utils.requests import get_soup
 import requests
 
-def scrap_yungduk(url = 'https://council.yd.go.kr/kr/member/active') -> ScrapResult:
-    '''영덕군 페이지에서 의원 상세약력 스크랩
+
+def scrap_yungduk(url="https://council.yd.go.kr/kr/member/active") -> ScrapResult:
+    """영덕군 페이지에서 의원 상세약력 스크랩
 
     :param url: 의원 목록 사이트 url
     :return: 의원들의 이름과 정당 데이터를 담은 ScrapResult 객체
-    '''
-    
+    """
+
     soup = get_soup(url, verify=False)
     councilors: List[Councilor] = []
-    mlist = soup.find_all('div', class_='card_area')
+    mlist = soup.find_all("div", class_="card_area")
 
     for profile in mlist:
-        info = profile.find_all('li')
+        info = profile.find_all("li")
         if info:
-            name = profile.find("dt").get_text(strip=True).split("(")[0] if profile.find("dt").get_text(strip=True) else "이름 정보 없음"
+            name = (
+                profile.find("dt").get_text(strip=True).split("(")[0]
+                if profile.find("dt").get_text(strip=True)
+                else "이름 정보 없음"
+            )
 
             party = "정당 정보 없음"
             party_dd = info[3].get_text(strip=True).replace("정당: ", "")
@@ -30,8 +35,9 @@ def scrap_yungduk(url = 'https://council.yd.go.kr/kr/member/active') -> ScrapRes
     return ScrapResult(
         council_id="yungduk",
         council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=councilors
+        councilors=councilors,
     )
 
-if __name__ == '__main__':
-    print(scrap_yungduk())
\ No newline at end of file
+
+if __name__ == "__main__":
+    print(scrap_yungduk())
diff --git a/scrap/utils/types.py b/scrap/utils/types.py
index cf16ccc..7a576b0 100644
--- a/scrap/utils/types.py
+++ b/scrap/utils/types.py
@@ -61,16 +61,16 @@ def __init__(
         pf_elt: str = None,
         pf_cls: str = None,
         pf_memlistelt: str = None,
-        pf_memlistcls: str = None, 
-        name_elt: str = None, 
+        pf_memlistcls: str = None,
+        name_elt: str = None,
         name_cls: str = None,
         name_wrapelt: str = None,
         name_wrapcls: str = None,
-        pty_elt: str = None, 
-        pty_cls: str = None, 
-        pty_wrapelt: str = None, 
+        pty_elt: str = None,
+        pty_cls: str = None,
+        pty_wrapelt: str = None,
         pty_wrapcls: str = None,
-        pty_wraptxt: str = None, 
+        pty_wraptxt: str = None,
     ):
         """
         ScrapBasicArgument 클래스의 생성자입니다.