-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from NewWays-TechForImpactKAIST/14-create-andong
Add: Gyeongsangbuk-do Councils
- Loading branch information
Showing
45 changed files
with
1,578 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from scrap.local_councils.gyeongsangbuk import ( | ||
scrap_andong, | ||
scrap_pohang, | ||
scrap_gyeongju, | ||
) | ||
|
||
if __name__ == "__main__":
    # Manual run: scrape Gyeongju with its default URL and dump the result.
    scraped = scrap_gyeongju()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 2 additions & 0 deletions
2
scrap/local_councils/daejeon.py → scrap/local_councils/daejeon/daejeon.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
경상북도 기초의회들의 크롤링 코드를 모아둔 사이트입니다. | ||
""" | ||
from .andong import scrap_andong | ||
from .pohang import scrap_pohang | ||
from .gyeongju import scrap_gyeongju | ||
from .gimcheon import scrap_gimcheon | ||
from .sangju import scrap_sangju | ||
from .moongyeong import scrap_moongyeong | ||
from .yaecheon import scrap_yaecheon |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
|
||
import re | ||
|
||
|
||
def scrap_andong(url="https://council.andong.go.kr/kr/member/name.do") -> ScrapResult:
    """Scrape councilor names and parties from the Andong council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string="소속정당")
        # The party value is read from the first <span> following the label.
        # Keep the placeholder when either the label or that <span> is absent
        # instead of raising AttributeError on None.
        party_tag = party_label.find_next("span") if party_label is not None else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): other scrapers in this change set pass jdName= instead
        # of party=; confirm which field Councilor actually defines.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="andong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Andong with its default URL and dump the result.
    scraped = scrap_andong()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
|
||
|
||
def scrap_cheongdo(
    url="https://www.cheongdocl.go.kr/kr/member/active.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Cheongdo council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string="소속정당 : ")
        # The party value is read from the first <span> following the label.
        # Keep the placeholder when either the label or that <span> is absent
        # instead of raising AttributeError on None.
        party_tag = party_label.find_next("span") if party_label is not None else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="cheongdo",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Cheongdo with its default URL and dump the result.
    scraped = scrap_cheongdo()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
import requests | ||
|
||
|
||
def scrap_chilgok(
    url="https://council.chilgok.go.kr/content/member/member.html",
) -> ScrapResult:
    """Scrape councilor names and parties from the Chilgok council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Only the first <ul class="memberUl"> is used. If the page has none,
    # return an empty result rather than failing with IndexError.
    member_list = soup.find("ul", class_="memberUl")
    profiles = member_list.find_all("li", recursive=False) if member_list is not None else []

    for profile in profiles:
        info = profile.find_all("dd")
        if not info:
            # <li> entries without any <dd> carry no member data.
            continue

        name_tag = profile.find("dd", class_="name")
        name_text = name_tag.get_text(strip=True) if name_tag is not None else ""
        name = name_text or "이름 정보 없음"

        party = "정당 정보 없음"
        # The party is taken from the fourth <dd>, with its "정당 : " label
        # stripped; entries with fewer <dd> elements keep the placeholder.
        if len(info) > 3:
            party_text = info[3].get_text(strip=True).replace("정당 : ", "")
            if party_text:
                party = party_text

        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="chilgok",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Chilgok with its default URL and dump the result.
    scraped = scrap_chilgok()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
import re | ||
|
||
|
||
def scrap_gimcheon(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult:
    """Scrape councilor names and parties from the Gimcheon council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Only the first <ul class="memberList"> is used. If the page has none,
    # return an empty result rather than failing with IndexError.
    member_list = soup.find("ul", class_="memberList")
    profiles = member_list.find_all("li", recursive=False) if member_list is not None else []

    for profile in profiles:
        name_tag = profile.find("h4")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("span", string="소속정당 : ")
        # The party value is read from the first <span> following the label.
        # Keep the placeholder when either the label or that <span> is absent
        # instead of raising AttributeError on None.
        party_tag = party_label.find_next("span") if party_label is not None else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): other scrapers in this change set pass jdName= instead
        # of party=; confirm which field Councilor actually defines.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gimcheon",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Gimcheon with its default URL and dump the result.
    scraped = scrap_gimcheon()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
|
||
|
||
def scrap_goryeong(
    url="https://council.goryeong.go.kr/kr/member/active.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Goryeong council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        # Only the text before the first carriage return is kept as the name.
        name = name_tag.get_text(strip=True).split("\r")[0] if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string="정 당 : ")
        # The party value is read from the first <span> following the label.
        # Keep the placeholder when either the label or that <span> is absent
        # instead of raising AttributeError on None.
        party_tag = party_label.find_next("span") if party_label is not None else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="goryeong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Goryeong with its default URL and dump the result.
    scraped = scrap_goryeong()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
import re | ||
|
||
|
||
def scrap_gumi(
    url="https://gumici.or.kr/content/member/memberName.html",
) -> ScrapResult:
    """Scrape councilor names and parties from the Gumi council member list.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Only the first <ul class="mlist"> is used. If the page has none,
    # return an empty result rather than failing with IndexError.
    member_list = soup.find("ul", class_="mlist")
    profiles = member_list.find_all("li") if member_list is not None else []

    for profile in profiles:
        name_tag = profile.find("dd", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("span", string="정")
        # The party value is read from the first <span> following the label.
        # Keep the placeholder when either the label or that <span> is absent
        # instead of raising AttributeError on None.
        party_tag = party_label.find_next("span") if party_label is not None else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): other scrapers in this change set pass jdName= instead
        # of party=; confirm which field Councilor actually defines.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gumi", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Gumi with its default URL and dump the result.
    scraped = scrap_gumi()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
import re | ||
import requests | ||
|
||
|
||
def scrap_gyeongju(
    url="https://council.gyeongju.go.kr/kr/member/name.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Gyeongju council site.

    The member-list page only supplies a ``data-uid`` per councilor; name and
    party are read from a per-member JSON endpoint on the same host.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Build the detail endpoint from the host of ``url`` so a caller-supplied
    # URL is honoured; with the default URL this yields the same address as
    # the previously hard-coded one.
    parsed = urlparse(url)
    base_url = f"{parsed.scheme}://{parsed.netloc}"

    for profile in soup.find_all("div", class_="profile"):
        link = profile.find("a", class_="btn_profile")
        data_uid = link.get("data-uid") if link is not None else None
        if not data_uid:
            # No profile link or uid: there is nothing to look up.
            continue

        # Use a separate local name so the ``url`` parameter is not overwritten.
        detail_url = f"{base_url}/common/async/member/{data_uid}.do"
        result = requests.get(detail_url).json()
        # Missing or empty fields fall back to the placeholder strings.
        name = result.get("name") or "이름 정보 없음"
        party = result.get("party_nm") or "정당 정보 없음"

        # NOTE(review): other scrapers in this change set pass jdName= instead
        # of party=; confirm which field Councilor actually defines.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gyeongju",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Gyeongju with its default URL and dump the result.
    scraped = scrap_gyeongju()
    print(scraped)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from urllib.parse import urlparse | ||
|
||
from typing import List | ||
from scrap.utils.types import CouncilType, Councilor, ScrapResult | ||
from scrap.utils.requests import get_soup | ||
import re | ||
import requests | ||
|
||
|
||
def scrap_moongyeong(url="https://council.gbmg.go.kr/kr/member/name.do") -> ScrapResult:
    """Scrape councilor names and parties from the Mungyeong council site.

    The member-list page only supplies a ``data-uid`` per councilor; name and
    party are read from a per-member JSON endpoint on the same host.

    :param url: URL of the council's member-list page
    :return: ScrapResult holding the name and party of every councilor found
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Build the detail endpoint from the host of ``url`` so a caller-supplied
    # URL is honoured; with the default URL this yields the same address as
    # the previously hard-coded one.
    parsed = urlparse(url)
    base_url = f"{parsed.scheme}://{parsed.netloc}"

    for profile in soup.find_all("div", class_="profile"):
        link = profile.find("a", class_="btn_profile")
        data_uid = link.get("data-uid") if link is not None else None
        if not data_uid:
            # No profile link or uid: there is nothing to look up.
            continue

        # Use a separate local name so the ``url`` parameter is not overwritten.
        detail_url = f"{base_url}/common/async/member/{data_uid}.do"
        result = requests.get(detail_url).json()
        # Missing or empty fields fall back to the placeholder strings.
        name = result.get("name") or "이름 정보 없음"
        party = result.get("party_nm") or "정당 정보 없음"

        # NOTE(review): other scrapers in this change set pass jdName= instead
        # of party=; confirm which field Councilor actually defines.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="moongyeong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )
|
||
|
||
if __name__ == "__main__":
    # Manual run: scrape Mungyeong with its default URL and dump the result.
    scraped = scrap_moongyeong()
    print(scraped)
Oops, something went wrong.