Skip to content

Commit

Permalink
Merge pull request #18 from NewWays-TechForImpactKAIST/14-create-andong
Browse files Browse the repository at this point in the history
Add: Gyeongsangbuk-do Councils
  • Loading branch information
Re-st authored Nov 15, 2023
2 parents 941f996 + 01d0a2a commit 242b59f
Show file tree
Hide file tree
Showing 45 changed files with 1,578 additions and 13 deletions.
8 changes: 8 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from scrap.local_councils.gyeongsangbuk import (
scrap_andong,
scrap_pohang,
scrap_gyeongju,
)

if __name__ == "__main__":
    # Manual smoke run: scrape the Gyeongju council with its default URL and print the result.
    result = scrap_gyeongju()
    print(result)
3 changes: 3 additions & 0 deletions scrap/local_councils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
각 기초의회들의 크롤링 코드를 모아놓은 패키지입니다.
광역자치단체 별로 폴더를 만들어서 관리합니다.
"""
from .daejeon.daejeon import *
from .ulsan import *
from .daejeon import *
import re
from urllib.parse import urlparse
from typing import List
Expand Down
2 changes: 2 additions & 0 deletions scrap/local_councils/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import re
import requests
import copy
from scrap.utils.utils import getPartyList
from scrap.utils.types import ScrapBasicArgument

# Matches "정", optional whitespace, any run of non-whitespace characters,
# optional whitespace, then "당".
# NOTE(review): re.IGNORECASE presumably has no effect on these Hangul literals — confirm.
regex_pattern = re.compile(r"정\s*\S*\s*당", re.IGNORECASE) # Case-insensitive
# Evaluated once at import time.
# NOTE(review): presumably the list of known party names — verify against getPartyList.
party_keywords = getPartyList()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from scrap.local_councils import *
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup


def scrap_65(url, cid) -> ScrapResult:
Expand Down
10 changes: 10 additions & 0 deletions scrap/local_councils/gyeongsangbuk/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
Package collecting the crawling code for the basic local councils of Gyeongsangbuk-do.
"""
from .andong import scrap_andong
from .pohang import scrap_pohang
from .gyeongju import scrap_gyeongju
from .gimcheon import scrap_gimcheon
from .sangju import scrap_sangju
from .moongyeong import scrap_moongyeong
from .yaecheon import scrap_yaecheon
38 changes: 38 additions & 0 deletions scrap/local_councils/gyeongsangbuk/andong.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup

import re


def scrap_andong(url="https://council.andong.go.kr/kr/member/name.do") -> ScrapResult:
    """Scrape councilor names and parties from the Andong council member list.

    Missing pieces fall back to the Korean placeholder strings instead of
    raising, so one malformed profile does not abort the whole scrape.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string="소속정당")
        # The party value is read from the first <span> after the label;
        # find_next returns None when there is none, so guard before reading it.
        party_tag = party_label.find_next("span") if party_label else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): sibling scrapers (e.g. cheongdo) pass the party as
        # jdName=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="andong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_andong()
    print(result)
38 changes: 38 additions & 0 deletions scrap/local_councils/gyeongsangbuk/cheongdo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup


def scrap_cheongdo(
    url="https://www.cheongdocl.go.kr/kr/member/active.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Cheongdo council member list.

    Missing pieces fall back to the Korean placeholder strings instead of
    raising, so one malformed profile does not abort the whole scrape.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        # The label text is matched exactly, including the trailing " : ".
        party_label = profile.find("em", string="소속정당 : ")
        # find_next returns None when no <span> follows the label; guard before reading it.
        party_tag = party_label.find_next("span") if party_label else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): other scrapers in this package (e.g. andong) pass
        # party=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="cheongdo",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_cheongdo()
    print(result)
45 changes: 45 additions & 0 deletions scrap/local_councils/gyeongsangbuk/chilgok.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
import requests


def scrap_chilgok(
    url="https://council.chilgok.go.kr/content/member/member.html",
) -> ScrapResult:
    """Scrape councilor names and parties from the Chilgok council member list.

    A missing member list yields an empty result, and missing name/party
    cells fall back to the Korean placeholder strings, instead of raising.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # find() returns the first match or None, avoiding the IndexError that
    # find_all(...)[0] raises when the list is absent.
    member_list = soup.find("ul", class_="memberUl")
    profiles = member_list.find_all("li", recursive=False) if member_list else []

    for profile in profiles:
        info = profile.find_all("dd")
        if not info:
            # <li> entries without any <dd> carry no councilor data.
            continue

        name_tag = profile.find("dd", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else ""
        if not name:
            name = "이름 정보 없음"

        party = "정당 정보 없음"
        # The party is read from the fourth <dd>; skip it when the entry is shorter.
        if len(info) > 3:
            party_text = info[3].get_text(strip=True).replace("정당 : ", "")
            if party_text:
                party = party_text

        # NOTE(review): other scrapers in this package (e.g. andong) pass
        # party=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="chilgok",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_chilgok()
    print(result)
38 changes: 38 additions & 0 deletions scrap/local_councils/gyeongsangbuk/gimcheon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
import re


def scrap_gimcheon(url="https://council.gc.go.kr/kr/member/active.do") -> ScrapResult:
    """Scrape councilor names and parties from the Gimcheon council member list.

    A missing member list yields an empty result, and missing name/party
    elements fall back to the Korean placeholder strings, instead of raising.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # find() returns the first match or None, avoiding the IndexError that
    # find_all(...)[0] raises when the list is absent.
    member_list = soup.find("ul", class_="memberList")
    profiles = member_list.find_all("li", recursive=False) if member_list else []

    for profile in profiles:
        name_tag = profile.find("h4")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        # The label text is matched exactly, including the trailing " : ".
        party_label = profile.find("span", string="소속정당 : ")
        # find_next returns None when no <span> follows the label; guard before reading it.
        party_tag = party_label.find_next("span") if party_label else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): sibling scrapers (e.g. cheongdo) pass the party as
        # jdName=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gimcheon",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_gimcheon()
    print(result)
38 changes: 38 additions & 0 deletions scrap/local_councils/gyeongsangbuk/goryeong.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup


def scrap_goryeong(
    url="https://council.goryeong.go.kr/kr/member/active.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Goryeong council member list.

    Missing pieces fall back to the Korean placeholder strings instead of
    raising, so one malformed profile does not abort the whole scrape.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        if name_tag:
            # Keep only the text before the first carriage return in the name element.
            name = name_tag.get_text(strip=True).split("\r")[0]
        else:
            name = "이름 정보 없음"

        party = "정당 정보 없음"
        # The label text is matched exactly, including its inner spaces.
        party_label = profile.find("em", string="정 당 : ")
        # find_next returns None when no <span> follows the label; guard before reading it.
        party_tag = party_label.find_next("span") if party_label else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): other scrapers in this package (e.g. andong) pass
        # party=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, jdName=party))

    return ScrapResult(
        council_id="goryeong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_goryeong()
    print(result)
38 changes: 38 additions & 0 deletions scrap/local_councils/gyeongsangbuk/gumi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
import re


def scrap_gumi(
    url="https://gumici.or.kr/content/member/memberName.html",
) -> ScrapResult:
    """Scrape councilor names and parties from the Gumi council member list.

    A missing member list yields an empty result, and missing name/party
    elements fall back to the Korean placeholder strings, instead of raising.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # find() returns the first match or None, avoiding the IndexError that
    # find_all(...)[0] raises when the list is absent.
    member_list = soup.find("ul", class_="mlist")
    # Unlike the sibling scrapers, <li> elements are collected recursively here.
    profiles = member_list.find_all("li") if member_list else []

    for profile in profiles:
        name_tag = profile.find("dd", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        # The label is a <span> whose text is exactly "정".
        party_label = profile.find("span", string="정")
        # find_next returns None when no <span> follows the label; guard before reading it.
        party_tag = party_label.find_next("span") if party_label else None
        if party_tag is not None:
            party = party_tag.get_text(strip=True)

        # NOTE(review): sibling scrapers (e.g. cheongdo) pass the party as
        # jdName=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gumi", council_type=CouncilType.LOCAL_COUNCIL, councilors=councilors
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_gumi()
    print(result)
41 changes: 41 additions & 0 deletions scrap/local_councils/gyeongsangbuk/gyeongju.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
import re
import requests


def scrap_gyeongju(
    url="https://council.gyeongju.go.kr/kr/member/name.do",
) -> ScrapResult:
    """Scrape councilor names and parties from the Gyeongju council site.

    The list page only supplies each member's ``data-uid``; name and party
    are then read from the per-member JSON endpoint.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    :raises requests.RequestException: if a per-member request fails or times out
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []
    detail_base = "https://council.gyeongju.go.kr/common/async/member"

    for profile in soup.find_all("div", class_="profile"):
        # Profiles without a detail button or uid cannot be resolved; skip
        # them rather than failing on a None subscript or missing attribute.
        button = profile.find("a", class_="btn_profile")
        data_uid = button.get("data-uid") if button else None
        if not data_uid:
            continue

        # A separate name keeps the `url` parameter intact, and the timeout
        # stops one unresponsive request from hanging the scrape forever.
        detail_url = f"{detail_base}/{data_uid}.do"
        result = requests.get(detail_url, timeout=30).json()
        name = result.get("name") or "이름 정보 없음"
        party = result.get("party_nm") or "정당 정보 없음"

        # NOTE(review): sibling scrapers (e.g. cheongdo) pass the party as
        # jdName=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="gyeongju",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_gyeongju()
    print(result)
39 changes: 39 additions & 0 deletions scrap/local_councils/gyeongsangbuk/moongyeong.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from urllib.parse import urlparse

from typing import List
from scrap.utils.types import CouncilType, Councilor, ScrapResult
from scrap.utils.requests import get_soup
import re
import requests


def scrap_moongyeong(url="https://council.gbmg.go.kr/kr/member/name.do") -> ScrapResult:
    """Scrape councilor names and parties from the Mungyeong council site.

    The list page only supplies each member's ``data-uid``; name and party
    are then read from the per-member JSON endpoint.

    :param url: URL of the council member list page
    :return: ScrapResult holding each councilor's name and party
    :raises requests.RequestException: if a per-member request fails or times out
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []
    detail_base = "https://council.gbmg.go.kr/common/async/member"

    for profile in soup.find_all("div", class_="profile"):
        # Profiles without a detail button or uid cannot be resolved; skip
        # them rather than failing on a None subscript or missing attribute.
        button = profile.find("a", class_="btn_profile")
        data_uid = button.get("data-uid") if button else None
        if not data_uid:
            continue

        # A separate name keeps the `url` parameter intact, and the timeout
        # stops one unresponsive request from hanging the scrape forever.
        detail_url = f"{detail_base}/{data_uid}.do"
        result = requests.get(detail_url, timeout=30).json()
        name = result.get("name") or "이름 정보 없음"
        party = result.get("party_nm") or "정당 정보 없음"

        # NOTE(review): sibling scrapers (e.g. cheongdo) pass the party as
        # jdName=...; confirm which keyword Councilor actually accepts.
        councilors.append(Councilor(name=name, party=party))

    return ScrapResult(
        council_id="moongyeong",
        council_type=CouncilType.LOCAL_COUNCIL,
        councilors=councilors,
    )


if __name__ == "__main__":
    # Manual smoke run: scrape with the default URL and print the result.
    result = scrap_moongyeong()
    print(result)
Loading

0 comments on commit 242b59f

Please sign in to comment.