-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
191 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
from scrap.local_councils import * | ||
|
||
|
||
def scrap_65(url, cid) -> ScrapResult:
    """Scrape the councilor list for Daejeon Dong-gu (대전 동구).

    The list page only exposes each councilor's name; the party is read from
    a per-councilor profile popup fetched with a second request.

    Args:
        url: URL of the councilor list page.
        cid: Council identifier, passed through to ``ret_local_councilors``.

    Returns:
        Whatever ``ret_local_councilors(cid, councilors)`` builds from the
        collected ``Councilor`` entries.
    """
    # NOTE(review): verify=False presumably disables TLS certificate checks
    # inside get_soup -- confirm against the helper.
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # The profile popup is addressed relative to the site root, so keep only
    # the scheme and host of the list page URL.
    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    for profile in soup.find_all("dl", class_="profile"):
        name_tag = profile.find("strong", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"
        party = "정당 정보 없음"

        # The "view profile" link carries the uid needed to build the popup URL.
        profile_link = profile.find("a", class_="start")
        data_uid = profile_link.get("data-uid") if profile_link else None
        if data_uid:
            profile_url = f"{base_url}/kr/member/profile_popup?uid={data_uid}"
            profile_soup = get_soup(profile_url, verify=False)
            party_label = profile_soup.find("strong", string="정 당")
            party_span = party_label.find_next("span") if party_label else None
            if party_span is not None:
                # Strip surrounding whitespace so the value is normalized the
                # same way as in the other Daejeon scrapers.
                party = party_span.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
|
||
|
||
def scrap_66(url, cid) -> ScrapResult:
    """Scrape the councilor list for Daejeon Jung-gu (대전 중구).

    Args:
        url: URL of the councilor list page.
        cid: Council identifier, passed through to ``ret_local_councilors``.

    Returns:
        Whatever ``ret_local_councilors(cid, councilors)`` builds from the
        collected ``Councilor`` entries.
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("div", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string="소속정당")
        # find_next returns None when no <span> follows the label; keep the
        # placeholder in that case instead of raising AttributeError.
        party_span = party_label.find_next("span") if party_label else None
        if party_span is not None:
            party = party_span.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
|
||
|
||
def scrap_67(
    url,
    cid,
) -> ScrapResult:
    """Scrape the councilor list for Daejeon Seo-gu (대전 서구).

    Every ``<dl>`` on the page is treated as one councilor entry. The name
    comes from ``<dd class="name">`` with the " 의원" suffix removed, and the
    party from the first ``<dd>`` whose markup contains "정당".

    Args:
        url: URL of the councilor list page.
        cid: Council identifier, passed through to ``ret_local_councilors``.

    Returns:
        Whatever ``ret_local_councilors(cid, councilors)`` builds from the
        collected ``Councilor`` entries.
    """
    page = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    for entry in page.find_all("dl"):
        name_dd = entry.find("dd", class_="name")
        if name_dd:
            name = name_dd.get_text(strip=True).replace(" 의원", "")
        else:
            name = "이름 정보 없음"

        # First <dd> whose rendered markup mentions the party label, if any.
        party_dd = next(
            (dd for dd in entry.find_all("dd") if "정당" in str(dd)),
            None,
        )
        if party_dd is None:
            party = "정당 정보 없음"
        else:
            party = party_dd.get_text(strip=True).replace("정당: ", "")

        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
|
||
|
||
def scrap_68(url, cid) -> ScrapResult:
    """Scrape the councilor list for Daejeon Yuseong-gu (대전 유성구).

    Args:
        url: URL of the councilor list page.
        cid: Council identifier, passed through to ``ret_local_councilors``.

    Returns:
        Whatever ``ret_local_councilors(cid, councilors)`` builds from the
        collected ``Councilor`` entries.
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Compiled once for all profiles. Matches the party label with optional
    # whitespace between the syllables and before the colon. The IGNORECASE
    # flag is carried over unchanged from the original pattern.
    party_label_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE)

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        # Drop the Hanja in parentheses (e.g. 김영희(金英姬) -> 김영희).
        name = name_tag.get_text(strip=True).split("(")[0] if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string=party_label_pattern)
        # find_next returns None when no <span> follows the label; keep the
        # placeholder in that case instead of raising AttributeError.
        party_span = party_label.find_next("span") if party_label else None
        if party_span is not None:
            party = party_span.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
|
||
|
||
def scrap_69(url, cid) -> ScrapResult:
    """Scrape the councilor list for Daejeon Daedeok-gu (대전 대덕구).

    Args:
        url: URL of the councilor list page.
        cid: Council identifier, passed through to ``ret_local_councilors``.

    Returns:
        Whatever ``ret_local_councilors(cid, councilors)`` builds from the
        collected ``Councilor`` entries.
    """
    soup = get_soup(url, verify=False)
    councilors: List[Councilor] = []

    # Compiled once for all profiles. Matches the party label with optional
    # whitespace between the syllables and before the colon. The IGNORECASE
    # flag is carried over unchanged from the original pattern.
    party_label_pattern = re.compile(r"정\s*당\s*:", re.IGNORECASE)

    for profile in soup.find_all("div", class_="profile"):
        name_tag = profile.find("em", class_="name")
        name = name_tag.get_text(strip=True) if name_tag else "이름 정보 없음"

        party = "정당 정보 없음"
        party_label = profile.find("em", string=party_label_pattern)
        # find_next returns None when no <span> follows the label; keep the
        # placeholder in that case instead of raising AttributeError.
        party_span = party_label.find_next("span") if party_label else None
        if party_span is not None:
            party = party_span.get_text(strip=True)

        councilors.append(Councilor(name=name, jdName=party))

    return ret_local_councilors(cid, councilors)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
""" | ||
크롤링을 실행, 진행결과 알림, 크롤링결과를 mongoDB로 저장하는 | ||
기능을 담당하는 모듈입니다. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import smtplib | ||
from email.mime.text import MIMEText | ||
from configurations.secrets import EmailSecrets | ||
|
||
# Email server settings (this example uses Gmail's SMTP host and port) | ||
smtp_server = "smtp.gmail.com" | ||
smtp_port = 587 | ||
|
||
def email_result(emessages):
    """Send the scraping result text by email.

    Builds a plain-text message whose body is ``emessages`` and sends it
    through the module-level ``smtp_server`` / ``smtp_port`` using the
    credentials in ``EmailSecrets``. Failures while connecting or sending
    are caught and reported with ``print``; they are not re-raised.

    Args:
        emessages: Text used as the body of the email.
    """
    # Assemble the message and its headers.
    mail = MIMEText(emessages)
    headers = (
        ("Subject", "스크래핑 결과"),
        ("From", EmailSecrets.sender_email),
        ("To", EmailSecrets.receiver_email),
    )
    for header_name, header_value in headers:
        mail[header_name] = header_value

    # Deliver over an SMTP connection upgraded with STARTTLS.
    try:
        with smtplib.SMTP(smtp_server, smtp_port) as connection:
            connection.starttls()
            sender = mail["From"]
            connection.login(sender, EmailSecrets.password)
            connection.sendmail(sender, mail["To"], mail.as_string())
            print("이메일이 성공적으로 전송되었습니다.")
    except Exception as error:
        print(f"이메일 전송 중 오류 발생: {error}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters