Skip to content

Commit

Permalink
Formatted with black
Browse files Browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 22, 2023
1 parent 13b5c52 commit 1d07256
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 42 deletions.
42 changes: 28 additions & 14 deletions analysis/age/hist_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from analysis.age.draw import make_scatterplot, make_hist
from db.client import client


def plot_young_and_old(youngest_cluster, oldest_cluster):
try:
sns.histplot(
Expand Down Expand Up @@ -65,6 +66,7 @@ def cluster_data(method, n_clst, df):
df.loc[df["age"] == min_age, "cluster_label"] = i
return df


# 이름이 바뀐 경우
change_city_name = {
("충청남도", "당진군"): "당진시",
Expand All @@ -75,9 +77,10 @@ def cluster_data(method, n_clst, df):
("인천광역시", "남구"): "미추홀구",
}

#
#
change_lvl2to1 = {"연기군": "세종특별자치시"}


def change_local_name(sdName, wiwName):
"""
1. 만약 '시' 와 '구'가 모두 wiwName에 있다면, '시' 까지만 쓰기
Expand All @@ -90,20 +93,23 @@ def change_local_name(sdName, wiwName):
"""
if (sdName, wiwName) in change_city_name:
return change_city_name[(sdName, wiwName)]
if '구' in wiwName and '시' in wiwName:
return wiwName.split('시')[0] + '시'
if "구" in wiwName and "시" in wiwName:
return wiwName.split("시")[0] + "시"
else:
return wiwName


def local_to_metro_list(sdName, wiwName):
    """Return the metropolitan-level name (sdName) to use for a district.

    Covers districts that were promoted from a local unit (gu/si/gun) to a
    metropolitan city/province. When ``wiwName`` is a key of the module-level
    ``change_lvl2to1`` mapping, the change is printed and the mapped name is
    returned; otherwise ``sdName`` is returned unchanged.

    Args:
        sdName: Metropolitan-level name currently attached to the row.
        wiwName: Local-level name of the row.

    Returns:
        ``change_lvl2to1[wiwName]`` if the district is listed there,
        else ``sdName``.
    """
    if wiwName in change_lvl2to1:
        # Look the replacement up once and reuse it for the log line and result.
        promoted = change_lvl2to1[wiwName]
        print("change", wiwName, "to", promoted)
        return promoted
    return sdName


def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name):
"""구역별 그룹을 만듭니다.
df: 데이터프레임
Expand Down Expand Up @@ -134,8 +140,12 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name

# wiwName을 처리합니다
if level == "2level":
df['sdName'] = df[['sdName', 'wiwName']].apply(lambda x: local_to_metro_list(*x), axis=1)
df['wiwName'] = df[['sdName', 'wiwName']].apply(lambda x: change_local_name(*x), axis=1)
df["sdName"] = df[["sdName", "wiwName"]].apply(
lambda x: local_to_metro_list(*x), axis=1
)
df["wiwName"] = df[["sdName", "wiwName"]].apply(
lambda x: change_local_name(*x), axis=1
)
# 데이터프레임에서 시도별로 묶은 후 나이 열만 가져옵니다.
df_age = pd.DataFrame(columns=["area", "age"])
for area, df_clst in df.groupby(cluster_by):
Expand Down Expand Up @@ -181,25 +191,29 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
"ageGroup": age_group,
}
for age, count, age_group in zip(
range(df_clst['age'].min(), df_clst['age'].max() + 1),
df_clst.groupby('age').size(),
df_clst.groupby('age')['cluster_label'].first()
range(df_clst["age"].min(), df_clst["age"].max() + 1),
df_clst.groupby("age").size(),
df_clst.groupby("age")["cluster_label"].first(),
)
]
metroname = df_clst["sdName"].iloc[0]
metroId = metroIds.find_one({"sdName": metroname})["metroId"]
if level == "1level":
print ("sdName is ", metroname)
print("sdName is ", metroname)
main_collection.insert_one({"metroId": metroId, "data": data})
elif metroname in change_lvl2to1.values():
print ("sdName is ", metroname)
print("sdName is ", metroname)
lvl1_collection = db[folder_name + "_" + year + "_1level_" + method]
lvl1_collection.insert_one({"metroId": metroId, "data": data})
else:
localname = df_clst["wiwName"].iloc[0]
print ("sdName is ", metroname, "wiwName is", localname)
localId = localIds.find_one({"sdName": metroname, "wiwName": localname})["localId"]
main_collection.insert_one({"metroId": metroId, "localId": localId, "data": data})
print("sdName is ", metroname, "wiwName is", localname)
localId = localIds.find_one({"sdName": metroname, "wiwName": localname})[
"localId"
]
main_collection.insert_one(
{"metroId": metroId, "localId": localId, "data": data}
)

# # 그리기
# package = (
Expand Down
1 change: 1 addition & 0 deletions configurations/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class OpenDataPortalSecrets:

service_key = str(os.getenv("OPEN_DATA_SERICE_KEY") or "")


class EmailSecrets:
"""
스크랩 결과 이메일 전송에 필요한 키를 정의합니다.
Expand Down
2 changes: 1 addition & 1 deletion scrap/local_councils/daejeon.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,4 @@ def scrap_69(url, cid) -> ScrapResult:
party = party_info.find_next("span").get_text(strip=True)
councilors.append(Councilor(name=name, jdName=party))

return ret_local_councilors(cid, councilors)
return ret_local_councilors(cid, councilors)
2 changes: 1 addition & 1 deletion scrap/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
크롤링을 실행, 진행결과 알림, 크롤링결과를 mongoDB로 저장하는
기능을 담당하는 모듈입니다.
"""
"""
33 changes: 17 additions & 16 deletions scrap/utils/email_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@
smtp_server = "smtp.gmail.com"
smtp_port = 587


def email_result(emessages):
    """Send the scraping result text by email.

    Builds a plain-text message from ``emessages`` with a fixed subject,
    using the sender and receiver addresses from ``EmailSecrets``, then sends
    it through ``smtp_server``:``smtp_port`` after STARTTLS and login.
    Success or failure is reported with ``print``; any exception raised while
    sending is caught and printed rather than propagated.

    Args:
        emessages: Body text of the email.
    """
    # Email content.
    subject = "스크래핑 결과"
    # Compose the message.
    msg = MIMEText(emessages)
    msg["Subject"] = subject
    msg["From"] = EmailSecrets.sender_email
    msg["To"] = EmailSecrets.receiver_email

    # Send the email.
    try:
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.starttls()
            server.login(msg["From"], EmailSecrets.password)
            server.sendmail(msg["From"], msg["To"], msg.as_string())
            print("이메일이 성공적으로 전송되었습니다.")
    except Exception as e:
        # Best-effort notification: report the failure instead of raising.
        print(f"이메일 전송 중 오류 발생: {e}")
66 changes: 56 additions & 10 deletions scrap/utils/spreadsheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,53 @@ def main() -> None:
0
) # 원하는 워크시트 선택 (0은 첫 번째 워크시트입니다.)
# TODO - 홈페이지 위 charset=euc-kr 등을 인식해 바로 가져오기.
euc_kr = [6, 13, 16, 31, 72, 88, 112, 134, 154, 157, 163, 165, 167, 176, 181,
197, 202, 222]
euc_kr = [
6,
13,
16,
31,
72,
88,
112,
134,
154,
157,
163,
165,
167,
176,
181,
197,
202,
222,
]
special_functions = (
list(range(1, 57))
+ [62, 63, 64, 88, 97, 103, 107]
+ list(range(113, 127))
+ [132, 134, 140, 142, 154, 155, 156, 157, 160, 161, 162, 163, 164, 165, 167]
+ list(range(177, 180))
+ [182, 183, 184, 186, 188, 189, 190, 191, 194, 195, 196, 198, 199, 201, 203,
206, 208, 209, 210]
+ [
182,
183,
184,
186,
188,
189,
190,
191,
194,
195,
196,
198,
199,
201,
203,
206,
208,
209,
210,
]
+ list(range(212, 221))
+ [222, 223, 224, 226]
)
Expand All @@ -100,19 +137,25 @@ def main() -> None:
N = 226
emessages: str = ""
enumbers = []

def add_error(n, msg):
nonlocal emessages
emsg: str = f"| {n:3} | 오류: {msg}"
emessages += emsg
enumbers.append(n)

for n in range(1, N + 1):
if n in no_information + error_unsolved:
emsg: str = (
"지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다. \
(
"지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다. \
다시 확인해보시겠어요?"
if n in no_information
else "함수 구현에 실패한 웹페이지입니다."
) + " 링크: " + data[n - 1]["URL"]
if n in no_information
else "함수 구현에 실패한 웹페이지입니다."
)
+ " 링크: "
+ data[n - 1]["URL"]
)
add_error(n, emsg)
continue
encoding = "euc-kr" if n in euc_kr else "utf-8"
Expand Down Expand Up @@ -154,12 +197,15 @@ def add_error(n, msg):
add_error(n, emsg)
except Exception as e:
add_error(n, "기타 오류 - " + str(e))
emessages = f"""
emessages = (
f"""
총 실행 횟수: {N}
에러: {enumbers}, 총 {len(enumbers)}
그 중 '정보 없음' 횟수: {parse_error_times}
타임아웃 횟수: {timeouts}
""" + emessages
"""
+ emessages
)
email_result(emessages)


Expand Down

0 comments on commit 1d07256

Please sign in to comment.