Skip to content

Commit

Permalink
[analysis] 행정구역 개편으로 인한 이름변경 고침
Browse files Browse the repository at this point in the history
  • Loading branch information
Re-st committed Nov 22, 2023
1 parent 2aa7135 commit 151d773
Showing 1 changed file with 42 additions and 9 deletions.
51 changes: 42 additions & 9 deletions analysis/age/hist_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,45 @@ def cluster_data(method, n_clst, df):
df.loc[df["age"] == min_age, "cluster_label"] = i
return df

def extract_city(area):
# 이름이 바뀐 경우
change_city_name = {
("충청남도", "당진군"): "당진시",
("경상남도", "마산시"): "창원시",
("경상남도", "진해시"): "창원시",
("경기도", "여주군"): "여주시",
("충청북도", "청원군"): "청주시",
("인천광역시", "남구"): "미추홀구",
}

#
change_lvl2to1 = {"연기군": "세종특별자치시"}

def change_local_name(sdName, wiwName):
"""
만약 '시' 와 '구'가 모두 wiwName에 있다면, '시' 까지만 쓰기
1. 만약 '시' 와 '구'가 모두 wiwName에 있다면, '시' 까지만 쓰기
ex) '용인시수지구' (선거 단위) -> '용인시' (의회 단위)
2. 지역이 승급되면 이름 바꾸기
ex) '당진군' (~2011) -> '당진시' (2012~)
Keyword arguments:
argument -- string
Return: processed string
"""
if '구' in area and '시' in area:
return area.split('시')[0] + '시'
if (sdName, wiwName) in change_city_name:
return change_city_name[(sdName, wiwName)]
if '구' in wiwName and '시' in wiwName:
return wiwName.split('시')[0] + '시'
else:
return area
return wiwName

def local_to_metro_list(sdName, wiwName):
"""
구시군에서 광역시/도로 승격한 경우
"""
if wiwName in change_lvl2to1:
print('change', wiwName, 'to', change_lvl2to1[wiwName])
return change_lvl2to1[wiwName]
else:
return sdName
def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name):
"""구역별 그룹을 만듭니다.
df: 데이터프레임
Expand All @@ -94,7 +120,7 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
ids = client["district"]
metroIds = ids["metro_district"]
localIds = ids["local_district"]
db = client["agehistogram"]
db = client["age_hist"]
level = "1level" if cluster_by == "sdName" else "2level"
main_collection = db[folder_name + "_" + year + "_" + level + "_" + method]
# 기존 histogram 정보는 삭제 (나이별로 넣는 것이기 때문에 찌꺼기값 존재가능)
Expand All @@ -107,12 +133,12 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
# colors = cm.rainbow(np.linspace(0, 1, n_clst))

# wiwName을 처리합니다
df['wiwName'] = df['wiwName'].apply(extract_city)
if level == "2level":
df['sdName'] = df[['sdName', 'wiwName']].apply(lambda x: local_to_metro_list(*x), axis=1)
df['wiwName'] = df[['sdName', 'wiwName']].apply(lambda x: change_local_name(*x), axis=1)
# 데이터프레임에서 시도별로 묶은 후 나이 열만 가져옵니다.
df_age = pd.DataFrame(columns=["area", "age"])
for area, df_clst in df.groupby(cluster_by):
metroname = df_clst["sdName"].iloc[0]
localname = df_clst["wiwName"].iloc[0]
df_clst = cluster_data(method, n_clst, df_clst)
# 클러스터 중심 나이를 계산합니다.
clst_age_mean = []
Expand Down Expand Up @@ -160,10 +186,17 @@ def cluster(df, year, n_clst, method, cluster_by, outdir, font_name, folder_name
df_clst.groupby('age')['cluster_label'].first()
)
]
metroname = df_clst["sdName"].iloc[0]
metroId = metroIds.find_one({"sdName": metroname})["metroId"]
if level == "1level":
print ("sdName is ", metroname)
main_collection.insert_one({"metroId": metroId, "data": data})
elif metroname in change_lvl2to1.values():
print ("sdName is ", metroname)
lvl1_collection = db[folder_name + "_" + year + "_1level_" + method]
lvl1_collection.insert_one({"metroId": metroId, "data": data})
else:
localname = df_clst["wiwName"].iloc[0]
print ("sdName is ", metroname, "wiwName is", localname)
localId = localIds.find_one({"sdName": metroname, "wiwName": localname})["localId"]
main_collection.insert_one({"metroId": metroId, "localId": localId, "data": data})
Expand Down

0 comments on commit 151d773

Please sign in to comment.