From 73adf1c25fd8e695452bd3b9741033fd673b79d9 Mon Sep 17 00:00:00 2001
From: keonly <keonl@kaist.ac.kr>
Date: Wed, 22 Nov 2023 02:47:46 +0900
Subject: [PATCH 1/3] feat: add runner for scraping logic

---
 .gitignore                       |   3 +-
 requirements.txt                 |   3 +-
 scrap/local_councils/basic.py    |   6 +-
 scrap/local_councils/gyeonggi.py |   4 +-
 scrap/local_councils/incheon.py  |   2 +-
 scrap/utils/database.py          |   3 +-
 scrap/utils/export.py            |  23 ++++
 scrap/utils/runner.py            | 203 +++++++++++++++++++++++++++++++
 scrap/utils/runner_args.json     |  19 +++
 scrap/utils/spreadsheet.py       |  27 ++--
 10 files changed, 270 insertions(+), 23 deletions(-)
 create mode 100644 scrap/utils/export.py
 create mode 100644 scrap/utils/runner.py
 create mode 100644 scrap/utils/runner_args.json

diff --git a/.gitignore b/.gitignore
index be72122..b21ce63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -222,4 +222,5 @@ pyrightconfig.json
 
 /_data
 /output
-__pycache__
\ No newline at end of file
+__pycache__
+/logs
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 8437846..726b96d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,5 @@ gspread==5.11.2
 pymongo==4.5.0
 python-dotenv==1.0.0
 openpyxl
-selenium
\ No newline at end of file
+selenium
+tqdm
\ No newline at end of file
diff --git a/scrap/local_councils/basic.py b/scrap/local_councils/basic.py
index 4bbcfa8..d4ac5e6 100644
--- a/scrap/local_councils/basic.py
+++ b/scrap/local_councils/basic.py
@@ -138,7 +138,7 @@ def sel_getname(profile, element, class_, wrapper_element, wrapper_class_):
             if keyword in name:  # 인천 서구 등
                 name = name.replace(keyword, "").strip()
                 break
-    print(name, "is name\n")
+    # print(name, "is name\n")
     maybe_name = name.split()[0]  # 이름 뒤에 직책이 따라오는 경우
     if len(maybe_name) == 1:  # 외자 이름이 띄어쓰기 때문에 분리된 경우
         name = "".join(name.split()[0:2])
@@ -266,7 +266,7 @@ def scrap_basic(url, cid, args: ScrapBasicArgument, encoding="utf-8") -> ScrapRe
     profiles = getprofiles(
         soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
     )
-    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
+    # print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
 
     for profile in profiles:
         name = party = ""
@@ -314,7 +314,7 @@ def sel_scrap_basic(url, cid, args: ScrapBasicArgument) -> ScrapResult:
     profiles = sel_getprofiles(
         browser, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
     )
-    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
+    # print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
 
     for profile in profiles:
         name = party = ""
diff --git a/scrap/local_councils/gyeonggi.py b/scrap/local_councils/gyeonggi.py
index 55ffbd0..2d97d79 100644
--- a/scrap/local_councils/gyeonggi.py
+++ b/scrap/local_councils/gyeonggi.py
@@ -94,7 +94,7 @@ def scrap_88(url, cid, args: ScrapBasicArgument) -> ScrapResult:
     profiles = get_profiles_88_103(
         soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
     )
-    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
+    # print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
 
     for profile in profiles:
         name = getname(
@@ -153,7 +153,7 @@ def scrap_103(url, cid, args: ScrapBasicArgument) -> ScrapResult:
     profiles = get_profiles_88_103(
         soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
     )
-    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
+    # print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
 
     for profile in profiles:
         name = getname(
diff --git a/scrap/local_councils/incheon.py b/scrap/local_councils/incheon.py
index 5a45f57..72c60b6 100644
--- a/scrap/local_councils/incheon.py
+++ b/scrap/local_councils/incheon.py
@@ -170,7 +170,7 @@ def scrap_57(url, args) -> ScrapResult:
     profiles = getprofiles(
         soup, args.pf_elt, args.pf_cls, args.pf_memlistelt, args.pf_memlistcls
     )
-    print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
+    # print(cid, "번째 의회에는,", len(profiles), "명의 의원이 있습니다.")  # 디버깅용.
 
     for profile in profiles:
         name = getname(
diff --git a/scrap/utils/database.py b/scrap/utils/database.py
index 2c351ad..313c785 100644
--- a/scrap/utils/database.py
+++ b/scrap/utils/database.py
@@ -10,7 +10,6 @@
 # Note: MongoDB는 데이터베이스가 존재하지 않으면 자동으로 생성합니다.
 # MongoDB 데이터베이스는 하나 이상의 컬렉션으로 구성됩니다.
 # 컬렉션은 하나 이상의 문서로 구성됩니다.
-db = client[str(MongoDBSecrets.database_name)]
 
 
 def save_to_database(record: ScrapResult):
@@ -25,6 +24,7 @@ def save_to_database(record: ScrapResult):
         # MongoDB는 JSON을 저장할 수 있습니다.
         # JSON 형태로 변환한 후, MongoDB에 저장합니다.
         # serialized_record = json.dumps(dataclasses.asdict(record), ensure_ascii=False)
+        db = client[str(MongoDBSecrets.database_name)]
         collection = db[str(record.council_type)]
         result = collection.find_one(
             {"council_id": record.council_id},
@@ -62,7 +62,6 @@ def save_to_database(record: ScrapResult):
 
         return True
     except Exception as e:
-        t
         print(e)
         return False
 
diff --git a/scrap/utils/export.py b/scrap/utils/export.py
new file mode 100644
index 0000000..5276ae0
--- /dev/null
+++ b/scrap/utils/export.py
@@ -0,0 +1,23 @@
+import os
+import json
+from dataclasses import asdict
+
+from scrap.utils.types import ScrapResult, ScrapBasicArgument
+
+
+def export_results_to_json(results: dict[int, ScrapResult], output_path: str, current_time: str):
+    os.makedirs(output_path, exist_ok=True)
+    results = {k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()}
+
+    with open(os.path.join(output_path, f"scraping_result_{current_time}.json"), "w", encoding="utf-8") as f:
+        json.dump(results, f, ensure_ascii=False, indent=4)
+
+
+def export_results_to_txt(results: dict[int, ScrapResult], output_path: str, current_time: str):
+    os.makedirs(output_path, exist_ok=True)
+    results = {k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()}
+
+    with open(os.path.join(output_path, f"scraping_result_{current_time}.txt"), "w", encoding="utf-8") as f:
+        for cid, councilors in results.items():
+            councilors = "\n".join([c.to_txt() for c in councilors])
+            f.write(f"| {cid} | {councilors}\n")
\ No newline at end of file
diff --git a/scrap/utils/runner.py b/scrap/utils/runner.py
new file mode 100644
index 0000000..b0fb630
--- /dev/null
+++ b/scrap/utils/runner.py
@@ -0,0 +1,203 @@
+import os
+import sys
+import json
+import argparse
+import datetime
+import logging
+import warnings
+from typing import List, Dict, Optional
+from collections.abc import Iterable
+from tqdm import tqdm
+
+from scrap.utils.export import export_results_to_json, export_results_to_txt
+from scrap.utils.database import save_to_database
+from scrap.utils.types import ScrapResult, ScrapBasicArgument
+from scrap.utils.spreadsheet import read_record_from_spreadsheet
+from scrap.local_councils.seoul import *
+from scrap.local_councils.busan import *
+from scrap.local_councils.daegu import *
+from scrap.local_councils.incheon import *
+from scrap.local_councils.gwangju import *
+
+# from scrap.local_councils.daejeon import *
+from scrap.local_councils.ulsan import *
+from scrap.local_councils.gyeonggi import *
+from scrap.local_councils.gangwon import *
+from scrap.local_councils.chungcheong import *
+from scrap.local_councils.jeolla import *
+from scrap.local_councils.gyeongsang import *
+from scrap.local_councils import *
+from requests.exceptions import Timeout
+
+
+BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
+
+
+class ScraperRunner:
+    def __init__(
+        self,
+        runner_args_path: str,
+        council_args_path: str,
+        data_source: str,
+        kwargs: Dict[str, str] = {},
+    ):
+        with open(runner_args_path, "r") as f:
+            self.runner_args = json.load(f)
+        with open(council_args_path, "r") as f:
+            self.council_args = json.load(f)
+
+        self.get_records_from_data_source(data_source)
+
+        self.setup_logging(kwargs.get("log_path"), kwargs.get("current_time"))
+        self.error_log = dict()
+        self.timeout_count = 0
+        self.parseerror_count = 0
+
+    def setup_logging(self, log_path: str, current_time: str):
+        """Log at INFO level to BASE_DIR/<log_path>/scraping_log_<current_time>.log."""
+        # Create the directory under BASE_DIR, where the log file is actually written.
+        log_dir = os.path.join(BASE_DIR, log_path)
+        os.makedirs(log_dir, exist_ok=True)
+        log_file = os.path.join(log_dir, f"scraping_log_{current_time}.log")
+        logging.basicConfig(
+            filename=log_file,
+            level=logging.INFO,
+            format="[%(asctime)s] %(levelname)s - %(message)s",
+        )
+
+    def get_records_from_data_source(self, data_source: str):
+        if data_source == "google_sheets":
+            self.url_records = read_record_from_spreadsheet()
+        elif data_source == "mongodb":
+            # TODO: Implement the MongoDB source -> discuss whether per-council URLs need to be stored in MongoDB
+            raise NotImplementedError("MongoDB에 아직 데이터가 없습니다.")
+
+    # Helper Functions
+    def is_euc_kr(self, n: int) -> bool:
+        return n in self.runner_args["euc_kr"]
+
+    def is_special_function(self, n: int) -> bool:
+        return n in self.runner_args["special_functions"]
+
+    def is_selenium_basic(self, n: int) -> bool:
+        return n in self.runner_args["selenium_basic"]
+
+    def handle_errors(self, cid: int, error):
+        self.error_log[cid] = str(error)
+
+        if isinstance(error, Timeout):
+            self.timeout_count += 1
+        elif isinstance(error, ValueError) and "정보 없음" in str(error):
+            self.parseerror_count += 1
+        logging.error(f"| {cid} | 오류: {error}")
+
+    def run_single_council(self, n: int) -> ScrapResult:
+        encoding = "euc-kr" if self.is_euc_kr(n) else "utf-8"
+        council_url = self.url_records[n - 1]["URL"]
+        council_args = self.council_args.get(str(n), None)
+        if council_args is not None:
+            council_args = ScrapBasicArgument(**council_args)
+
+        if self.is_special_function(n):
+            function_name = f"scrap_{n}"
+            if hasattr(sys.modules[__name__], function_name):
+                function_to_call = getattr(sys.modules[__name__], function_name)  # type: ignore
+                result = function_to_call(council_url, n, args=council_args)
+            else:
+                raise NotImplementedError(f"함수를 찾을 수 없습니다: {function_name}")
+        else:
+            if council_args is None:
+                raise ValueError(f"{n}번 의회에 대한 ScrapBasicArgument가 없습니다.")
+        
+            if self.is_selenium_basic(n):
+                result = sel_scrap_basic(council_url, n, council_args)
+            else:
+                result = scrap_basic(council_url, n, council_args, encoding)
+
+        return result
+
+    def run_all_councils(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
+        scrape_results = dict()
+
+        for cid in tqdm(cids):
+            try:
+                result = self.run_single_council(cid)
+                if "정보 없음" in str(result.councilors):
+                    raise ValueError("정보 없음이 포함되어 있습니다.")
+                scrape_results[cid] = result
+            except Exception as e:
+                self.handle_errors(cid, e)
+
+        logging.info(
+            f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
+        )
+
+        return scrape_results
+
+
+def main(args: Dict[str, str]) -> None:
+    warnings.filterwarnings("ignore")
+
+    current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
+    runner_kwargs = {
+        "log_path": args.get("log_path"),
+        "current_time": current_time,
+    }
+
+    runner = ScraperRunner(
+        args["runner_args_path"],
+        args["council_args_path"],
+        args["data_source"],
+        runner_kwargs,
+    )
+
+    cids_to_run = parse_cids(args.get("cids"))
+    results = runner.run_all_councils(cids_to_run)
+
+    if args.get("update_mongo"):
+        for result in results.values():
+            # TODO: verify that this works correctly
+            save_to_database(result)
+
+    if args.get("output_store"):
+        if args.get("output_format") == "json":
+            export_results_to_json(results, args.get("output_path"), current_time)
+        elif args.get("output_format") == "txt":
+            export_results_to_txt(results, args.get("output_path"), current_time)
+
+
+def parse_cids(cids_str: Optional[str]) -> list[int]:
+    """Parse a comma-separated ID string; empty/None selects all councils (1-226)."""
+    if not cids_str:
+        return list(range(1, 227))
+    return [int(cid) for cid in cids_str.split(",") if cid.strip()]
+
+
+if __name__ == "__main__":
+    # CLI entry point: parse the arguments and hand them to main() as a dict.
+    parser = argparse.ArgumentParser(description="지방의회 스크랩 스크립트 실행")
+    parser.add_argument("runner_args_path", help="runner_args JSON 파일 경로")
+    parser.add_argument("council_args_path", help="council_args JSON 파일 경로")
+    parser.add_argument(
+        "data_source",
+        nargs="?",  # optional positional; without this the default is never used
+        help="사용할 데이터 소스 ('google_sheets', 'mongodb')",
+        choices=["google_sheets", "mongodb"],
+        default="google_sheets",
+    )
+    parser.add_argument("-c", "--cids", help="스크랩할 지방의회 ID 목록 (쉼표로 구분)", default=None)
+    parser.add_argument("-l", "--log_path", help="로그 파일 경로", default="logs")
+    parser.add_argument(
+        "-m", "--update_mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true"
+    )
+    parser.add_argument(
+        "-o", "--output_store", help="스크랩 결과를 로컬에 저장", action="store_true"
+    )
+    parser.add_argument(
+        "--output_format",
+        help="스크랩 결과 저장 형식 ('json', 'txt')",
+        choices=["json", "txt"],
+        default="json",
+    )
+    parser.add_argument("--output_path", help="스크랩 결과 저장 경로", default="output")
+    main(vars(parser.parse_args()))
diff --git a/scrap/utils/runner_args.json b/scrap/utils/runner_args.json
new file mode 100644
index 0000000..ddad42b
--- /dev/null
+++ b/scrap/utils/runner_args.json
@@ -0,0 +1,19 @@
+{
+  "euc_kr": [
+    6, 13, 16, 31, 72, 88, 112, 134, 154, 157, 163, 165, 167, 176, 181, 197,
+    202, 222
+  ],
+  "special_functions": [
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+    22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 62, 63, 64,
+    88, 97, 103, 107, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
+    124, 125, 126, 132, 134, 140, 142, 154, 155, 156, 157, 160, 161, 162, 163,
+    164, 165, 167, 177, 178, 179, 182, 183, 184, 186, 188, 189, 190, 191, 194,
+    195, 196, 198, 199, 201, 203, 206, 208, 209, 210, 212, 213, 214, 215, 216,
+    217, 218, 219, 220, 222, 223, 224, 226
+  ],
+  "selenium_basic": [76, 78, 101, 169, 173, 177],
+  "no_information": [18, 29, 106, 111, 172, 181, 185, 187, 197, 200, 204, 207],
+  "error_unresolved": [170, 171]
+}
diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py
index 2fee572..cb0d348 100644
--- a/scrap/utils/spreadsheet.py
+++ b/scrap/utils/spreadsheet.py
@@ -11,7 +11,7 @@
 from scrap.local_councils.daegu import *
 from scrap.local_councils.incheon import *
 from scrap.local_councils.gwangju import *
-from scrap.local_councils.daejeon import *
+# from scrap.local_councils.daejeon import *
 from scrap.local_councils.ulsan import *
 from scrap.local_councils.gyeonggi import *
 from scrap.local_councils.gangwon import *
@@ -56,16 +56,16 @@ def google_authorization():
     return gspread.authorize(creds)
 
 
-def main() -> None:
-    # Google Sheets API 설정
-    client: gspread.client.Client = google_authorization()
-
-    # 스프레드시트 열기
+def read_record_from_spreadsheet() -> list[dict]:
+    client = google_authorization()
     link = "https://docs.google.com/spreadsheets/d/1fBDJjkw8FSN5wXrvos9Q2wDsyItkUtNFGOxUZYE-h0M/edit#gid=1127955905"  # T4I-의회목록
-    spreadsheet: gspread.Spreadsheet = client.open_by_url(link)
-    worksheet: gspread.Worksheet = spreadsheet.get_worksheet(
-        0
-    )  # 원하는 워크시트 선택 (0은 첫 번째 워크시트입니다.)
+    spreadsheet = client.open_by_url(link)
+    worksheet = spreadsheet.get_worksheet(0)
+
+    return worksheet.get_all_records()
+
+
+def main() -> None:
     # TODO - 홈페이지 위 charset=euc-kr 등을 인식해 바로 가져오기.
     euc_kr = [
         6,
@@ -126,13 +126,14 @@ def main() -> None:
     f.close()
 
     # 데이터 가져오기
-    data: list[dict] = worksheet.get_all_records()
+    # data: list[dict] = worksheet.get_all_records()
+    data = read_record_from_spreadsheet()
     result: str = ""
 
     parse_error_times = 0
     timeouts = 0
     N = 226
-    for n in [189]:  # range(1, N + 1):
+    for n in range(1, N + 1):  # range(1, N + 1):
         if n in no_information + error_unsolved:
             error_msg = (
                 "지난번 확인 시, 정당 정보 등이 홈페이지에 없었습니다. \
@@ -171,7 +172,7 @@ def main() -> None:
                 print("정보 없음이 포함되어 있습니다.")
                 parse_error_times += 1
                 errors.append(n)
-            print(f"| {n} | {result}")
+            # print(f"| {n} | {result}")
         except Timeout:
             print(f"| {n} | 오류: Request to {council_url} timed out.")
             timeouts += 1

From 6ab2c445b771ed5155b5445317ced3f28323f719 Mon Sep 17 00:00:00 2001
From: keonly <keonly@users.noreply.github.com>
Date: Tue, 21 Nov 2023 17:49:10 +0000
Subject: [PATCH 2/3] Formatted with black

---
 scrap/utils/export.py      | 30 +++++++++++++++++++++++-------
 scrap/utils/runner.py      |  2 +-
 scrap/utils/spreadsheet.py |  1 +
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/scrap/utils/export.py b/scrap/utils/export.py
index 5276ae0..014da59 100644
--- a/scrap/utils/export.py
+++ b/scrap/utils/export.py
@@ -5,19 +5,35 @@
 from scrap.utils.types import ScrapResult, ScrapBasicArgument
 
 
-def export_results_to_json(results: dict[int, ScrapResult], output_path: str, current_time: str):
+def export_results_to_json(
+    results: dict[int, ScrapResult], output_path: str, current_time: str
+):
     os.makedirs(output_path, exist_ok=True)
-    results = {k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()}
+    results = {
+        k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()
+    }
 
-    with open(os.path.join(output_path, f"scraping_result_{current_time}.json"), "w", encoding="utf-8") as f:
+    with open(
+        os.path.join(output_path, f"scraping_result_{current_time}.json"),
+        "w",
+        encoding="utf-8",
+    ) as f:
         json.dump(results, f, ensure_ascii=False, indent=4)
 
 
-def export_results_to_txt(results: dict[int, ScrapResult], output_path: str, current_time: str):
+def export_results_to_txt(
+    results: dict[int, ScrapResult], output_path: str, current_time: str
+):
     os.makedirs(output_path, exist_ok=True)
-    results = {k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()}
+    results = {
+        k: [asdict(councilor) for councilor in v.councilors] for k, v in results.items()
+    }
 
-    with open(os.path.join(output_path, f"scraping_result_{current_time}.txt"), "w", encoding="utf-8") as f:
+    with open(
+        os.path.join(output_path, f"scraping_result_{current_time}.txt"),
+        "w",
+        encoding="utf-8",
+    ) as f:
         for cid, councilors in results.items():
             councilors = "\n".join([c.to_txt() for c in councilors])
-            f.write(f"| {cid} | {councilors}\n")
\ No newline at end of file
+            f.write(f"| {cid} | {councilors}\n")
diff --git a/scrap/utils/runner.py b/scrap/utils/runner.py
index b0fb630..fc0955d 100644
--- a/scrap/utils/runner.py
+++ b/scrap/utils/runner.py
@@ -108,7 +108,7 @@ def run_single_council(self, n: int) -> ScrapResult:
         else:
             if council_args is None:
                 raise ValueError(f"{n}번 의회에 대한 ScrapBasicArgument가 없습니다.")
-        
+
             if self.is_selenium_basic(n):
                 result = sel_scrap_basic(council_url, n, council_args)
             else:
diff --git a/scrap/utils/spreadsheet.py b/scrap/utils/spreadsheet.py
index cb0d348..7ec208d 100644
--- a/scrap/utils/spreadsheet.py
+++ b/scrap/utils/spreadsheet.py
@@ -11,6 +11,7 @@
 from scrap.local_councils.daegu import *
 from scrap.local_councils.incheon import *
 from scrap.local_councils.gwangju import *
+
 # from scrap.local_councils.daejeon import *
 from scrap.local_councils.ulsan import *
 from scrap.local_councils.gyeonggi import *

From f2a204e2e4304155e3c83f753a9817b8b4f88cd0 Mon Sep 17 00:00:00 2001
From: keonly <keonl@kaist.ac.kr>
Date: Wed, 22 Nov 2023 19:25:54 +0900
Subject: [PATCH 3/3] =?UTF-8?q?feat(scrap):=20=EC=8A=A4=ED=81=AC=EB=9E=A9?=
 =?UTF-8?q?=20=EA=B2=B0=EA=B3=BC=20MongoDB=20=EC=97=B0=EB=8F=99=20?=
 =?UTF-8?q?=EB=A1=9C=EC=A7=81=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 db/types.py             |   6 +-
 scrap/utils/database.py | 147 +++++++++++++++++++++++-----------------
 2 files changed, 86 insertions(+), 67 deletions(-)

diff --git a/db/types.py b/db/types.py
index 4b04658..7e22fdd 100644
--- a/db/types.py
+++ b/db/types.py
@@ -9,9 +9,9 @@ class CouncilType(str, Enum):
     의회의 종류를 나타내는 열거형입니다.
     """
 
-    LOCAL_COUNCIL = "local_council"
-    NATIONAL_COUNCIL = "national_council"
-    METROPOLITAN_COUNCIL = "metropolitan_council"
+    LOCAL_COUNCIL = "local_councilor"
+    NATIONAL_COUNCIL = "national_councilor"
+    METROPOLITAN_COUNCIL = "metropolitan_councilor"
     LOCAL_LEADER = "local_leader"
     METRO_LEADER = "metro_leader"
     """
diff --git a/scrap/utils/database.py b/scrap/utils/database.py
index 313c785..74c3526 100644
--- a/scrap/utils/database.py
+++ b/scrap/utils/database.py
@@ -1,3 +1,5 @@
+from typing import List
+
 from db.client import client
 from configurations.secrets import MongoDBSecrets
 
@@ -7,73 +9,90 @@
 import json
 
 
-# Note: MongoDB는 데이터베이스가 존재하지 않으면 자동으로 생성합니다.
-# MongoDB 데이터베이스는 하나 이상의 컬렉션으로 구성됩니다.
-# 컬렉션은 하나 이상의 문서로 구성됩니다.
-
-
-def save_to_database(record: ScrapResult):
+def save_to_database(record: ScrapResult) -> bool:
     """
     지방의회 크롤링 결과를 데이터베이스에 저장합니다.
-    각 의회의 기존 데이터는 덮어 씌워집니다.
-    예시는 scrap/utils/database.py를 참조해주세요.
     :param record: 지방의회 크롤링 결과
     :return: 저장 성공 여부를 불리언 값으로 반환합니다.
     """
-    try:
-        # MongoDB는 JSON을 저장할 수 있습니다.
-        # JSON 형태로 변환한 후, MongoDB에 저장합니다.
-        # serialized_record = json.dumps(dataclasses.asdict(record), ensure_ascii=False)
-        db = client[str(MongoDBSecrets.database_name)]
-        collection = db[str(record.council_type)]
-        result = collection.find_one(
-            {"council_id": record.council_id},
+    db = client[str(MongoDBSecrets.database_name)]
+    collection = db[str(record.council_type)]
+
+    cid = record.council_id
+
+    new_councilors = check_new_councilors(collection, record)
+    resigned_councilors = check_resigned_councilors(collection, record)
+    other_councilors = [
+        councilor
+        for councilor in record.councilors
+        if councilor not in new_councilors
+        and councilor.name
+        not in [councilor["name"] for councilor in resigned_councilors]
+    ]
+
+    # TODO: handle new councilors not yet in the DB (detected above but never inserted)
+    update_councilors(collection, cid, other_councilors)
+    remove_councilors(collection, resigned_councilors)
+
+    return True
+
+
+def check_new_councilors(collection, record: ScrapResult) -> list[Councilor]:
+    """
+    Return the scraped councilors that have no matching document in the DB.
+    A councilor counts as new when no document has both this council's
+    ``localId`` and the councilor's ``name``.
+    :param collection: MongoDB collection to query
+    :param record: scrape result for one council
+    :return: councilors from ``record`` not found in the collection
+    """
+    cid = record.council_id
+
+    def is_unknown(councilor) -> bool:
+        # One lookup per councilor, keyed on council ID plus name.
+        query = {"localId": cid, "name": councilor.name}
+        return collection.find_one(query) is None
+
+    return [councilor for councilor in record.councilors if is_unknown(councilor)]
+
+
+def check_resigned_councilors(collection, record: ScrapResult) -> list[dict]:
+    """
+    Return DB documents of this council whose name is absent from the scrape.
+    :param collection: MongoDB collection to query
+    :param record: scrape result for one council
+    :return: stored documents treated as resigned councilors
+    """
+    # Build the name set once instead of rebuilding a list per stored document.
+    scraped_names = {councilor.name for councilor in record.councilors}
+    return [
+        stored
+        for stored in collection.find({"localId": record.council_id})
+        if stored["name"] not in scraped_names
+    ]
+
+
+def update_councilors(collection, cid: int, councilors: List[Councilor]) -> None:
+    """
+    Update the DB document matching each given councilor (by council ID and name).
+    :param collection: MongoDB collection to update
+    :param cid: council ID matched against ``localId``
+    :param councilors: councilors whose documents should be refreshed
+    """
+    for member in councilors:
+        selector = {"localId": cid, "name": member.name}
+        new_fields = {"$set": asdict(member)}
+        # upsert=False: a councilor with no matching document is left uninserted.
+        collection.update_one(selector, new_fields, upsert=False)
+
+
+def remove_councilors(collection, councilors: List[dict]) -> None:
+    """
+    Delete resigned councilors from the database.
+    :param collection: MongoDB collection
+    :param councilors: stored documents (dicts with "localId" and "name") to delete
+    """
+    for councilor in councilors:
+        collection.delete_one(
+            {"localId": councilor["localId"], "name": councilor["name"]}
         )
-        if result is not None:
-            before_councilors = result["councilors"]  # List[dict]
-            updated_councilors = []
-            updated_names = set()
-
-            name_data_map_for_update = {d.name: asdict(d) for d in record.councilors}
-
-            for d in before_councilors:
-                if d["name"] in name_data_map_for_update:
-                    d.update(
-                        {
-                            k: v
-                            for k, v in name_data_map_for_update[d["name"]].items()
-                            if k in d
-                        }
-                    )
-                    updated_names.add(d["name"])
-                updated_councilors.append(d)
-
-            for d in record.councilors:
-                if d.name not in updated_names:
-                    updated_councilors.append(asdict(d))
-
-            collection.find_one_and_update(
-                {"council_id": record.council_id},
-                {"$set": {"councilors": updated_councilors}},
-                upsert=True,
-            )
-        else:
-            return False
-
-        return True
-    except Exception as e:
-        print(e)
-        return False
-
-
-if __name__ == "__main__":
-    test_record = ScrapResult(
-        council_id="test-test",
-        council_type=CouncilType.LOCAL_COUNCIL,
-        councilors=[
-            Councilor(name="김철수", jdName="국민의힘"),
-            Councilor(name="김영희", jdName="Birthday Party"),
-            Councilor(name="테스트", jdName="테스트당"),
-        ],
-    )
-    print(save_to_database(test_record))