Feat: manual addition of non-scrappable councils #76

Merged (6 commits, Nov 28, 2023)
2 changes: 1 addition & 1 deletion .env.example
@@ -1,4 +1,4 @@
 # required environment variables
 MONGO_CONNECTION_URI=mongodb://localhost:27017
-OPEN_DATA_SERICE_KEY=some_key
+OPEN_DATA_SERVICE_KEY=some_key
 MONGO_DATABASE=council
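
The fix above renames a misspelled variable, so any consumer still reading the old name would silently get an empty value. A minimal sketch of the intended consumption, assuming python-dotenv is the loader (the loader itself is not part of this diff); configurations/secrets.py, patched further down, reads the corrected name:

import os

from dotenv import load_dotenv  # assumed loader, not shown in this PR

load_dotenv()  # reads .env from the working directory
print(os.getenv("OPEN_DATA_SERVICE_KEY"))  # "some_key" with the example file above
print(os.getenv("OPEN_DATA_SERICE_KEY"))   # None once the misspelled key is gone
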
42 changes: 25 additions & 17 deletions API/candidate.py
@@ -68,28 +68,36 @@ def fetch_all_data(
     parser.add_argument(
         "--drop-columns",
         type=str,
-        default="num,huboid,hanjaName,status",
+        default="num,huboid,hanjaName,status,gihoSangse",
         help="제거할 열 이름을 ','로 구분하여 입력하세요",
     )
     parser.add_argument(
-        "--save-method",
-        type=str,
-        # TODO: Add MongoDB support
-        # choices=["excel", "mongo"],
-        choices=["excel"],
-        default="excel",
-        help="데이터 저장 방식: 'excel' (현재는 excel만 지원)",
+        "-m", "--update-mongo", help="API 요청 결과를 MongoDB에 업데이트", action="store_true"
     )
+    parser.add_argument(
+        "-o", "--output-store", help="API 요청 결과를 로컬에 저장", action="store_true"
+    )
+    parser.add_argument("--output-path", help="API 요청 결과 저장 경로", default="output")
 
-    args = parser.parse_args()
-    sgIds = args.sgIds.split(",")
-    drop_columns = args.drop_columns.split(",") if args.drop_columns else []
-
-    data_list = fetch_all_data(sgIds, args.sgTypecodes, drop_columns=drop_columns)
-    for sgTypecode in args.sgTypecodes.split(","):
+    args = vars(parser.parse_args())
+    print(args)
+    sgIds = args.get("sgIds").split(",")
+    if args.get("drop_columns"):
+        drop_columns = args.get("drop_columns").split(",")
+    else:
+        drop_columns = []
+    print(drop_columns)
+
+    data_list = fetch_all_data(
+        sgIds, args.get("sgTypecodes"), drop_columns=drop_columns
+    )
+
+    for sgTypecode in args.get("sgTypecodes").split(","):
         if sgTypecode not in SG_TYPECODE:
             raise ValueError(f"Invalid sgTypecode: {sgTypecode}")
-        if args.save_method == "excel":
-            save_to_excel(data_list, sgTypecode, is_elected=False)
-        elif args.save_method == "mongo":
+
+        if args.get("update_mongo"):
             save_to_mongo(data_list, sgTypecode, CANDIDATE_TYPECODE_TYPE[sgTypecode])
+
+        if args.get("output_store"):
+            save_to_excel(data_list, sgTypecode, is_elected=False)
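
The old --save-method option forced an either/or choice between Excel and MongoDB; the new -m/--update-mongo and -o/--output-store switches are independent booleans, so a single run can do both. A stripped-down sketch of just the new flag semantics (the prints are placeholders for the real save_to_mongo / save_to_excel calls, not the script's code):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--update-mongo", action="store_true")
parser.add_argument("-o", "--output-store", action="store_true")
parser.add_argument("--output-path", default="output")

# e.g. `python API/candidate.py ... -m -o` sets both flags in one run
args = vars(parser.parse_args(["-m", "-o"]))

if args.get("update_mongo"):  # argparse maps --update-mongo to the key "update_mongo"
    print("would call save_to_mongo(...)")
if args.get("output_store"):
    print("would call save_to_excel(...)")

API/elected.py below receives the same treatment.
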
34 changes: 21 additions & 13 deletions API/elected.py
@@ -73,22 +73,30 @@ def fetch_all_data(
         help="제거할 열 이름을 ','로 구분하여 입력하세요",
     )
     parser.add_argument(
-        "--save-method",
-        type=str,
-        choices=["excel", "mongo"],
-        default="excel",
-        help="데이터 저장 방식: 'excel', 'mongo'",
+        "-m", "--update-mongo", help="API 요청 결과를 MongoDB에 업데이트", action="store_true"
     )
+    parser.add_argument(
+        "-o", "--output-store", help="API 요청 결과를 로컬에 저장", action="store_true"
+    )
+    parser.add_argument("--output-path", help="API 요청 결과 저장 경로", default="output")
 
-    args = parser.parse_args()
-    sgIds = args.sgIds.split(",")
-    drop_columns = args.drop_columns.split(",") if args.drop_columns else []
-
-    data_list = fetch_all_data(sgIds, args.sgTypecodes, drop_columns=drop_columns)
-    for sgTypecode in args.sgTypecodes.split(","):
+    args = vars(parser.parse_args())
+    sgIds = args.get("sgIds").split(",")
+    if args.get("drop_columns"):
+        drop_columns = args.get("drop_columns").split(",")
+    else:
+        drop_columns = []
+
+    data_list = fetch_all_data(
+        sgIds, args.get("sgTypecodes"), drop_columns=drop_columns
+    )
+
+    for sgTypecode in args.get("sgTypecodes").split(","):
         if sgTypecode not in SG_TYPECODE:
             raise ValueError(f"Invalid sgTypecode: {sgTypecode}")
-        if args.save_method == "excel":
-            save_to_excel(data_list, sgTypecode, is_elected=True)
-        elif args.save_method == "mongo":
+
+        if args.get("update_mongo"):
             save_to_mongo(data_list, sgTypecode, ELECTED_TYPECODE_TYPE[sgTypecode])
+
+        if args.get("output_store"):
+            save_to_excel(data_list, sgTypecode, is_elected=True)
2 changes: 1 addition & 1 deletion API/utils.py
@@ -72,7 +72,7 @@ def save_to_mongo(data: List[dict], sgTypecode: str, where: str) -> None:
                 upsert=True,
             )
     else:
-        raise NotImplementedError("현재 구시군의회의원(6) 및 기초의원비례대표(9)만 구현되어 있습니다.")
+        raise NotImplementedError(f"아직 구현되지 않은 sgTypecode: {sgTypecode}")
 
     print(f"데이터를 성공적으로 MongoDB '{main_collection.name}' 컬렉션에 저장하였습니다.")
 
2 changes: 1 addition & 1 deletion configurations/secrets.py
@@ -26,7 +26,7 @@ class OpenDataPortalSecrets:
     공공데이터포털(data.go.kr) API 호출에 필요한 서비스 키를 정의합니다.
     """
 
-    service_key = str(os.getenv("OPEN_DATA_SERICE_KEY") or "")
+    service_key = str(os.getenv("OPEN_DATA_SERVICE_KEY") or "")
 
 
 class EmailSecrets:
21 changes: 20 additions & 1 deletion scrap/utils/export.py → scrap/utils/data_io.py
@@ -3,7 +3,8 @@
 from dataclasses import asdict
 from typing import Dict
 
-from scrap.utils.types import ScrapResult, ScrapBasicArgument
+from scrap.utils.types import ScrapResult
+from db.types import Councilor
 
 
 def export_results_to_json(
@@ -38,3 +39,21 @@ def export_results_to_txt(
         for cid, councilors in results.items():
             councilors = "\n".join([c.to_txt() for c in councilors])
             f.write(f"| {cid} | {councilors}\n")
+
+
+def import_results_from_json(
+    input_path: str, council_type: str
+) -> Dict[str, ScrapResult]:
+    with open(input_path, "r", encoding="utf-8") as f:
+        results = json.load(f)
+
+    results = {
+        k: ScrapResult(
+            council_id=k,
+            council_type=council_type,
+            councilors=[Councilor(**c) for c in v],
+        )
+        for k, v in results.items()
+    }
+
+    return results
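
The new import_results_from_json is the inverse of export_results_to_json: it reads a JSON mapping of council id to a list of councilor dicts and rebuilds ScrapResult objects from it. A hedged usage sketch; the path is hypothetical and the file is assumed to have the shape export_results_to_json writes:

from scrap.utils.data_io import import_results_from_json

# Hypothetical file of the shape { "<council_id>": [ {<Councilor fields>}, ... ], ... }
results = import_results_from_json("output/local_results.json", "local")

for cid, scrap_result in results.items():
    print(cid, len(scrap_result.councilors))
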
45 changes: 31 additions & 14 deletions scrap/utils/runner.py
@@ -12,7 +12,11 @@

 from configurations.secrets import WebhookSecrets
 
-from scrap.utils.export import export_results_to_json, export_results_to_txt
+from scrap.utils.data_io import (
+    export_results_to_json,
+    export_results_to_txt,
+    import_results_from_json,
+)
 from scrap.utils.database import save_to_database
 from scrap.utils.types import ScrapResult, ScrapBasicArgument
 from scrap.utils.spreadsheet import read_record_from_spreadsheet
@@ -250,26 +254,36 @@ def main(args: Dict[str, str]) -> None:

     where = args.get("where")
     current_time = datetime.datetime.now().strftime("%Y%m%d_%H%M")
-    runner_kwargs = args | {"current_time": current_time}
 
-    runner = ScraperFactory(where, runner_kwargs).create_scraper()
+    json_import_path = args.get("import_from_json")
+    if json_import_path:
+        if not args.get("update_mongo"):
+            raise ValueError(
+                "JSON 파일에서 가져온 결과를 MongoDB에 업데이트하려면 --update-mongo (-m) 옵션을 사용해야 합니다."
+            )
+
+        print("JSON 파일에서 결과를 가져옵니다. 다른 스크랩 관련 옵션은 무시됩니다.")
+        results = import_results_from_json(json_import_path, where)
+    else:
+        runner_kwargs = args | {"current_time": current_time}
+        runner = ScraperFactory(where, runner_kwargs).create_scraper()
 
-    cids_to_run = parse_cids(args.get("cids"), where)
-    enable_webhook = args.get("disable-webhook")
-    if cids_to_run:
-        results = runner.run(cids_to_run, enable_webhook)
-    else:
-        results = runner.run()
+        cids_to_run = parse_cids(args.get("cids"), where)
+        enable_webhook = args.get("disable_webhook")
+        if cids_to_run:
+            results = runner.run(cids_to_run, enable_webhook)
+        else:
+            results = runner.run()
 
-    if args.get("update-mongo"):
+    if args.get("update_mongo"):
         for result in results.values():
             save_to_database(result)
 
-    if args.get("output-store"):
-        if args.get("output-format") == "json":
-            export_results_to_json(results, args.get("output-path"), current_time)
-        elif args.get("output-format") == "txt":
-            export_results_to_txt(results, args.get("output-path"), current_time)
+    if args.get("output_store"):
+        if args.get("output_format") == "json":
+            export_results_to_json(results, args.get("output_path"), current_time)
+        elif args.get("output_format") == "txt":
+            export_results_to_txt(results, args.get("output_path"), current_time)
 
 
 def parse_cids(cids_str: Optional[str], where: str) -> Optional[Iterable[int]]:
@@ -294,6 +308,9 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[Iterable[int]]:
         choices=["local", "metro", "national", "leaders"],
         default="local",
     )
+    parser.add_argument(
+        "--import-from-json", help="경로에서 JSON 파일을 읽어와 결과를 받아옴", default=None
+    )
     parser.add_argument(
         "--data-source",
         help="사용할 데이터 소스 ('google_sheets', 'mongodb')",
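
Taken together, the runner changes add a bypass: when --import-from-json is given, main() validates that -m/--update-mongo is also set, skips the scraper entirely, and loads results from the file; otherwise it falls back to the existing ScraperFactory path. A standalone sketch of that branching (illustrative function and messages, not the repo's code):

def choose_result_source(args: dict) -> str:
    # mirrors the new guard and branch in main()
    if args.get("import_from_json"):
        if not args.get("update_mongo"):
            raise ValueError("--import-from-json requires --update-mongo (-m)")
        return f"import results from {args['import_from_json']}"
    return "scrape as before via ScraperFactory"


print(choose_result_source({"import_from_json": "output/local_results.json", "update_mongo": True}))
print(choose_result_source({"where": "local"}))

On the command line this corresponds to passing --import-from-json <path> together with -m.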