Skip to content

Commit

Permalink
Merge pull request #72 from NewWays-TechForImpactKAIST/feat-scraping-…
Browse files Browse the repository at this point in the history
…webhook

feat: send webhooks to slack app when scraping
  • Loading branch information
Re-st authored Nov 27, 2023
2 parents 72a535d + 537d797 commit 8473968
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 20 deletions.
8 changes: 8 additions & 0 deletions configurations/secrets.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,11 @@ class EmailSecrets:
sender_email = str(os.getenv("SCRAP_SENDER_EMAIL") or "")
receiver_email = str(os.getenv("SCRAP_RECEIVER_EMAIL") or "")
password = str(os.getenv("SCRAP_EMAIL_PASSWORD") or "")


class WebhookSecrets:
    """Define the keys required to send scrape results through a webhook."""

    # Taken from the WEBHOOK_URL environment variable when the class body is
    # evaluated; an unset or empty variable yields an empty string.
    webhook_url = os.environ.get("WEBHOOK_URL") or ""
61 changes: 41 additions & 20 deletions scrap/utils/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from tqdm import tqdm
from abc import *

from configurations.secrets import WebhookSecrets

from scrap.utils.export import export_results_to_json, export_results_to_txt
from scrap.utils.database import save_to_database
from scrap.utils.types import ScrapResult, ScrapBasicArgument
Expand All @@ -30,6 +32,7 @@
from scrap.local_councils import *
from scrap.metropolitan_council import *
from scrap.national_council import *
from requests import post
from scrap.group_head import *
from requests.exceptions import Timeout

Expand Down Expand Up @@ -65,6 +68,16 @@ def handle_errors(self, cid: int | str, error):
self.parseerror_count += 1
logging.error(f"| {cid} | 오류: {error}")

def send_webhook(self, message: str) -> None:
    """Post ``message`` as a JSON ``{"text": ...}`` payload to the webhook URL.

    The target URL is read from ``WebhookSecrets.webhook_url``. When no URL
    is configured the call is skipped with a warning, so a missing
    environment variable does not abort a run that has already finished
    scraping.

    Args:
        message: Text placed under the ``"text"`` key of the JSON payload.

    Raises:
        ValueError: If the endpoint answers with a status code other
            than 200.
        requests.exceptions.RequestException: If the request itself fails,
            including when it exceeds the timeout.
    """
    webhook_url = WebhookSecrets.webhook_url
    if not webhook_url:
        # The secret defaults to "" when the environment variable is unset;
        # posting to an empty URL would only raise an opaque schema error.
        logging.warning("Webhook URL is not configured; skipping webhook.")
        return

    # ``post`` is the name imported via ``from requests import post``.
    # A timeout is mandatory: without one an unresponsive endpoint would
    # block the scraper indefinitely.
    response = post(webhook_url, json={"text": message}, timeout=10)
    if response.status_code != 200:
        raise ValueError(
            f"Request to slack returned an error {response.status_code}, the response is:\n{response.text}"
        )

@abstractmethod
def run(self) -> Dict[str, ScrapResult]:
pass
Expand Down Expand Up @@ -125,7 +138,7 @@ def run_single(self, cid: int) -> ScrapResult:

return result

def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]:
scrape_results = dict()

for cid in tqdm(cids):
Expand All @@ -137,9 +150,10 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
except Exception as e:
self.handle_errors(cid, e)

logging.info(
f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
)
result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
logging.info(result_summary)
if enable_webhook:
self.send_webhook("지방의회 스크랩 결과\n" + result_summary)

return scrape_results

Expand All @@ -157,7 +171,7 @@ def run_single(self, cid: int) -> ScrapResult:
raise NotImplementedError(f"함수를 찾을 수 없습니다: {function_name}")
return result

def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
def run(self, cids: Iterable[int], enable_webhook: bool) -> Dict[int, ScrapResult]:
scrape_results = dict()

for cid in tqdm(cids):
Expand All @@ -169,9 +183,10 @@ def run(self, cids: Iterable[int]) -> Dict[int, ScrapResult]:
except Exception as e:
self.handle_errors(cid, e)

logging.info(
f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
)
result_summary = f"| 총 실행 횟수: {len(cids)} | 에러: {list(self.error_log.keys())}, 총 {len(self.error_log)}회 | 그 중 정보 없음 횟수: {self.parseerror_count} | 타임아웃 횟수: {self.timeout_count} |"
logging.info(result_summary)
if enable_webhook:
self.send_webhook("광역의회 스크랩 결과\n" + result_summary)

return scrape_results

Expand Down Expand Up @@ -234,23 +249,24 @@ def main(args: Dict[str, str]) -> None:
runner = ScraperFactory(where, runner_kwargs).create_scraper()

cids_to_run = parse_cids(args.get("cids"), where)
enable_webhook = args.get("disable-webhook")
if cids_to_run:
results = runner.run(cids_to_run)
results = runner.run(cids_to_run, enable_webhook)
else:
results = runner.run()

if args.get("update_mongo"):
if args.get("update-mongo"):
for result in results.values():
save_to_database(result)

if args.get("output_store"):
if args.get("output_format") == "json":
export_results_to_json(results, args.get("output_path"), current_time)
elif args.get("output_format") == "txt":
export_results_to_txt(results, args.get("output_path"), current_time)
if args.get("output-store"):
if args.get("output-format") == "json":
export_results_to_json(results, args.get("output-path"), current_time)
elif args.get("output-format") == "txt":
export_results_to_txt(results, args.get("output-path"), current_time)


def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
def parse_cids(cids_str: Optional[str], where: str) -> Optional[Iterable[int]]:
if cids_str and where in ["local", "metro"]:
return [int(cid.strip()) for cid in cids_str.split(",")]
elif where == "metro":
Expand Down Expand Up @@ -280,18 +296,18 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
)
parser.add_argument("-l", "--log_path", help="로그 파일 경로", default="logs")
parser.add_argument(
"-m", "--update_mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true"
"-m", "--update-mongo", help="스크랩 결과를 MongoDB에 업데이트", action="store_true"
)
parser.add_argument(
"-o", "--output_store", help="스크랩 결과를 로컬에 저장", action="store_true"
"-o", "--output-store", help="스크랩 결과를 로컬에 저장", action="store_true"
)
parser.add_argument(
"--output_format",
"--output-format",
help="스크랩 결과 저장 형식 ('json', 'txt')",
choices=["json", "txt"],
default="json",
)
parser.add_argument("--output_path", help="스크랩 결과 저장 경로", default="output")
parser.add_argument("--output-path", help="스크랩 결과 저장 경로", default="output")
parser.add_argument(
"-c", "--cids", help="스크랩할 의회 ID 목록 (','로 구분, 지방/광역의회만 해당)", default=None
)
Expand All @@ -305,6 +321,11 @@ def parse_cids(cids_str: Optional[str], where: str) -> Optional[List[int]]:
help="지방의회 스크랩 시 사용할 council_args JSON 파일 경로",
default="scrap/utils/scrap_args.json",
)
# argparse would normally store this option under "disable_webhook"
# (dashes in long option names become underscores), but main() looks the
# value up with the dashed key "disable-webhook". Pinning ``dest`` keeps the
# parsed dictionary key and that lookup in agreement.
# With ``store_false`` the stored value is True by default and becomes False
# when the flag is given, i.e. it reads as "webhook enabled".
parser.add_argument(
    "--disable-webhook",
    dest="disable-webhook",
    help="스크랩 결과 웹훅 전송 비활성화",
    action="store_false",
)
args = vars(parser.parse_args())

main(args)

0 comments on commit 8473968

Please sign in to comment.