From 9b1aef7f556a205220627e4258c64ebbd56130cb Mon Sep 17 00:00:00 2001 From: lordfriend Date: Sat, 9 Jul 2016 17:07:31 +0800 Subject: [PATCH] add InfoScanner referred at #10 remove encode path in service.common.generate_video_link --- CHANGELOG.md | 12 ++++ README.md | 4 ++ Scheduler.py | 4 ++ config/config-sample.yml | 3 + service/common.py | 2 +- taskrunner/InfoScanner.py | 115 ++++++++++++++++++++++++++++++++++++++ taskrunner/__init__.py | 0 7 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 taskrunner/InfoScanner.py create mode 100644 taskrunner/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b0ca2e9..9eac40d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +#0.6.0-alpha + +##Features Add + +- InfoScanner for scanning missing information (name, name_cn, duration) and automatically filling in that information from bangumi.tv. Note that name_cn is not always filled. + +##Bug fix + +- roll back the session when the db connection is lost + +NOTE: this release requires users to update their config.yml. 
+ #0.4.0-alpha ##Features Add diff --git a/README.md b/README.md index 5849ba3..2ac4a62 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,10 @@ $ python tools.py --user-add admin 1234 # admin is username 1234 is password $ python tools.py --user-promote admin 3 # admin is username 3 is the level, currently means super user ``` +### set your server locale + +To avoid some unicode issues, it is recommended to set the locale of your server + ### Docker you can also set up the development environment with [Docker](https://www.docker.com/) diff --git a/Scheduler.py b/Scheduler.py index 5441eaf..92c9fec 100644 --- a/Scheduler.py +++ b/Scheduler.py @@ -40,6 +40,9 @@ from sqlalchemy.sql import func import traceback +from taskrunner.InfoScanner import info_scanner + + class Scheduler: def __init__(self): @@ -173,6 +176,7 @@ def scan_bangumi(self): def on_connected(result): # logger.info(result) scheduler.start() + info_scanner.start() def on_connect_fail(result): logger.error(result) diff --git a/config/config-sample.yml b/config/config-sample.yml index 1ed01c8..2528aa8 100644 --- a/config/config-sample.yml +++ b/config/config-sample.yml @@ -13,6 +13,9 @@ deluge: task: interval: 15 # unit minute + info_scanner: + scan_time: # An info scanner will be scheduled once a day at this particular time. 
logger = logging.getLogger(__name__)


class InfoScanner(object):
    """Once-a-day task that fills in missing episode information.

    Every ``CHECK_INTERVAL`` seconds ``check_time`` compares the current hour
    with the configured scan hour.  On the first match of a calendar day a
    scan is started in a worker thread: episodes whose ``name`` or
    ``duration`` is an empty string are looked up on the bgm.tv API and their
    empty ``name``, ``name_cn`` and ``duration`` fields are filled in.
    """

    CONFIG_PATH = './config/config.yml'
    # NOTE(review): config-sample.yml documents the default as 23:00 while the
    # code has always used 16:00 -- confirm which one is intended.
    DEFAULT_SCAN_TIME = '16:00'
    DEFAULT_SCAN_TIME_FORMAT = '%H:%M'
    CHECK_INTERVAL = 60   # seconds between two check_time calls
    REQUEST_DELAY = 20    # seconds to wait between two bgm.tv API requests
    BGM_URL_BASE = 'http://api.bgm.tv/subject/'
    BGM_URL_PARAM = '?responseGroup=large'

    def __init__(self):
        """Read the scan time from the config file and reset the scan state.

        Raises:
            IOError: if the config file cannot be opened.
            ValueError: if the configured time does not match its format.
        """
        # ``with`` closes the handle; safe_load never builds arbitrary Python
        # objects and, unlike a bare yaml.load, works on current PyYAML.
        with open(self.CONFIG_PATH, 'r') as fr:
            config = yaml.safe_load(fr)

        self.scan_time = self._parse_scan_time(config)
        self.scanner_running = False
        self.last_scan_date = None
        self._loop = None

    @staticmethod
    def _parse_scan_time(config):
        """Return the configured scan time as a ``datetime``.

        A missing or empty ``task`` / ``info_scanner`` section and ``None``
        values fall back to the defaults, so ``scan_time`` is always set.
        """
        task_config = (config or {}).get('task') or {}
        scanner_config = task_config.get('info_scanner') or {}

        scan_time = scanner_config.get('scan_time')
        if scan_time is None:
            scan_time = InfoScanner.DEFAULT_SCAN_TIME
        scan_time_format = scanner_config.get('scan_time_format')
        if scan_time_format is None:
            scan_time_format = InfoScanner.DEFAULT_SCAN_TIME_FORMAT

        # YAML 1.1 reads an unquoted ``23:00`` as the base-60 integer 1380;
        # turn it back into text when the default H:M format is in use.
        if (isinstance(scan_time, int)
                and scan_time_format == InfoScanner.DEFAULT_SCAN_TIME_FORMAT):
            scan_time = '%02d:%02d' % divmod(scan_time, 60)

        return datetime.strptime(str(scan_time), scan_time_format)

    def start(self):
        """Start calling ``check_time`` every ``CHECK_INTERVAL`` seconds."""
        self._loop = LoopingCall(self.check_time)
        finished = self._loop.start(self.CHECK_INTERVAL)
        # the loop stops when check_time raises; make that visible in the log
        finished.addErrback(self._on_loop_failed)

    def _on_loop_failed(self, failure):
        """Log the failure that stopped the periodic check."""
        logger.error('info scanner loop stopped: %s', failure.getTraceback())

    def check_time(self):
        """Start a scan if none is running, none ran today and the hour matches."""
        if self.scanner_running:
            return
        current_time = datetime.now()
        if self.last_scan_date is not None and self.last_scan_date == current_time.date():
            return
        if self.scan_time.hour != current_time.hour:
            return

        self.scanner_running = True
        self.last_scan_date = current_time.date()
        deferred = self.scan_episode()
        if deferred is None:
            # scan_episode was overridden and gave us nothing to wait on
            self.scanner_running = False
        else:
            # keep the flag set until the worker thread has really finished
            deferred.addBoth(self._on_scan_done)

    def _on_scan_done(self, result):
        """Clear the running flag once the scan thread has finished."""
        self.scanner_running = False
        return result

    def get_bgm_info(self, bgm_id):
        """Request one subject from the bgm.tv API.

        Returns:
            A ``(status_code, info)`` tuple; ``info`` is the decoded JSON body
            for a status below 400 and an empty dict otherwise.
        """
        bangumi_tv_url = self.BGM_URL_BASE + str(bgm_id) + self.BGM_URL_PARAM
        r = requests.get(bangumi_tv_url, timeout=60)
        if r.status_code < 400:
            return (r.status_code, r.json())
        return (r.status_code, {})

    def _fetch_bangumi_info(self, bgm_id):
        """Return the info dict for ``bgm_id``, or None when the lookup fails."""
        logger.info('try to get info for bangumi of %s' % str(bgm_id))
        try:
            (status_code, bangumi_info) = self.get_bgm_info(bgm_id)
        except (requests.RequestException, ValueError) as error:
            # network failure or undecodable body: skip only this bangumi
            logger.error(error)
            return None
        if status_code < 400 and isinstance(bangumi_info, dict):
            return bangumi_info
        return None

    @staticmethod
    def _fill_episode(episode, bangumi_info):
        """Copy name, name_cn and duration into the empty fields of ``episode``.

        Returns:
            True if ``bangumi_info`` has an entry whose id equals
            ``episode.bgm_eps_id``, False otherwise.
        """
        for eps in bangumi_info.get('eps') or []:
            if eps.get('id') != episode.bgm_eps_id:
                continue
            for field in ('name', 'name_cn', 'duration'):
                # only empty fields are filled, and never with a missing value
                if getattr(episode, field) == '' and eps.get(field) is not None:
                    setattr(episode, field, eps[field])
            return True
        return False

    def __scan_episode_in_thread(self):
        """Fill in the episodes with an empty name or duration (worker thread)."""
        logger.info('start scan info of episode')
        session = SessionManager.Session
        try:
            # we don't scan the episode those name_cn is missing
            # because many of them don't have name_cn
            result = session.query(Episode, Bangumi).\
                join(Bangumi).\
                filter(or_(Episode.name == '', Episode.duration == ''))

            # bgm_id -> info dict, or None when the lookup failed; failures are
            # cached too so a broken subject is requested only once per scan
            bgm_info_cache = {}

            for episode, bangumi in result:
                bgm_id = bangumi.bgm_id
                if bgm_id not in bgm_info_cache:
                    # every request after the first one is delayed to prevent
                    # accessing the bgm api too frequently
                    if bgm_info_cache:
                        time.sleep(self.REQUEST_DELAY)
                    bgm_info_cache[bgm_id] = self._fetch_bangumi_info(bgm_id)

                bangumi_info = bgm_info_cache[bgm_id]
                if bangumi_info is None:
                    continue

                self._fill_episode(episode, bangumi_info)

            session.commit()
            logger.info('scan finished, will scan at next day')
        except exc.DBAPIError as db_error:
            logger.error(db_error)
            # a failed session stays unusable until it is rolled back
            session.rollback()
        except Exception:
            logger.exception('scan info of episode failed')
            session.rollback()

    def scan_episode(self):
        """Run the scan in a worker thread and return its Deferred."""
        return threads.deferToThread(self.__scan_episode_in_thread)