Merge pull request #85 from my8100/default_logging_INFO
 Bump version: 1.3.0 to 1.4.0
my8100 authored Aug 16, 2019
2 parents 6033e7e + c688ecf commit 2df1c3d
Showing 48 changed files with 71 additions and 69 deletions.
20 changes: 4 additions & 16 deletions .circleci/config.yml
@@ -69,14 +69,14 @@ jobs:
- run:
name: Setup DATA_PATH
command: |
printf "\nDATA_PATH = '"$DATA_PATH"'\n" >> scrapydweb_settings_v9.py
echo $DATA_PATH
- when:
condition: <<parameters.use-sqlite>>
steps:
- run:
name: Set DATABASE_URL to sqlite
command: |
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v9.py
echo $DATABASE_URL
- when:
condition: <<parameters.use-postgresql>>
steps:
@@ -87,11 +87,6 @@ jobs:
# createdb: could not connect to database template1: FATAL: role "circleci" does not exist
# sudo apt install -y postgresql-client
# createdb -h localhost scrapydweb_apscheduler -O circleci
- run:
name: Set DATABASE_URL to postgresql
command: |
# postgres://[email protected]:5432
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v9.py
- when:
condition: <<parameters.use-mysql>>
steps:
@@ -117,11 +112,6 @@ jobs:
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_timertasks"
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_metadata"
# mysql -h 127.0.0.1 -u root -prootpw -e "create database scrapydweb_jobs"
- run:
name: Set DATABASE_URL to mysql
command: |
# mysql://user:[email protected]:3306
printf "\nDATABASE_URL = '"$DATABASE_URL"'\n" >> scrapydweb_settings_v9.py
- run:
name: Install dependencies
@@ -168,10 +158,8 @@ jobs:
- run:
name: Generate report
command: |
touch scrapydweb_settings_v9.py
cat scrapydweb_settings_v9.py
echo $DATA_PATH
echo $DATABASE_URL
echo DATA_PATH: $DATA_PATH
echo DATABASE_URL: $DATABASE_URL
. venv/bin/activate
coverage report
coverage html
10 changes: 10 additions & 0 deletions HISTORY.md
@@ -1,5 +1,15 @@
Release History
===============
1.4.0 (2019-08-16)
------------------
- New Features
- Add API for sending text or alert via Slack, Telegram, or Email
- Improvements
- UI improvements on sidebar and multinode buttons
- Others
- Update config file to scrapydweb_settings_v10.py


[1.3.0](https://github.com/my8100/scrapydweb/issues?q=is%3Aclosed+milestone%3A1.3.0) (2019-08-04)
------------------
- New Features
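
For readers of the changelog above: the new alert API lets an external script push a message through the configured Slack, Telegram, or Email channel. The route and parameters are not part of this diff, so the sketch below uses a hypothetical endpoint purely to show the shape of such a call; consult the ScrapydWeb docs for the real path.

import requests

# Hypothetical endpoint and credentials; the real route is not shown in this commit.
SCRAPYDWEB_URL = 'http://127.0.0.1:5000'
resp = requests.get(
    SCRAPYDWEB_URL + '/1/sendtext/slack/',      # assumed path, for illustration only
    params={'text': 'Hello from ScrapydWeb'},   # assumed parameter name
    auth=('admin', 'scrapydweb'),               # only needed if basic auth is enabled
    timeout=10,
)
print(resp.status_code, resp.text)
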
2 changes: 1 addition & 1 deletion README.md
@@ -44,7 +44,7 @@
- :package: **Auto packaging**
- :male_detective: **Integrated with [:link: *LogParser*](https://github.com/my8100/logparser)**
- :alarm_clock: **Timer tasks**
- :e-mail: **Email notice**
- :e-mail: **Monitor & Alert**
- :iphone: Mobile UI
- :closed_lock_with_key: Basic auth for web UI

2 changes: 1 addition & 1 deletion README_CN.md
@@ -44,7 +44,7 @@
- :package: **自动打包项目**
- :male_detective: **集成 [:link: *LogParser*](https://github.com/my8100/logparser)**
- :alarm_clock: **定时器任务**
- :e-mail: **邮件通知**
- :e-mail: **监控和警报**
- :iphone: 移动端 UI
- :closed_lock_with_key: web UI 支持基本身份认证

2 changes: 1 addition & 1 deletion scrapydweb/__version__.py
@@ -1,7 +1,7 @@
# coding: utf-8

__title__ = 'scrapydweb'
__version__ = '1.3.0'
__version__ = '1.4.0'
__author__ = 'my8100'
__author_email__ = '[email protected]'
__url__ = 'https://github.com/my8100/scrapydweb'
6 changes: 3 additions & 3 deletions scrapydweb/default_settings.py
@@ -338,13 +338,13 @@
# Note that use_reloader is set to False in run.py
DEBUG = False

# The default is False, set it to True to change the logging level from WARNING to DEBUG
# The default is False, set it to True to change the logging level from INFO to DEBUG
# for getting more information about how ScrapydWeb works, especially while debugging.
VERBOSE = False

# The default is '', which means saving all program data in the Python directory.
# e.g. 'C:/Users/username/scrapydweb_data' or '/home/username/scrapydweb_data'
DATA_PATH = ''
DATA_PATH = os.environ.get('DATA_PATH', '')

# The default is '', which means saving data of Jobs and Timer Tasks in DATA_PATH using SQLite.
# The data could be also saved in MySQL or PostgreSQL backend in order to improve concurrency.
@@ -355,4 +355,4 @@
# 'postgres://username:[email protected]:5432'
# 'sqlite:///C:/Users/username'
# 'sqlite:////home/username'
DATABASE_URL = ''
DATABASE_URL = os.environ.get('DATABASE_URL', '')
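
With the two settings above now read via os.environ.get, a deployment (or the CircleCI job earlier in this diff) can configure the data directory and database backend without appending lines to the settings file. A minimal sketch of the lookup behavior, using illustrative values rather than anything taken from this commit:

import os

# Illustrative values only; in CI they come from the job's environment.
os.environ.setdefault('DATA_PATH', '/home/username/scrapydweb_data')
os.environ.setdefault('DATABASE_URL', 'postgres://username:password@127.0.0.1:5432')

# Same fallback pattern as the new default_settings.py lines: '' when unset.
DATA_PATH = os.environ.get('DATA_PATH', '')
DATABASE_URL = os.environ.get('DATABASE_URL', '')
print('DATA_PATH:', DATA_PATH)
print('DATABASE_URL:', DATABASE_URL)
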
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion scrapydweb/templates/base.html
@@ -280,7 +280,7 @@ <h3>System</h3>
</ul>
</div>
<div class="github">
<a id="scrapydweb_version" class="request" href="https://pypi.org/project/scrapydweb/" target="_blank">v{{ SCRAPYDWEB_VERSION }} DEV</a>
<a id="scrapydweb_version" class="request" href="https://pypi.org/project/scrapydweb/" target="_blank">v{{ SCRAPYDWEB_VERSION }}</a>
<a class="github-button" href="{{ GITHUB_URL.replace('/scrapydweb', '') }}" aria-label="@my8100 on GitHub">GitHub</a>
<div>
<!-- <ul id="links"> -->
1 change: 1 addition & 0 deletions scrapydweb/utils/check_app_config.py
@@ -200,6 +200,7 @@ def check_assert(key, default, is_instance, allow_zero=True, non_empty=False, co

check_assert('EMAIL_PASSWORD', '', str)
if config.get('EMAIL_PASSWORD', ''):
logger.debug("Found EMAIL_PASSWORD, checking email settings")
check_assert('EMAIL_SUBJECT', '', str)
check_assert('EMAIL_USERNAME', '', str) # '' would default to config['EMAIL_SENDER']
# check_assert('EMAIL_PASSWORD', '', str, non_empty=True)
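
For context, check_assert validates one config key at a time, and the added debug line simply records that the email settings are being checked because EMAIL_PASSWORD is set. Below is a simplified, self-contained sketch of such a validator; the real helper reads config from its enclosing scope and its full signature is truncated above, so treat this parameter list as an approximation.

def check_assert(config, key, default, is_instance, allow_zero=True, non_empty=False):
    """Simplified stand-in for the check_assert helper in check_app_config.py."""
    value = config.setdefault(key, default)
    assert isinstance(value, is_instance), (
        "%s should be an instance of %s, got %r" % (key, is_instance, value))
    if not allow_zero:
        assert value != 0, "%s should not be 0" % key
    if non_empty:
        assert value, "%s should not be empty" % key


config = {'EMAIL_PASSWORD': 'secret'}
check_assert(config, 'EMAIL_PASSWORD', '', str)
if config.get('EMAIL_PASSWORD', ''):
    check_assert(config, 'EMAIL_SUBJECT', 'ScrapydWeb alert', str)
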
20 changes: 10 additions & 10 deletions scrapydweb/utils/poll.py
@@ -73,7 +73,7 @@ def __init__(self, url_scrapydweb, username, password,
if verbose:
self.logger.setLevel(logging.DEBUG)
else:
self.logger.setLevel(logging.WARNING)
self.logger.setLevel(logging.INFO)
self.exit_timeout = exit_timeout

self.init_time = time.time()
@@ -115,8 +115,8 @@ def fetch_jobs(self, node, url, auth):
running_jobs.append(job_tuple)
elif job['finish']:
finished_jobs_set.add(job_tuple)
self.logger.info("[node %s] got running_jobs: %s", node, len(running_jobs))
self.logger.info("[node %s] got finished_jobs_set: %s", node, len(finished_jobs_set))
self.logger.debug("[node %s] got running_jobs: %s", node, len(running_jobs))
self.logger.debug("[node %s] got finished_jobs_set: %s", node, len(finished_jobs_set))
return running_jobs, finished_jobs_set

def fetch_stats(self, node, job_tuple, finished_jobs):
@@ -139,7 +139,7 @@ def fetch_stats(self, node, job_tuple, finished_jobs):
self.logger.error("[node %s %s] fetch_stats failed: %s", node, self.scrapyd_servers[node-1], url)
if job_finished:
self.finished_jobs_dict[node].remove(job_tuple)
self.logger.warning("[node %s] retry in next round: %s", node, url)
self.logger.info("[node %s] retry in next round: %s", node, url)
else:
self.logger.debug("[node %s] fetch_stats got (%s) %s bytes from %s",
node, r.status_code, len(r.content), url)
@@ -156,7 +156,7 @@ def main(self):
self.logger.critical("GoodBye, exit_timeout: %s", self.exit_timeout)
break
else:
self.logger.warning("Sleeping for %ss", self.poll_round_interval)
self.logger.info("Sleeping for %ss", self.poll_round_interval)
time.sleep(self.poll_round_interval)
except KeyboardInterrupt:
self.logger.warning("Poll subprocess (pid: %s) cancelled by KeyboardInterrupt", self.poll_pid)
@@ -203,21 +203,21 @@ def run(self):

def update_finished_jobs(self, node, finished_jobs_set):
finished_jobs_set_previous = self.finished_jobs_dict.setdefault(node, set())
self.logger.info("[node %s] previous finished_jobs_set: %s", node, len(finished_jobs_set_previous))
self.logger.debug("[node %s] previous finished_jobs_set: %s", node, len(finished_jobs_set_previous))
# set([2,3]).difference(set([1,2])) => {3}
finished_jobs_set_new_added = finished_jobs_set.difference(finished_jobs_set_previous)
self.finished_jobs_dict[node] = finished_jobs_set
self.logger.info("[node %s] now finished_jobs_set: %s", node, len(self.finished_jobs_dict[node]))
self.logger.debug("[node %s] now finished_jobs_set: %s", node, len(self.finished_jobs_dict[node]))
if finished_jobs_set_new_added:
self.logger.warning("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)
else:
self.logger.info("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)
else:
self.logger.debug("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)

finished_jobs = []
ignore = self.ignore_finished_bool_list[node-1]
for job_tuple in finished_jobs_set_new_added:
if ignore:
self.logger.warning("[node %s] ignore finished job: %s", node, job_tuple)
self.logger.debug("[node %s] ignore finished job: %s", node, job_tuple)
else:
finished_jobs.append(job_tuple)
if ignore:
6 changes: 3 additions & 3 deletions scrapydweb/utils/scheduler.py
@@ -91,11 +91,11 @@ def my_listener(event):


def shutdown_scheduler():
apscheduler_logger.info("Scheduled tasks: %s", scheduler.get_jobs())
apscheduler_logger.debug("Scheduled tasks: %s", scheduler.get_jobs())
apscheduler_logger.warning("Shutting down the scheduler for timer tasks gracefully, "
"wait until all currently executing tasks are finished")
apscheduler_logger.info("The main pid is %s. Kill it manually if you don't want to wait",
handle_metadata().get('main_pid'))
apscheduler_logger.warning("The main pid is %s. Kill it manually if you don't want to wait",
handle_metadata().get('main_pid'))
scheduler.shutdown()
# apscheduler_logger.info("Waits until all currently executing jobs are finished. "
# "Press CTRL+C to force unclean shutdown")
12 changes: 7 additions & 5 deletions scrapydweb/utils/send_email.py
@@ -9,10 +9,6 @@


logger = logging.getLogger('scrapydweb.utils.send_email') # __name__
_handler = logging.StreamHandler()
_formatter = logging.Formatter(fmt="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
_handler.setFormatter(_formatter)
logger.addHandler(_handler)
logger.setLevel(logging.DEBUG)


@@ -74,7 +70,7 @@ def send_email(**kwargs):
kwargs.update(to_retry=False, need_debug=True)
logger.debug("Retrying...")
time.sleep(3)
send_email(**kwargs)
return send_email(**kwargs)
else:
result = True
reason = "Sent"
@@ -90,4 +86,10 @@


if __name__ == '__main__':
# To avoid logging twice when importing the send_email function to send email.
_handler = logging.StreamHandler()
_formatter = logging.Formatter(fmt="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
_handler.setFormatter(_formatter)
logger.addHandler(_handler)

send_email(**json.loads(sys.argv[1]))
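
The handler setup moves under __main__ because send_email.py doubles as a standalone script: when the module is imported by the main app, which already configures logging, a module-level StreamHandler would make every record appear twice, once via the module handler and once via propagation to the root handler. The other fix here, returning the recursive send_email call, lets the retry's result propagate back to the caller. A minimal stand-alone sketch of the double-logging effect:

import logging

logging.basicConfig(format="[root] %(levelname)s %(message)s")  # the importing app's handler

logger = logging.getLogger('scrapydweb.utils.send_email')
logger.setLevel(logging.DEBUG)
logger.warning("no extra handler -> printed once")

# What the removed module-level lines used to do:
_handler = logging.StreamHandler()
_handler.setFormatter(logging.Formatter("[module] %(levelname)s %(message)s"))
logger.addHandler(_handler)
logger.warning("extra handler added -> printed twice")
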
2 changes: 1 addition & 1 deletion scrapydweb/vars.py
@@ -15,7 +15,7 @@

PYTHON_VERSION = '.'.join([str(n) for n in sys.version_info[:3]])
PY2 = sys.version_info.major < 3
SCRAPYDWEB_SETTINGS_PY = 'scrapydweb_settings_v9.py'
SCRAPYDWEB_SETTINGS_PY = 'scrapydweb_settings_v10.py'
try:
custom_settings_module = importlib.import_module(os.path.splitext(SCRAPYDWEB_SETTINGS_PY)[0])
except ImportError:
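
Bumping SCRAPYDWEB_SETTINGS_PY from v9 to v10 means an existing scrapydweb_settings_v9.py is no longer picked up automatically; users copy or regenerate the file when upgrading. The except ImportError branch is cut off above, so the fallback shown in this sketch is an assumption about the general pattern rather than the exact code:

import importlib
import os

SCRAPYDWEB_SETTINGS_PY = 'scrapydweb_settings_v10.py'

try:
    custom_settings_module = importlib.import_module(os.path.splitext(SCRAPYDWEB_SETTINGS_PY)[0])
except ImportError:
    custom_settings_module = None  # assumed fallback: run with default_settings only
    print("%s not found on sys.path, using default settings" % SCRAPYDWEB_SETTINGS_PY)
else:
    print("Loaded custom settings from %s" % custom_settings_module.__file__)
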
2 changes: 1 addition & 1 deletion scrapydweb/views/baseview.py
@@ -58,7 +58,7 @@ def __init__(self, *args, **kwargs):
self.SQLALCHEMY_DATABASE_URI = app.config['SQLALCHEMY_DATABASE_URI']
self.SQLALCHEMY_BINDS = app.config['SQLALCHEMY_BINDS']

_level = logging.DEBUG if self.VERBOSE else logging.WARNING
_level = logging.DEBUG if self.VERBOSE else logging.INFO
self.logger.setLevel(_level)
logging.getLogger("requests").setLevel(_level)
logging.getLogger("urllib3").setLevel(_level)
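
This hunk is the heart of the PR (default_logging_INFO): with VERBOSE left at False, the ScrapydWeb, requests, and urllib3 loggers now run at INFO instead of WARNING, while the chattier per-poll messages elsewhere in the diff are demoted to DEBUG. A stand-alone sketch of the new mapping:

import logging

VERBOSE = False  # the default in default_settings.py

logging.basicConfig(format="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
_level = logging.DEBUG if VERBOSE else logging.INFO  # was logging.WARNING before 1.4.0
for name in ('scrapydweb', 'requests', 'urllib3'):
    logging.getLogger(name).setLevel(_level)

logger = logging.getLogger('scrapydweb')
logger.debug("hidden unless VERBOSE = True")
logger.info("visible by default as of 1.4.0")
logger.warning("always visible")
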
8 changes: 4 additions & 4 deletions scrapydweb/views/dashboard/jobs.py
@@ -203,7 +203,7 @@ def handle_jobs_with_db(self):
self.metadata['style'] = self.style
handle_metadata('jobs_style', self.style)
msg = "Change style to %s" % self.style
self.logger.warning(msg)
self.logger.info(msg)
# flash(msg, self.WARN)

# Note that there may be jobs with the same combination of (project, spider, job) in the fetched Jobs
@@ -256,7 +256,7 @@ def db_insert_jobs(self):
record.deleted = NOT_DELETED
record.pages = None
record.items = None
self.logger.warning("Recover deleted job: %s", record)
self.logger.info("Recover deleted job: %s", record)
flash("Recover deleted job: %s" % job, self.WARN)
else:
record = self.Job()
@@ -300,7 +300,7 @@ def db_clean_pending_jobs(self):
if (record.project, record.spider, record.job) not in current_pending_jobs:
db.session.delete(record)
db.session.commit()
self.logger.warning("Deleted pending jobs %s", record)
self.logger.info("Deleted pending jobs %s", record)

def query_jobs(self):
current_running_job_pids = [int(job['pid']) for job in self.jobs_backup if job['pid']]
@@ -461,7 +461,7 @@ def dispatch_request(self, **kwargs):
self.js['message'] = str(err)
else:
self.js['status'] = self.OK
self.logger.warning(self.js.setdefault('tip', "Deleted %s" % job))
self.logger.info(self.js.setdefault('tip', "Deleted %s" % job))
else:
self.js['status'] = self.ERROR
self.js['message'] = "job #%s not found in the database" % self.id
8 changes: 4 additions & 4 deletions scrapydweb/views/files/log.py
@@ -401,7 +401,7 @@ def monitor_alert(self):
job_data_default = ([0] * 8, [False] * 6, False, time.time())
job_data = job_data_dict.setdefault(self.job_key, job_data_default)
(self.job_stats_previous, self.triggered_list, self.has_been_stopped, self.last_send_timestamp) = job_data
self.logger.info(job_data_dict)
self.logger.debug(job_data_dict)
self.job_stats = [self.kwargs['log_categories'][k.lower() + '_logs']['count']
for k in self.ALERT_TRIGGER_KEYS]
self.job_stats.extend([self.kwargs['pages'] or 0, self.kwargs['items'] or 0]) # May be None by LogParser
@@ -527,14 +527,14 @@ def send_alert(self):
def handle_data(self):
if self.flag:
# Update job_data_dict (last_send_timestamp would be updated only when flag is non-empty)
self.logger.info("Previous job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
self.logger.debug("Previous job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
job_data_dict[self.job_key] = (self.job_stats, self.triggered_list, self.has_been_stopped, time.time())
self.logger.info("Updated job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
self.logger.debug("Updated job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])

if self.job_finished:
job_data_dict.pop(self.job_key)
od = job_finished_key_dict[self.node]
od[self.job_key] = None
if len(od) > self.jobs_to_keep:
od.popitem(last=False)
self.logger.info('job_finished: %s', self.job_key)
self.logger.info('job_finished: %s', self.job_key)
6 changes: 3 additions & 3 deletions scrapydweb/views/operations/deploy.py
@@ -112,7 +112,7 @@ def get_modification_times(self):
if timestamps:
max_timestamp_index = timestamps.index(max(timestamps))
self.latest_folder = self.folders[max_timestamp_index]
self.logger.info('latest_folder: %s', self.latest_folder)
self.logger.debug('latest_folder: %s', self.latest_folder)

def get_modification_time(self, path, func_walk=os.walk, retry=True):
# https://stackoverflow.com/a/29685234/10517783
@@ -171,8 +171,8 @@ def parse_scrapy_cfg(self):
diff = set(keys_all).difference(set(keys_exist))
for key in diff:
self.logger.debug('Pop %s, project %s', key, folder_project_dict.pop(key))
self.logger.info(self.json_dumps(folder_project_dict))
self.logger.info('folder_project_dict length: %s', len(folder_project_dict))
self.logger.debug(self.json_dumps(folder_project_dict))
self.logger.debug('folder_project_dict length: %s', len(folder_project_dict))


class DeployUploadView(BaseView):
12 changes: 6 additions & 6 deletions scrapydweb/views/operations/execute_task.py
@@ -46,8 +46,8 @@ def main(self):
continue
if index == 1:
# https://apscheduler.readthedocs.io/en/latest/userguide.html#shutting-down-the-scheduler
self.logger.info("Retry task #%s (%s) on nodes %s in %s seconds",
self.task_id, self.task_name, nodes, self.sleep_seconds_before_retry)
self.logger.warning("Retry task #%s (%s) on nodes %s in %s seconds",
self.task_id, self.task_name, nodes, self.sleep_seconds_before_retry)
time.sleep(self.sleep_seconds_before_retry)
self.logger.warning("Retrying task #%s (%s) on nodes %s", self.task_id, self.task_name, nodes)
for node in nodes:
@@ -119,7 +119,7 @@ def db_insert_task_job_result(self, js):
task_job_result.result = js.get('jobid', '') or js.get('message', '') or js.get('exception', '')
db.session.add(task_job_result)
db.session.commit()
self.logger.warning("Inserted %s", task_job_result)
self.logger.info("Inserted task_job_result: %s", task_job_result)

# https://stackoverflow.com/questions/13895176/sqlalchemy-and-sqlite-database-is-locked
def db_update_task_result(self):
@@ -133,18 +133,18 @@ def db_update_task_result(self):
url_delete_task_result = re.sub(r'/\d+/\d+/$', '/%s/%s/' % (self.task_id, self.task_result_id),
self.url_delete_task_result)
js = get_response_from_view(url_delete_task_result, auth=self.auth, data=self.data, as_json=True)
apscheduler_logger.warning("Delete task_result #%s [FAIL %s, PASS %s] of task #%s: %s",
apscheduler_logger.warning("Deleted task_result #%s [FAIL %s, PASS %s] of task #%s: %s",
self.task_result_id, self.fail_count, self.pass_count, self.task_id, js)
return
if not task_result:
apscheduler_logger.error("task_result #%s of task #%s not found", self.task_result_id, self.task_id)
apscheduler_logger.warning("Fail to update task_result #%s [FAIL %s, PASS %s] of task #%s",
apscheduler_logger.warning("Failed to update task_result #%s [FAIL %s, PASS %s] of task #%s",
self.task_result_id, self.fail_count, self.pass_count, self.task_id)
return
task_result.fail_count = self.fail_count
task_result.pass_count = self.pass_count
db.session.commit()
self.logger.warning("Inserted %s", task_result)
self.logger.info("Inserted task_result: %s", task_result)


def execute_task(task_id):
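
The retry announcement in execute_task.py is promoted to WARNING so it still shows up at the new default level. For reference, a schematic of the sleep-then-retry flow, assuming a run_node() callable that returns True on success (not a function from this codebase):

import time

def execute_on_nodes(all_nodes, run_node, sleep_seconds_before_retry=60):
    """Run the task on every node, then retry the failed ones once after a pause."""
    pending = list(all_nodes)
    for attempt in range(2):  # first pass plus a single retry round
        if attempt == 1 and pending:
            print("Retry on nodes %s in %s seconds" % (pending, sleep_seconds_before_retry))
            time.sleep(sleep_seconds_before_retry)
        pending = [node for node in pending if not run_node(node)]
        if not pending:
            break
    return pending  # nodes that still failed after the retry

failed = execute_on_nodes([1, 2, 3], run_node=lambda node: node != 2, sleep_seconds_before_retry=1)
print("still failing:", failed)
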