-
Notifications
You must be signed in to change notification settings - Fork 166
/
work_spider.py
42 lines (31 loc) · 1.06 KB
/
work_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created by shimeng on 17-9-21
import sys
# Put the path to your own checkout of the framework here
sys.path.append('/home/shimeng/code/spider_framework_github_responsity')
from spider.tools import format_put_data
from spider.data_save import pipeline
from spider.html_parser import parser
from spider.page_downloader import aispider
from spider.threads import start, work_queue, save_queue
from spider.log_format import logger
from proxy_basic_config import url_parse_dict
from _request import valid
from get_proxies_base_spider import SpiderMain
class WorkSpider(SpiderMain):
    """Spider entry class that extends SpiderMain and overrides run()."""

    def __init__(self):
        # No extra state here; just delegate to the SpiderMain initializer.
        super(WorkSpider, self).__init__()

    # Overrides the run method.
    # If you use a custom request function, it can be set in the crawl
    # function via request=your_request_function; the default is the
    # framework's own request.
    # NOTE(review): the comment above says "crawl", but the call below is
    # self.craw() -- confirm the method name against SpiderMain.
    def run(self):
        """Call start() from spider.threads, then self.craw().

        start() presumably launches the framework's worker threads and
        craw() is presumably inherited from SpiderMain -- neither is
        defined in this file, so verify against their sources.
        """
        start()
        self.craw()
if __name__ == '__main__':
    # Build the spider and kick off the run.
    worker = WorkSpider()
    worker.run()

    # Block on each queue's join(), work queue first, then save queue.
    # (presumably queue.Queue-style objects from spider.threads -- confirm)
    for pending in (work_queue, save_queue):
        pending.join()

    # Both joins have returned; report completion.
    logger.info('All Job Finishing, Please Check!')