-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathscreenshot.py
70 lines (62 loc) · 2.47 KB
/
screenshot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#coding:utf-8
# -*- coding:utf-8 -*-
import sys
import time
import redis
import cookielib
from pymongo import MongoClient
from selenium import webdriver
import os
# Name used for the per-user screenshot sub-folder (see get_url).
# NOTE(review): presumably the Instagram account being captured - confirm.
user = 'hoccgoomusic'

# Extra command-line flags handed to the PhantomJS process: route its
# traffic through an HTTPS proxy listening on 127.0.0.1:1080.
service_args = ['--proxy=https://127.0.0.1:1080', '--proxy-type=https']
def get_url(url_list):
    """Drain a Redis list of post URLs and save one screenshot per URL.

    For every URL popped from the Redis list named ``url_list`` the matching
    document is looked up in the ``instagram.hoccgoomusic_1108`` MongoDB
    collection.  Its ``taken_at_timestamp`` field decides the target file:
    ``<screenshot root>\\<user>\\<YYYY_MM>\\<YYYY-MM-DD_HHMMSS>.png``.
    URLs whose screenshot file already exists are skipped; the rest are
    rendered with a fresh PhantomJS instance via ``get_picture``.

    Args:
        url_list: name (key) of the Redis list that holds the URLs.
    """
    pool = redis.ConnectionPool(host='localhost', port=6379, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    conn = MongoClient('127.0.0.1', 27017)
    my_set = conn.instagram.hoccgoomusic_1108

    while True:
        # Pop first and test for None instead of llen()+lpop(): the list can
        # be emptied by someone else between the two calls.
        url = r.lpop(url_list)
        if url is None:
            break
        print("-------------------------------------------------------------------------------")
        print(url)

        post = my_set.find_one({'url': url})
        if post is None or post.get('taken_at_timestamp') is None:
            # Without a timestamp no file name can be built; skip this URL
            # rather than abort the whole run.
            print('no usable MongoDB record for %s, skipped' % url)
            continue

        time_local = time.localtime(post['taken_at_timestamp'])
        month = time.strftime('%Y_%m', time_local)
        post_time = time.strftime('%Y-%m-%d_%H%M%S', time_local)

        dirpath = r'D:\python\Instagram_crawler-master\Instagram_crawler-master\Instagram\screenshot\{0}\{1}'.format(user, month)
        if not os.path.isdir(dirpath):
            # makedirs also creates the per-user parent folder; mkdir would
            # fail the first time a new user is processed.
            os.makedirs(dirpath)

        pic_path = r'{0}\{1}'.format(dirpath, post_time + '.png')
        if os.path.exists(pic_path):
            continue

        myDriver = webdriver.PhantomJS(
            r'D:\python\phantomjs-2.1.1-windows\bin\phantomjs.exe',
            service_args=service_args)
        try:
            get_picture(myDriver, url, pic_path)
        finally:
            # Always end the PhantomJS process, even if get_picture raises.
            myDriver.quit()
def get_picture(myDriver, url, fpath, queue_name=None):
    """Take a screenshot of ``url`` and store it at ``fpath``.

    The page is loaded with a 15 second page-load timeout.  Up to five
    attempts are made, pausing five seconds between attempts.  When every
    attempt fails the URL is appended to a Redis list so it can be picked up
    again later.

    Args:
        myDriver: Selenium WebDriver used to load the page; its current
            window is closed after a successful screenshot.
        url: address of the page to capture.
        fpath: file path the PNG screenshot is written to.
        queue_name: name of the Redis list that receives the URL after all
            attempts failed.  Defaults to the module-level ``url_list``,
            which only exists when this file is run as a script.

    Returns:
        True when the screenshot was saved, False when the URL was re-queued.
    """
    retry = 5
    while retry > 0:
        try:
            # Give up on a page load after 15 seconds.
            myDriver.set_page_load_timeout(15)
            myDriver.get(url)
            # Selenium signals a failed write by returning False instead of
            # raising, so turn that into an error to trigger a retry.
            if not myDriver.get_screenshot_as_file(fpath):
                raise IOError('could not write screenshot to %s' % fpath)
            myDriver.close()
            return True
        except Exception:
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            s = u'当前网页超时:%s\n' % url
            print(s)
            retry = retry - 1
            if retry > 0:
                time.sleep(5)

    # Every attempt failed: put the URL back at the tail of the queue.
    if queue_name is None:
        queue_name = url_list
    pool = redis.ConnectionPool(host='localhost', port=6379, decode_responses=True)
    r = redis.Redis(connection_pool=pool)
    r.rpush(queue_name, url)
    print("%%%%%%%%%%%%")
    return False
if __name__ == "__main__":
    # Key of the Redis list that holds the queued URLs.  It is bound as a
    # module-level name because get_picture() reads ``url_list`` when it
    # re-queues a URL that could not be captured.
    url_list = 'url'
    get_url(url_list)