-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsogouIndex.py
67 lines (59 loc) · 1.9 KB
/
sogouIndex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote
from random import choice, randint
from userAgents import agents
import re
import json
import xlwt
# Script: fetch the Sogou Index "search heat" page for one keyword, pull the
# two JSON blobs embedded in the page's first <script> tag, and dump their
# 'pv' / 'date' series into an .xls workbook (one sheet per blob).

# Query parameters.
keyword = '煤改电'
time = 'MONTH'  # YEAR, MONTH, WEEK
searchType = 'SEARCH_ALL'  # SEARCH_ALL, SEARCH_PC, SEARCH_WAP, MEDIA_WECHAT
url = 'http://zhishu.sogou.com/index/searchHeat?kwdNamesStr=' + \
    quote(keyword, encoding='utf8') + '&timePeriodType=' + time + \
    '&dataType=' + searchType + '&queryType=INPUT'


def _write_series(worksheet, items):
    """Write each item's 'pv' to column 0 and 'date' to column 1.

    Items are written one per row, starting at row 0, in iteration order.

    Returns:
        A ``(pv_values, dates)`` pair of lists in the same row order.

    Raises:
        KeyError: if an item lacks the 'pv' or 'date' key.
    """
    pv_values = []
    dates = []
    for row, item in enumerate(items):
        pv = item['pv']
        date = item['date']
        pv_values.append(pv)
        dates.append(date)
        worksheet.write(row, 0, label=pv)
        worksheet.write(row, 1, label=date)
    return pv_values, dates


# Bound before the try block so the except handler can inspect it safely even
# when the request or the HTML parse is what failed.
bsObj = None
try:
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        agent = choice(agents)
        headers = {'User-Agent': agent}
        req = session.get(url, headers=headers, timeout=60)
    bsObj = BeautifulSoup(req.content, 'html5lib')
    # Only the first <script> element of the document is inspected.
    scriptStr = bsObj.script.get_text()
    # NOTE(review): the non-greedy match stops at the first ';', so a ';'
    # inside the embedded JSON would truncate it -- confirm against real pages.
    data = re.findall(r'root\.SG\.data\s=\s(.+?);', scriptStr)[0]
    wholeData = re.findall(r'root\.SG\.wholedata\s=\s(.+?);', scriptStr)[0]
    print('数据读取成功')
    dataJson = json.loads(data)
    wholeDataJason = json.loads(wholeData)

    workbook = xlwt.Workbook()
    worksheetData = workbook.add_sheet('data')
    worksheetWholeData = workbook.add_sheet('wholeData')

    # Only the first entry of each 'pvList' is exported.
    dataList, timeList = _write_series(worksheetData, dataJson['pvList'][0])
    wholeDataList, wholeTimeList = _write_series(
        worksheetWholeData, wholeDataJason['pvList'][0])

    workbook.save(keyword + '-' + time + '-' + searchType + '.xls')
    print('写入成功')
except Exception as e:
    # Top-level boundary of the script: report the failure and keep going to
    # the final status line instead of crashing with a traceback.
    print(e)
    # The "noresult" check is only possible if the page was actually parsed.
    if bsObj is not None and bsObj.find('div', {'class': 'noresult'}):
        print('未收录关键词')
finally:
    print('结束')