-
Notifications
You must be signed in to change notification settings - Fork 2
/
msdn.py
98 lines (75 loc) · 3.36 KB
/
msdn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import json
import os
import re

import requests
# Site root and the Category/* endpoints of msdn.itellyou.cn.
url = 'https://msdn.itellyou.cn/'
index_url = 'https://msdn.itellyou.cn/Category/Index'
lang_url = 'https://msdn.itellyou.cn/Category/GetLang'
list_url = 'https://msdn.itellyou.cn/Category/GetList'
product_url = 'https://msdn.itellyou.cn/Category/GetProduct'

# Ready-made {'id': ...} payloads; the trailing comment is the category label
# given by the original author for each id.
data = dict(id='aff8a80f-2dee-4bba-80ec-611ac56d3849')   # Enterprise Solutions
data1 = dict(id='23958de6-bedb-4998-825c-aa3d1e00d097')  # MSDN Library
data2 = dict(id='95c4acfd-d1a6-41fe-b14d-a6816973d2aa')  # Tools and Resources
data3 = dict(id='051d75ee-ff53-43fe-80e9-bac5c10fc0fb')  # Applications
data4 = dict(id='fcf12b78-0662-4dd4-9a82-72040db91c9e')  # Developer Tools
data5 = dict(id='7ab5f0cb-7607-4bbe-9e88-50716dc43de6')  # Operating Systems
data6 = dict(id='36d3766e-0efb-491e-961b-d1a419e06c68')  # Servers
data7 = dict(id='5d6967f0-b58d-4385-8769-b886bfc2b78c')  # Designer Tools

# Request headers: a desktop Chrome User-Agent plus the site itself as Referer.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36',
    'Referer': 'https://msdn.itellyou.cn/',
}
def get_ilt(url, data, headers, timeout=30):
    """POST form data to *url* and return the response body as text.

    Args:
        url: Endpoint to POST to.
        data: Form fields for the request body; must be a dict.
        headers: HTTP headers sent with the request.
        timeout: Seconds passed to ``requests.post`` as its timeout. Without
            one, a stalled server would block the script indefinitely.

    Returns:
        The response body decoded as text (``Response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    r = requests.post(url, data=data, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.text
def dowload(index_res):
    """Save the product record of every entry found in an index response.

    For each ``"id"`` fragment found in *index_res* this asks the language
    endpoint for the entry's languages, takes the first language, asks the
    list endpoint for the products, takes the first product, fetches its
    product record and writes the raw response text to ``<name>.txt`` in the
    current working directory.

    Entries whose responses do not match the expected shape (no language, no
    product, no file name) are skipped instead of raising ``IndexError``.

    Args:
        index_res: Response text of the index endpoint.

    Raises:
        requests.HTTPError: Propagated from ``get_ilt`` on a 4xx/5xx answer.
        ValueError: If an extracted fragment is not valid JSON.
    """
    # Each match is the text between '{' and ',"name"', e.g. '"id":"..."'.
    index_id = re.findall(r'{(.*?),"name".*?},', index_res)
    for fragment in index_id:
        # The fragment comes from a remote server: parse it as JSON rather
        # than executing it with eval().
        data_ = json.loads('{' + fragment + '}')
        lang_res = get_ilt(lang_url, data_, headers)
        lang_id = re.findall(
            r'{"status":true,"result":\[{"id":"(.*?)","lang":.*?}]}', lang_res)
        if not lang_id:  # entry has no language listing: nothing to fetch
            continue
        data_['lang'] = lang_id[0]
        data_['filter'] = 'true'
        list_res = get_ilt(list_url, data_, headers)
        # Captures '{"id":"..."' of the first product; the closing brace is
        # re-added below to make it a complete JSON object.
        product_id = re.findall(
            r'{"status":true,"result":\[(.*?),"name":.*?', list_res)
        if not product_id:
            continue
        product_data = json.loads(product_id[0] + '}')
        product_res = get_ilt(product_url, product_data, headers)
        file_name = re.findall(
            r'{"status":true,"result":{"FileName":(.*?),"DownLoad":.*?',
            product_res)
        if not file_name:
            continue
        # The capture still carries its surrounding quotes; drop the leading
        # quote and the last four characters, then append '.txt'.
        # NOTE(review): for '"x.iso"' this yields 'x..txt' - confirm whether
        # the double dot is intended before changing the slice.
        path = file_name[0][1:-4] + '.txt'
        # Explicit encoding so non-ASCII text is written the same way on
        # every platform instead of depending on the locale default.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(product_res)
        print(file_name, ':下载完成')
def dowload_all(timeout=30):
    """Fetch the home page and run ``dowload`` for every category menu id.

    Args:
        timeout: Seconds passed to ``requests.get`` for the home-page request,
            so a stalled server cannot block the script indefinitely.

    Raises:
        requests.HTTPError: If the home page or an index request answers with
            a 4xx/5xx status.
        requests.Timeout: If the home page does not respond within *timeout*.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error page, matching what get_ilt does for POSTs.
    res.raise_for_status()
    # Pick the id of every top-level category out of the menu markup.
    all_index = re.findall(
        ' data-loadmenu="true" data-menuid="(.*?)" data-target=', res.text)
    for menu_id in all_index:
        # Reuse the module-level payload, e.g.
        # data = {'id': 'aff8a80f-2dee-4bba-80ec-611ac56d3849'}.
        # The in-place update of the global dict is kept as in the original.
        data['id'] = menu_id
        index_res = get_ilt(index_url, data, headers)
        dowload(index_res)
# All output files are written relative to ./txt; create the directory first
# so chdir does not fail when it is missing.
os.makedirs("txt", exist_ok=True)
os.chdir(r"txt")
dowload_all()

# Debug run: re-read the category ids from the home page and print the raw
# index response for one fixed category id.
res = requests.get(url, headers=headers, timeout=30)
all_index = re.findall(
    ' data-loadmenu="true" data-menuid="(.*?)" data-target=', res.text)
if all_index:  # the page may contain no menu ids
    data['id'] = all_index[0]
# A plain dict literal replaces eval() of a constant string.
index_res = get_ilt(
    index_url, {'id': 'aff8a80f-2dee-4bba-80ec-611ac56d3849'}, headers)
print(index_res)