-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
79 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Target URL: https://www.baidu.com/s?&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
# Non-ASCII characters in a URL must be percent-encoded (UTF-8 bytes written
# as %XX); this is URL encoding, not "Unicode encoding".

# Goal: fetch the page source of https://www.baidu.com/s?&wd=周杰伦

import urllib.request
import urllib.parse

url = 'https://www.baidu.com/s?&wd='

# Customize the request with a browser User-Agent: checking the UA header is
# the first anti-scraping measure this script works around.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
}

# Percent-encode the search keyword so it can be embedded in the query string.
# urllib.parse.quote does the encoding.
name = urllib.parse.quote('周杰伦')

url = url + name

# Build the customized request object for the keyword URL.
request = urllib.request.Request(url=url, headers=headers)

# Send the request the way a browser would. The context manager closes the
# HTTP response even if reading or decoding raises.
with urllib.request.urlopen(request) as response:
    # Read the response body and decode it to text.
    content = response.read().decode('utf-8')

# Print the page source.
print(content)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# When to use urlencode: when the query string has several parameters.

# Example of a multi-parameter URL:
# https://www.baidu.com/s?tn=59044660_hao_pg&ie=utf-8&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6

# Minimal demonstration of urlencode on its own:
# import urllib.parse
# data = {
#     'wd': '周杰伦',
#     'sex': '男',
#     'location': '中国台湾省'
# }
#
# a = urllib.parse.urlencode(data)
# print(a)

# Goal: fetch the page source of
# https://www.baidu.com/s?wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7&location=%E4%B8%AD%E5%9B%BD%E5%8F%B0%E6%B9%BE%E7%9C%81

import urllib.request
import urllib.parse

base_url = 'https://www.baidu.com/s?'

data = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '中国台湾省',
}

# urlencode percent-encodes every key/value pair and joins them with '&'.
new_data = urllib.parse.urlencode(data)

# Full resource URL: base URL followed by the encoded query string.
url = base_url + new_data

# Send a browser User-Agent to get past UA-based anti-scraping checks.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
}

# Build the customized request object.
request = urllib.request.Request(url=url, headers=headers)

# Send the request the way a browser would. The context manager closes the
# HTTP response even if reading or decoding raises.
with urllib.request.urlopen(request) as response:
    # Read the page source and decode it to text.
    content = response.read().decode('utf-8')

print(content)