Skip to content

Commit

Permalink
解决检索出现汉语问题
Browse files Browse the repository at this point in the history
  • Loading branch information
JLUVicent committed Sep 13, 2021
1 parent 5ab34d5 commit b29b19c
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
32 changes: 32 additions & 0 deletions urllib_get请求的quote方法.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# https://www.baidu.com/s?&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
# Unicode编码统一

# 需求:获取https://www.baidu.com/s?&wd=周杰伦的网页源码

import urllib.request
import urllib.parse

url = 'https://www.baidu.com/s?&wd='

# 请求对象的定制是为了解决反爬的第一种手段UA
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
}

# 将周杰伦三个字变成unicode编码的格式
# 我们需要依赖于urllib.parse
name = urllib.parse.quote('周杰伦')

url = url+name

# 请求对象的定制 指定关键字
request = urllib.request.Request(url=url, headers=headers)

# 模拟浏览器向服务器发送请求
response = urllib.request.urlopen(request)

# 获取响应的内容
content = response.read().decode('utf-8')

# 打印数据
print(content)
47 changes: 47 additions & 0 deletions urllib_get请求的urlencode方法.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# urlencode应用场景:多个参数的时候

# https://www.baidu.com/s?tn=59044660_hao_pg&ie=utf-8&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6

# import urllib.parse
# data = {
# 'wd': '周杰伦',
# 'sex': '男',
# 'location':'中国台湾省'
# }

# a = urllib.parse.urlencode(data)
# print(a)

# 获取https://www.baidu.com/s?wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7&location=%E4%B8%AD%E5%9B%BD%E5%8F%B0%E6%B9%BE的网页源码

import urllib.request
import urllib.parse

base_url = 'https://www.baidu.com/s?'

data = {
'wd': '周杰伦',
'sex': '男',
'location': '中国台湾省'
}

new_data = urllib.parse.urlencode(data)

# 请求资源路径
url = base_url+new_data

# 防止反爬
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
}

# 请求对象的定制
request = urllib.request.Request(url=url, headers=headers)

# 模拟浏览器向服务器发送请求
response = urllib.request.urlopen(request)

# 获取网页源码的数据
content = response.read().decode('utf-8')

print(content)

0 comments on commit b29b19c

Please sign in to comment.