From b29b19c23e3b9a4335fd24bd164aec3e542eb72b Mon Sep 17 00:00:00 2001
From: JLUVicent <17390955615@163.com>
Date: Mon, 13 Sep 2021 12:48:08 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E6=A3=80=E7=B4=A2=E5=87=BA?=
 =?UTF-8?q?=E7=8E=B0=E6=B1=89=E8=AF=AD=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...7\232\204quote\346\226\271\346\263\225.py" | 32 +++++++++++++
 ...2\204urlencode\346\226\271\346\263\225.py" | 47 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 "urllib_get\350\257\267\346\261\202\347\232\204quote\346\226\271\346\263\225.py"
 create mode 100644 "urllib_get\350\257\267\346\261\202\347\232\204urlencode\346\226\271\346\263\225.py"

diff --git "a/urllib_get\350\257\267\346\261\202\347\232\204quote\346\226\271\346\263\225.py" "b/urllib_get\350\257\267\346\261\202\347\232\204quote\346\226\271\346\263\225.py"
new file mode 100644
index 0000000..6918502
--- /dev/null
+++ "b/urllib_get\350\257\267\346\261\202\347\232\204quote\346\226\271\346\263\225.py"
@@ -0,0 +1,32 @@
+# Goal: fetch the page source of https://www.baidu.com/s?&wd=周杰伦
+# A URL may only contain ASCII, so the keyword has to be percent-encoded:
+# https://www.baidu.com/s?&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
+
+import urllib.request
+import urllib.parse
+
+url = 'https://www.baidu.com/s?&wd='
+
+# Send a browser-like User-Agent: a User-Agent check is the first
+# anti-scraping measure this tutorial deals with.
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
+}
+
+# urllib.parse.quote() percent-encodes the UTF-8 bytes of the keyword
+# (this is URL encoding, not a separate "Unicode encoding").
+name = urllib.parse.quote('周杰伦')
+
+url = url+name
+
+# Build a request object that carries the custom headers.
+request = urllib.request.Request(url=url, headers=headers)
+
+# Send the request. The with-block closes the response even if reading
+# fails, and the timeout stops a stalled connection from hanging forever.
+with urllib.request.urlopen(request, timeout=10) as response:
+    # Read the response body and decode it to text.
+    content = response.read().decode('utf-8')
+
+# Print the page source.
+print(content)
diff --git "a/urllib_get\350\257\267\346\261\202\347\232\204urlencode\346\226\271\346\263\225.py" "b/urllib_get\350\257\267\346\261\202\347\232\204urlencode\346\226\271\346\263\225.py"
new file mode 100644
index 0000000..e773106
--- /dev/null
+++ "b/urllib_get\350\257\267\346\261\202\347\232\204urlencode\346\226\271\346\263\225.py"
@@ -0,0 +1,47 @@
+# urlencode use case: a query string with several parameters, e.g.
+# https://www.baidu.com/s?tn=59044660_hao_pg&ie=utf-8&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
+
+# urllib.parse.urlencode() takes a dict, percent-encodes each key and value
+# and joins the key=value pairs with '&'. For example
+#     urllib.parse.urlencode({'wd': '周杰伦', 'sex': '男'})
+# returns
+#     'wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7'
+
+# Goal: fetch the page source of
+# https://www.baidu.com/s?wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7&location=%E4%B8%AD%E5%9B%BD%E5%8F%B0%E6%B9%BE%E7%9C%81
+
+import urllib.request
+import urllib.parse
+
+base_url = 'https://www.baidu.com/s?'
+
+# Query parameters; the values are non-ASCII text.
+data = {
+    'wd': '周杰伦',
+    'sex': '男',
+    'location': '中国台湾省'
+}
+
+# Turn the dict into a percent-encoded query string.
+new_data = urllib.parse.urlencode(data)
+
+# Full URL of the resource to request.
+url = base_url+new_data
+
+# Send a browser-like User-Agent to get past basic anti-scraping
+# checks.
+headers = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
+}
+
+# Build a request object that carries the custom headers.
+request = urllib.request.Request(url=url, headers=headers)
+
+# Send the request. The with-block closes the response even if reading
+# fails, and the timeout stops a stalled connection from hanging forever.
+with urllib.request.urlopen(request, timeout=10) as response:
+    # Read the page source and decode it to text.
+    content = response.read().decode('utf-8')
+
+# Print the page source.
+print(content)