Skip to content

Commit

Permalink
Merge pull request #53 from liangzy-gh/master
Browse files Browse the repository at this point in the history
整理代码逻辑
  • Loading branch information
qiyeboy authored Feb 21, 2017
2 parents 86e699d + cdeced6 commit 7346c7d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 49 deletions.
71 changes: 23 additions & 48 deletions spider/HtmlDownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,61 +12,36 @@


# NOTE(review): this span is a rendered diff of spider/HtmlDownloader.py (the
# page header reports 23 additions and 48 deletions). Removed and added lines
# appear interleaved without +/- markers and the indentation is lost, so the
# text below is not runnable as-is. Confirm against the repository which lines
# belong to which revision before relying on any of it.
class Html_Downloader(object):
# presumably the pre-change declaration of download() — TODO confirm
@classmethod
def download(self, url):
count = 0 # retry count
r = ''
# presumably the post-change declaration of download() — TODO confirm
@staticmethod
def download(url):
try:
# First attempt: plain GET without proxies, using the headers and timeout from config.
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT)
# The response encoding is set from chardet's detection on the raw body bytes.
r.encoding = chardet.detect(r.content)['encoding']
# A non-OK response or a body shorter than 500 bytes is treated as a failure.
if (not r.ok) or len(r.content) < 500:
raise ConnectionError
else:
return r.text

except Exception:
count = 0 # retry count
# presumably select(10) returns up to 10 stored proxy rows — verify against sqlhelper
proxylist = sqlhelper.select(10)
# No proxies to retry with: give up and return None.
if not proxylist:
return None

# Retry loop bounded by config.RETRY_TIME.
while count < config.RETRY_TIME:
if (not r.ok) or len(r.content) < 500:
proxylist = sqlhelper.select(10)
try:
# Pick one proxy at random; element 0 is used as the IP and element 1 as the port.
proxy = random.choice(proxylist)
ip = proxy[0]
port = proxy[1]
# The same http:// proxy URL is used for both the "http" and "https" keys.
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
try:
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
r.encoding = chardet.detect(r.content)['encoding']
count += 1
except Exception as e:
count += 1

else:
return r.text

return None

except Exception as e:
while count < config.RETRY_TIME:
# r == '' matches the initial placeholder value assigned to r above.
if r == '' or (not r.ok) or len(r.content) < 500:
try:
proxylist = sqlhelper.select(10)
proxy = random.choice(proxylist)
ip = proxy[0]
port = proxy[1]
proxies = {"http": "http://%s:%s" % (ip, port), "https": "http://%s:%s" % (ip, port)}
try:
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
r.encoding = chardet.detect(r.content)['encoding']
count += 1
except Exception as e:
count += 1

except Exception as e:
return None

else:
return r.text

return None








# Proxied attempt with the same validity check as the first attempt; any
# exception (including the ConnectionError raised here) increments count.
r = requests.get(url=url, headers=config.HEADER, timeout=config.TIMEOUT, proxies=proxies)
r.encoding = chardet.detect(r.content)['encoding']
if (not r.ok) or len(r.content) < 500:
raise ConnectionError
else:
return r.text
except Exception:
count += 1

# presumably reached once the retries are exhausted — TODO confirm original indentation
return None
2 changes: 1 addition & 1 deletion util/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# NOTE(review): rendered diff of util/exception.py (1 addition, 1 deletion per
# the page header). The two assignments below are presumably the removed
# (config.TEST_URL) and added (config.TEST_IP) versions of the same line —
# confirm against the repository.
class Test_URL_Fail(Exception):
# Builds the message shown when the exception is converted to a string.
def __str__(self):
# Message text reads roughly: "Failed to access %s, please check the network connection".
# NOTE(review): the local name `str` shadows the builtin within this method.
str = "访问%s失败,请检查网络连接" % config.TEST_URL
str = "访问%s失败,请检查网络连接" % config.TEST_IP
return str


Expand Down

0 comments on commit 7346c7d

Please sign in to comment.