From 25b1ba30f75be4eba656bd5b080cf4ea22cda561 Mon Sep 17 00:00:00 2001 From: jiongshen1808 <651271820@qq.com> Date: Sun, 26 Jun 2016 12:55:28 +0800 Subject: [PATCH] =?UTF-8?q?Question.get=5Fall=5Fanswers()=E7=9A=84Bug?= =?UTF-8?q?=E6=9B=B4=E6=AD=A3=E5=90=8E=E5=8F=AF=E4=BB=A5=E6=90=9C=E9=9B=86?= =?UTF-8?q?=E5=85=A8=E6=89=80=E6=9C=89=E7=9A=84=E5=9B=9E=E7=AD=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- zhihu.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/zhihu.py b/zhihu.py index e00e939..f847e65 100755 --- a/zhihu.py +++ b/zhihu.py @@ -371,15 +371,16 @@ def get_all_answers(self): else: error_answer_count = 0 my_answer_count = 0 - for i in xrange((answers_num - 1) / 20 + 1): + for i in xrange((answers_num - 1) / 10 + 1): if i == 0: - for j in xrange(min(answers_num, 20)): + for j in xrange(min(answers_num, 10)): if self.soup == None: self.parser() soup = BeautifulSoup(self.soup.encode("utf-8"), "lxml") is_my_answer = False - if soup.find_all("div", class_="zm-item-answer")[j].find("span", class_="count") == None: + #print len(soup.find_all("div", class_="zm-item-answer")) + if soup.find_all("div", class_="zm-item-answer zm-item-expanded")[j].find("span", class_="count") == None: my_answer_count += 1 is_my_answer = True @@ -428,9 +429,9 @@ def get_all_answers(self): else: post_url = "http://www.zhihu.com/node/QuestionAnswerListV2" _xsrf = self.soup.find("input", attrs={'name': '_xsrf'})["value"] - offset = i * 20 + offset = i * 10 params = json.dumps( - {"url_token": int(self.url[-8:-1] + self.url[-1]), "pagesize": 20, "offset": offset}) + {"url_token": int(self.url[-8:]), "pagesize": 10, "offset": offset}) data = { '_xsrf': _xsrf, 'method': "next", @@ -444,7 +445,7 @@ def get_all_answers(self): r = requests.post(post_url, data=data, headers=header, verify=False) answer_list = r.json()["msg"] - for j in xrange(min(answers_num - i * 20, 20)): + for j in xrange(min(answers_num - i * 10, 10)): soup = BeautifulSoup(self.soup.encode("utf-8"), "lxml") answer_soup = BeautifulSoup(answer_list[j], "lxml")