From c00f4685916f55917282b4984610095ba12e9f22 Mon Sep 17 00:00:00 2001 From: Pengyu CHEN Date: Fri, 27 Mar 2015 06:47:13 +0800 Subject: [PATCH] fixed: `HcfMiddleware` generates fewer links than requested. Solved #58 --- scrapylib/hcf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapylib/hcf.py b/scrapylib/hcf.py index 13eff8f..227add7 100644 --- a/scrapylib/hcf.py +++ b/scrapylib/hcf.py @@ -201,7 +201,8 @@ def _get_new_requests(self): """ Get a new batch of links from the HCF.""" num_batches = 0 num_links = 0 - for num_batches, batch in enumerate(self.fclient.read(self.hs_frontier, self.hs_consume_from_slot), 1): + for batch in self.fclient.read(self.hs_frontier, self.hs_consume_from_slot, mincount=self.hs_max_links): + num_batches += 1 for fingerprint, data in batch['requests']: num_links += 1 yield Request(url=fingerprint, meta={'hcf_params': {'qdata': data}})