Skip to content

Commit

Permalink
reduced execution timeouts and bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
segment-srl committed Jul 24, 2017
1 parent a288d09 commit dcc0078
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 5 deletions.
1 change: 1 addition & 0 deletions core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
REQTYPE_JSONP = "jsonp"
REQTYPE_FORM = "form"
REQTYPE_REDIRECT = "redirect"
+ REQTYPE_IMAGE = "image"
REQTYPE_UNKNOWN = "unknown"


Expand Down
2 changes: 1 addition & 1 deletion core/crawl/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def __init__(self, argv):
"http_auth": None,
"use_urllib_onerror": True,
"group_qs": False,
- "process_timeout": 300, # when lots of element(~25000) are added dynamically it can take some time..
+ "process_timeout": 180, # when lots of element(~25000) are added dynamically it can take some time..
"set_referer": True,
"scope": CRAWLSCOPE_DOMAIN,
"mode": CRAWLMODE_AGGRESSIVE,
Expand Down
9 changes: 5 additions & 4 deletions core/crawl/crawler_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ def send_probe(self, request, errors):

if jsn == None:
errors.append(ERROR_PROBEKILLED)
- time.sleep(self.process_retries_interval) # ... ???
- retries -= 1
- continue
+ # time.sleep(self.process_retries_interval) # ... ???
+ # retries -= 1
+ # continue
+ break


# try to decode json also after an exception .. sometimes phantom crashes BUT returns a valid json ..
Expand Down Expand Up @@ -241,7 +242,7 @@ def crawl(self):
adjust_requests(requests)

Shared.main_condition.acquire()
- res = CrawlResult(request, requests, errors, probe.page_hash)
+ res = CrawlResult(request, requests, errors, probe.page_hash if probe else "")
Shared.crawl_results.append(res)
Shared.main_condition.notify()
Shared.main_condition.release()
Expand Down

0 comments on commit dcc0078

Please sign in to comment.