From dfc485b1c3aed31dd1b4fe4105b38aa0e7b95537 Mon Sep 17 00:00:00 2001 From: Manuel Reinhardt Date: Tue, 9 Jul 2024 07:45:29 +0200 Subject: [PATCH] Sehepunkte import: Port to `requests` and use https Ref syslabcom/scrum#2402 --- src/recensio/plone/browser/sehepunkte.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/recensio/plone/browser/sehepunkte.py b/src/recensio/plone/browser/sehepunkte.py index 714809d..6e660f7 100644 --- a/src/recensio/plone/browser/sehepunkte.py +++ b/src/recensio/plone/browser/sehepunkte.py @@ -21,7 +21,7 @@ import html.entities import logging import re -import urllib +import requests log = logging.getLogger(__name__) @@ -105,7 +105,8 @@ def __call__(self): review_count = 0 for url in self._getTargetURLs(): try: - sehepunkte_xml = urllib.request.urlopen(url).read() # nosec B310 + response = requests.get(url) + sehepunkte_xml = response.content data.append(sehepunkte_parser.parse(sehepunkte_xml)) except OSError: pass # The library takes care of logging a failure @@ -133,7 +134,7 @@ def __call__(self): return "Success" def _getTargetURLs(self): - base = "http://www.sehepunkte.de/export/sehepunkte_%s.xml" + base = "https://www.sehepunkte.de/export/sehepunkte_%s.xml" now = datetime.datetime.now() past_months = int(self.request.get("past_months", 1)) for idx in reversed(range(past_months + 1)): @@ -225,7 +226,8 @@ def setter(mapper): def _extractAndSanitizeHTML(self, review): # XXX check scheme? (bandit) - html = urllib.request.urlopen(review["canonical_uri"]).read() # nosec B310 + response = requests.get(review["canonical_uri"]) + html = response.content soup = BeautifulSoup(html, "lxml") dirt = soup.findAll("div", {"class": "box"}) for div in dirt: