Skip to content

Commit

Permalink
Sehepunkte import: Port to requests and use https
Browse files (browse the repository at this point in the history)
  • Loading branch information
reinhardt committed Jul 9, 2024
1 parent 7a3fff3 commit dfc485b
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions src/recensio/plone/browser/sehepunkte.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,7 +21,7 @@
import html.entities
import logging
import re
import urllib
import requests


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -105,7 +105,8 @@ def __call__(self):
review_count = 0
for url in self._getTargetURLs():
try:
sehepunkte_xml = urllib.request.urlopen(url).read() # nosec B310
response = requests.get(url)
sehepunkte_xml = response.content
data.append(sehepunkte_parser.parse(sehepunkte_xml))
except OSError:
pass # The library takes care of logging a failure
Expand Down Expand Up @@ -133,7 +134,7 @@ def __call__(self):
return "Success"

def _getTargetURLs(self):
base = "http://www.sehepunkte.de/export/sehepunkte_%s.xml"
base = "https://www.sehepunkte.de/export/sehepunkte_%s.xml"
now = datetime.datetime.now()
past_months = int(self.request.get("past_months", 1))
for idx in reversed(range(past_months + 1)):
Expand Down Expand Up @@ -225,7 +226,8 @@ def setter(mapper):

def _extractAndSanitizeHTML(self, review):
# XXX check scheme? (bandit)
html = urllib.request.urlopen(review["canonical_uri"]).read() # nosec B310
response = requests.get(review["canonical_uri"])
html = response.content
soup = BeautifulSoup(html, "lxml")
dirt = soup.findAll("div", {"class": "box"})
for div in dirt:
Expand Down

0 comments on commit dfc485b

Please sign in to comment.