From 16057c3c7aa17fed88726f249e67b877cc8950f5 Mon Sep 17 00:00:00 2001 From: Markus Schepke Date: Mon, 2 Dec 2024 00:10:33 +0200 Subject: [PATCH] Small corrections --- src/board_game_scraper/__init__.py | 4 ++++ src/board_game_scraper/spiders/bgg.py | 11 ++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/board_game_scraper/__init__.py b/src/board_game_scraper/__init__.py index e69de29..528bd8b 100644 --- a/src/board_game_scraper/__init__.py +++ b/src/board_game_scraper/__init__.py @@ -0,0 +1,4 @@ +import logging + +logging.getLogger("numba").setLevel(logging.INFO) +logging.getLogger("PIL").setLevel(logging.INFO) diff --git a/src/board_game_scraper/spiders/bgg.py b/src/board_game_scraper/spiders/bgg.py index 58ba85d..8151d86 100644 --- a/src/board_game_scraper/spiders/bgg.py +++ b/src/board_game_scraper/spiders/bgg.py @@ -48,7 +48,7 @@ class BggSpider(SitemapSpider): name = "bgg" - allowed_domains = ("boardgamegeek.com",) + allowed_domains = ("boardgamegeek.com", "geekdo-images.com") # https://boardgamegeek.com/wiki/page/BGG_XML_API2 bgg_xml_api_url = "https://boardgamegeek.com/xmlapi2" @@ -352,7 +352,7 @@ def parse_games( bgg_ids=bgg_ids, page=page + 1, priority=-page - 1, - max_page=max_page, + meta={"max_page": max_page}, ) for game in response.xpath("/items/item"): @@ -663,8 +663,13 @@ def parse_poll( if not poll or parse_int_from_elem(poll, "@totalvotes") < self.min_votes: return default + votes = tuple(parse_votes(poll, attr, enum=enum)) + + if not votes: + return default + try: - return func(parse_votes(poll, attr, enum=enum)) + return func(votes) except Exception: self.logger.exception("Error parsing poll <%s>", name)