From 1b81895320be78bbb3777da1c770815b202e49f1 Mon Sep 17 00:00:00 2001 From: Pieter Robberechts Date: Thu, 16 Jan 2025 13:24:28 +0100 Subject: [PATCH] fix(WhoScored): update URL parsing --- soccerdata/whoscored.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/soccerdata/whoscored.py b/soccerdata/whoscored.py index f036456..8bf674e 100644 --- a/soccerdata/whoscored.py +++ b/soccerdata/whoscored.py @@ -11,7 +11,10 @@ import numpy as np import pandas as pd from lxml import html -from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException +from selenium.common.exceptions import ( + ElementClickInterceptedException, + NoSuchElementException, +) from selenium.webdriver.common.by import By from ._common import BaseSeleniumReader, make_game_id, standardize_colnames @@ -88,13 +91,13 @@ def _parse_url(url: str) -> dict: """ patt = ( r"^(?:https:\/\/www.whoscored.com)?\/" - + r"(?:Regions\/(\d+)\/)?" - + r"(?:Tournaments\/(\d+)\/)?" - + r"(?:Seasons\/(\d+)\/)?" - + r"(?:Stages\/(\d+)\/)?" - + r"(?:Matches\/(\d+)\/)?" + + r"(?:regions\/(\d+)\/)?" + + r"(?:tournaments\/(\d+)\/)?" + + r"(?:seasons\/(\d+)\/)?" + + r"(?:stages\/(\d+)\/)?" + + r"(?:matches\/(\d+)\/)?" ) - matches = re.search(patt, url) + matches = re.search(patt, url, re.IGNORECASE) if matches: return { "region_id": matches.group(1),