From 1b81895320be78bbb3777da1c770815b202e49f1 Mon Sep 17 00:00:00 2001
From: Pieter Robberechts <pieter.robberechts@kuleuven.be>
Date: Thu, 16 Jan 2025 13:24:28 +0100
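Subject: [PATCH] fix(WhoScored): update URL parsing

Write the regions/tournaments/seasons/stages/matches path segments of
the URL pattern in lowercase and pass re.IGNORECASE to re.search, so
that both lowercase and capitalised URL paths are matched.

Also split the selenium exception import over multiple lines.
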
---
 soccerdata/whoscored.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/soccerdata/whoscored.py b/soccerdata/whoscored.py
index f036456..8bf674e 100644
--- a/soccerdata/whoscored.py
+++ b/soccerdata/whoscored.py
@@ -11,7 +11,10 @@
 import numpy as np
 import pandas as pd
 from lxml import html
-from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException
+from selenium.common.exceptions import (
+    ElementClickInterceptedException,
+    NoSuchElementException,
+)
 from selenium.webdriver.common.by import By
 
 from ._common import BaseSeleniumReader, make_game_id, standardize_colnames
@@ -88,13 +91,13 @@ def _parse_url(url: str) -> dict:
     """
     patt = (
         r"^(?:https:\/\/www.whoscored.com)?\/"
-        + r"(?:Regions\/(\d+)\/)?"
-        + r"(?:Tournaments\/(\d+)\/)?"
-        + r"(?:Seasons\/(\d+)\/)?"
-        + r"(?:Stages\/(\d+)\/)?"
-        + r"(?:Matches\/(\d+)\/)?"
+        + r"(?:regions\/(\d+)\/)?"
+        + r"(?:tournaments\/(\d+)\/)?"
+        + r"(?:seasons\/(\d+)\/)?"
+        + r"(?:stages\/(\d+)\/)?"
+        + r"(?:matches\/(\d+)\/)?"
     )
-    matches = re.search(patt, url)
+    matches = re.search(patt, url, re.IGNORECASE)
     if matches:
         return {
             "region_id": matches.group(1),