From 6c93de986adb0ca1d4bd5bfd81e9edf7d1f22f09 Mon Sep 17 00:00:00 2001
From: techtanic
Date: Thu, 4 Jul 2024 07:55:04 +0530
Subject: [PATCH] chore: Update urllib.parse imports and handle URL paths
 consistently

---
 base.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/base.py b/base.py
index b3be051..1165cf4 100644
--- a/base.py
+++ b/base.py
@@ -6,7 +6,7 @@
 import traceback
 from datetime import datetime, timezone
 from decimal import Decimal
-from urllib.parse import parse_qs, unquote, urlsplit
+from urllib.parse import parse_qs, unquote, urlsplit, urlunparse, urlparse
 
 import cloudscraper
 import requests
@@ -210,7 +210,7 @@ def rd(self):
             r = requests.get(
                 "https://www.real.discount/api-web/all-courses/?store=Udemy&page=1&per_page=500&orderby=date&free=1&editorschoices=0",
                 headers=headers,
-                timeout=(10,30)
+                timeout=(10, 30),
             ).json()
         except requests.exceptions.Timeout:
             self.rd_error = "Timeout"
@@ -576,6 +576,20 @@ def remove_duplicate_courses(self):
         for key in data:
             new_data[key] = []
             for title, link in data[key]:
+                parsed_url = urlparse(link)
+                path = parsed_url.path
+                if not path.endswith("/"):
+                    path += "/"
+                link = urlunparse(
+                    (
+                        parsed_url.scheme,
+                        parsed_url.netloc,
+                        path,
+                        parsed_url.params,
+                        parsed_url.query,
+                        parsed_url.fragment,
+                    )
+                )
                 if link not in existing_links:
                     new_data[key].append((title, link))
                     existing_links.add(link)
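
Note (not part of the patch): the remove_duplicate_courses hunk normalizes each link so the path always ends with "/" before deduplication, which makes links that differ only by a trailing slash collapse to a single entry. The following is a minimal standalone sketch of that normalization; the normalize_trailing_slash helper and the udemy.com example URLs are hypothetical and included only for illustration.

from urllib.parse import urlparse, urlunparse


def normalize_trailing_slash(link: str) -> str:
    """Force the URL path to end with "/" so duplicate links compare equal."""
    parsed_url = urlparse(link)
    path = parsed_url.path
    if not path.endswith("/"):
        path += "/"
    # Reassemble the URL with the normalized path; query and fragment are kept.
    return urlunparse(
        (
            parsed_url.scheme,
            parsed_url.netloc,
            path,
            parsed_url.params,
            parsed_url.query,
            parsed_url.fragment,
        )
    )


# Hypothetical examples: with and without a trailing slash, and with a query
# string. All normalize to the same path, so set-based deduplication treats
# the first two as the same course link.
print(normalize_trailing_slash("https://www.udemy.com/course/example-course"))
# -> https://www.udemy.com/course/example-course/
print(normalize_trailing_slash("https://www.udemy.com/course/example-course/"))
# -> https://www.udemy.com/course/example-course/
print(normalize_trailing_slash("https://www.udemy.com/course/example-course?couponCode=FREE"))
# -> https://www.udemy.com/course/example-course/?couponCode=FREE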