Merge pull request #184 from Riushda/improve_tv_shows_results

feat: Improve title filtering and tv shows results
g0ldyy · Nov 18, 2024 · 8d32156 · 8d32156
2 parents c74e6cc + 986c4be
commit 8d32156
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 5 deletions.
diff --git a/comet/api/stream.py b/comet/api/stream.py
@@ -120,7 +120,7 @@ async def stream(request: Request, b64config: str, type: str, id: str):
         name = translate(name)
         log_name = name
         if type == "series":
-            log_name = f"{name} S0{season}E0{episode}"
+            log_name = f"{name} S{season:02d}E{episode:02d}"
 
         if (
             settings.PROXY_DEBRID_STREAM
@@ -280,9 +280,10 @@ async def stream(request: Request, b64config: str, type: str, id: str):
 
             search_terms = [name]
             if type == "series":
+                search_terms = []
                 if not kitsu:
-                    search_terms.append(f"{name} S0{season}E0{episode}")
-                    search_terms.append(f"{name} s0{season}e0{episode}")
+                    search_terms.append(f"{name} S{season:02d}E{episode:02d}")
+                    search_terms.append(f"{name} s{season:02d}e{episode:02d}")
                 else:
                     search_terms.append(f"{name} {episode}")
             tasks.extend(

diff --git a/comet/utils/general.py b/comet/utils/general.py
@@ -7,9 +7,10 @@
 import asyncio
 import orjson
 
-from RTN import parse, title_match
+from RTN import parse
 from curl_cffi import requests
 from fastapi import Request
+from fuzzywuzzy import fuzz
 
 from comet.utils.logger import logger
 from comet.utils.models import settings, ConfigModel
@@ -467,6 +468,30 @@ async def get_mediafusion(log_name: str, type: str, full_id: str):
     return results
 
 
+def match_titles(imdb_title: str, torrent_title: str, threshold: int = 80) -> bool:
+    """
+    Match movie/TV show titles using fuzzy string matching.
+
+    Parameters:
+    imdb_title (str): The title from the IMDB data source.
+    torrent_title (str): The title from the torrent data source.
+    threshold (int): The minimum fuzzy match ratio to consider the titles a match.
+
+    Returns:
+    bool: True if the titles match, False otherwise.
+    """
+    # Blend two fuzzy scores. The plain ratio is strict: it scores unrelated titles very
+    # low, but it also gives only mid-range scores to some genuinely good matches.
+    # WRatio is more lenient and lifts those mid-range scores over the threshold.
+    base_ratio = fuzz.ratio(imdb_title, torrent_title) # strict ratio; NOTE(review): looks case-sensitive - confirm inputs are normalized
+    w_ratio = fuzz.WRatio(imdb_title, torrent_title) # less strict ratio
+    # Weighted average (weights 0.7 + 1.3 sum to 2, hence the /2), favouring WRatio because the plain ratio is too strict.
+    match_ratio = (base_ratio*0.7 + w_ratio*1.3)/2
+
+    # Accept the pair only when the blended score reaches the threshold.
+    return match_ratio >= threshold 
+
+
 async def filter(torrents: list, name: str, year: int):
     results = []
     for torrent in torrents:
@@ -478,7 +503,7 @@ async def filter(torrents: list, name: str, year: int):
 
         parsed = parse(title)
 
-        if parsed.parsed_title and not title_match(name, parsed.parsed_title):
+        if parsed.parsed_title and not match_titles(name, parsed.parsed_title):
             results.append((index, False))
             continue
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,6 +25,7 @@ aiosqlite = "*"
 jinja2 = "*"
 rank-torrent-name = "*"
 parsett = "*"
+fuzzywuzzy = {extras = ["speedup"], version = "*"}
 
 
 [tool.poetry.group.dev.dependencies]