From ac9b48ef92bb645e1366fa17bd08de16451d9786 Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 17:45:04 +0100
Subject: [PATCH 01/10] improve title filtering and tv shows results

---
 comet/api/stream.py    | 2 ++
 comet/utils/general.py | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/comet/api/stream.py b/comet/api/stream.py
index edeb31c..29c7ef9 100644
--- a/comet/api/stream.py
+++ b/comet/api/stream.py
@@ -264,8 +264,10 @@ async def stream(request: Request, b64config: str, type: str, id: str):
 
             search_terms = [name]
             if type == "series":
+                search_terms = []
                 if not kitsu:
                     search_terms.append(f"{name} S0{season}E0{episode}")
+                    search_terms.append(f"{name} s0{season}e0{episode}")
                 else:
                     search_terms.append(f"{name} {episode}")
             tasks.extend(
diff --git a/comet/utils/general.py b/comet/utils/general.py
index c4c02d8..1269385 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -477,7 +477,12 @@ async def filter(torrents: list, name: str, year: int):
 
         parsed = parse(title)
 
-        if parsed.parsed_title and not title_match(name, parsed.parsed_title):
+        def title_sub_match(correct_title: str, torrent_title: str):
+            correct_title = correct_title.lower()
+            torrent_title = torrent_title.lower()
+            return correct_title in torrent_title or torrent_title in correct_title
+
+        if parsed.parsed_title and not (title_match(name, parsed.parsed_title) or title_sub_match(name, parsed.parsed_title)):
             results.append((index, False))
             continue
 

From a8cf4cb75e5e8734b89319cf00d57f25352c1325 Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 18:14:25 +0100
Subject: [PATCH 02/10] improve tv show strings numbering

---
 comet/api/stream.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comet/api/stream.py b/comet/api/stream.py
index 29c7ef9..541bce1 100644
--- a/comet/api/stream.py
+++ b/comet/api/stream.py
@@ -108,7 +108,7 @@ async def stream(request: Request, b64config: str, type: str, id: str):
         name = translate(name)
         log_name = name
         if type == "series":
-            log_name = f"{name} S0{season}E0{episode}"
+            log_name = f"{name} S{season:02d}E{episode:02d}"
 
         if (
             settings.PROXY_DEBRID_STREAM
@@ -266,8 +266,8 @@ async def stream(request: Request, b64config: str, type: str, id: str):
             if type == "series":
                 search_terms = []
                 if not kitsu:
-                    search_terms.append(f"{name} S0{season}E0{episode}")
-                    search_terms.append(f"{name} s0{season}e0{episode}")
+                    search_terms.append(f"{name} S{season:02d}E{episode:02d}")
+                    search_terms.append(f"{name} s{season:02d}e{episode:02d}")
                 else:
                     search_terms.append(f"{name} {episode}")
             tasks.extend(

From aadde165c451bce4942f9c3dfb02dad29ba9f37b Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 18:18:54 +0100
Subject: [PATCH 03/10] move function

---
 comet/utils/general.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index 1269385..c2ffd0f 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -467,6 +467,11 @@ async def get_mediafusion(log_name: str, type: str, full_id: str):
 
 
 async def filter(torrents: list, name: str, year: int):
+    def title_sub_match(correct_title: str, torrent_title: str):
+        correct_title = correct_title.lower()
+        torrent_title = torrent_title.lower()
+        return correct_title in torrent_title or torrent_title in correct_title
+
     results = []
     for torrent in torrents:
         index = torrent[0]
@@ -477,11 +482,6 @@ async def filter(torrents: list, name: str, year: int):
 
         parsed = parse(title)
 
-        def title_sub_match(correct_title: str, torrent_title: str):
-            correct_title = correct_title.lower()
-            torrent_title = torrent_title.lower()
-            return correct_title in torrent_title or torrent_title in correct_title
-
         if parsed.parsed_title and not (title_match(name, parsed.parsed_title) or title_sub_match(name, parsed.parsed_title)):
             results.append((index, False))
             continue

From 630a5fee7604a20e952b9ddda2efdcc370c7874a Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 18:20:07 +0100
Subject: [PATCH 04/10] rename variable

---
 comet/utils/general.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index c2ffd0f..3ff56b6 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -467,10 +467,10 @@ async def get_mediafusion(log_name: str, type: str, full_id: str):
 
 
 async def filter(torrents: list, name: str, year: int):
-    def title_sub_match(correct_title: str, torrent_title: str):
-        correct_title = correct_title.lower()
+    def title_sub_match(imdb_title: str, torrent_title: str):
+        imdb_title = imdb_title.lower()
         torrent_title = torrent_title.lower()
-        return correct_title in torrent_title or torrent_title in correct_title
+        return imdb_title in torrent_title or torrent_title in imdb_title
 
     results = []
     for torrent in torrents:

From 027bb54d1b91a2141ad6dfbd77f2c74b2ba75fad Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 19:19:46 +0100
Subject: [PATCH 05/10] improve title matching function

---
 comet/utils/general.py | 32 ++++++++++++++++++++++++++------
 pyproject.toml         |  1 +
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index 3ff56b6..e9002e3 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -10,6 +10,7 @@
 from RTN import parse, title_match
 from curl_cffi import requests
 from fastapi import Request
+from fuzzywuzzy import fuzz
 
 from comet.utils.logger import logger
 from comet.utils.models import settings, ConfigModel
@@ -465,13 +466,32 @@ async def get_mediafusion(log_name: str, type: str, full_id: str):
 
     return results
 
+def match_titles(imdb_title, torrent_title, threshold=80, token_overlap_threshold=0.5):
+    """
+    Match movie/TV show titles using a combination of fuzzy string matching and token overlap.
 
-async def filter(torrents: list, name: str, year: int):
-    def title_sub_match(imdb_title: str, torrent_title: str):
-        imdb_title = imdb_title.lower()
-        torrent_title = torrent_title.lower()
-        return imdb_title in torrent_title or torrent_title in imdb_title
+    Parameters:
+    imdb_title (str): The title from the IMDB data source.
+    torrent_title (str): The title from the torrent data source.
+    threshold (int): The minimum fuzzy match ratio to consider the titles a match.
+    token_overlap_threshold (float): The minimum proportion of overlapping tokens to consider the titles a match.
+
+    Returns:
+    bool: True if the titles match, False otherwise.
+    """
+    # Calculate the fuzzy match ratio
+    match_ratio = fuzz.token_set_ratio(imdb_title, torrent_title)
+
+    # Calculate the proportion of overlapping tokens
+    imdb_tokens = set(imdb_title.lower().split())
+    torrent_tokens = set(torrent_title.lower().split())
+    common_tokens = imdb_tokens.intersection(torrent_tokens)
+    token_overlap_ratio = len(common_tokens) / max(len(imdb_tokens), len(torrent_tokens))
 
+    # Check if both the fuzzy match ratio and token overlap ratio meet the thresholds
+    return match_ratio >= threshold and token_overlap_ratio >= token_overlap_threshold
+
+async def filter(torrents: list, name: str, year: int):
     results = []
     for torrent in torrents:
         index = torrent[0]
@@ -482,7 +502,7 @@ def title_sub_match(imdb_title: str, torrent_title: str):
 
         parsed = parse(title)
 
-        if parsed.parsed_title and not (title_match(name, parsed.parsed_title) or title_sub_match(name, parsed.parsed_title)):
+        if parsed.parsed_title and not (match_titles(name, parsed.parsed_title)):
             results.append((index, False))
             continue
 
diff --git a/pyproject.toml b/pyproject.toml
index 222dbe4..543895a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ aiosqlite = "*"
 jinja2 = "*"
 rank-torrent-name = "*"
 parsett = "*"
+fuzzywuzzy = "*"
 
 
 [tool.poetry.group.dev.dependencies]

From 0567166f842b20dd3daf56973b3923032a65ef3d Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 19:21:54 +0100
Subject: [PATCH 06/10] add python-Levenshtein for fuzzywuzzy

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 543895a..6ddd8f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ jinja2 = "*"
 rank-torrent-name = "*"
 parsett = "*"
 fuzzywuzzy = "*"
+python-Levenshtein = "*"
 
 
 [tool.poetry.group.dev.dependencies]

From 1123f086d9112fcccf65b373e34e9f889eb7fd61 Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Fri, 15 Nov 2024 21:13:39 +0100
Subject: [PATCH 07/10] remove useless parenthesis

---
 comet/utils/general.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index e9002e3..2fa32d2 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -502,7 +502,7 @@ async def filter(torrents: list, name: str, year: int):
 
         parsed = parse(title)
 
-        if parsed.parsed_title and not (match_titles(name, parsed.parsed_title)):
+        if parsed.parsed_title and not match_titles(name, parsed.parsed_title):
             results.append((index, False))
             continue
 

From 1c9ed405698f0acd2788db281f832295f7c6fc85 Mon Sep 17 00:00:00 2001
From: Goldy <153996346+g0ldyy@users.noreply.github.com>
Date: Sat, 16 Nov 2024 00:08:25 +0100
Subject: [PATCH 08/10] Update pyproject.toml

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 6ddd8f6..64311c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,8 +25,7 @@ aiosqlite = "*"
 jinja2 = "*"
 rank-torrent-name = "*"
 parsett = "*"
-fuzzywuzzy = "*"
-python-Levenshtein = "*"
+fuzzywuzzy = {extras = ["speedup"], version = "*"}
 
 
 [tool.poetry.group.dev.dependencies]

From f93fd6a6884236970b4e60f947419ccf67633f3a Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Sat, 16 Nov 2024 01:59:59 +0100
Subject: [PATCH 09/10] improve title matching

---
 comet/utils/general.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index 2fa32d2..2bd3b75 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -7,7 +7,7 @@
 import asyncio
 import orjson
 
-from RTN import parse, title_match
+from RTN import parse
 from curl_cffi import requests
 from fastapi import Request
 from fuzzywuzzy import fuzz
@@ -466,30 +466,25 @@ async def get_mediafusion(log_name: str, type: str, full_id: str):
 
     return results
 
-def match_titles(imdb_title, torrent_title, threshold=80, token_overlap_threshold=0.5):
+
+def match_titles(imdb_title: str, torrent_title: str, threshold: int = 80) -> bool:
     """
-    Match movie/TV show titles using a combination of fuzzy string matching and token overlap.
+    Match movie/TV show titles using fuzzy string matching.
 
     Parameters:
     imdb_title (str): The title from the IMDB data source.
     torrent_title (str): The title from the torrent data source.
     threshold (int): The minimum fuzzy match ratio to consider the titles a match.
-    token_overlap_threshold (float): The minimum proportion of overlapping tokens to consider the titles a match.
 
     Returns:
     bool: True if the titles match, False otherwise.
     """
     # Calculate the fuzzy match ratio
-    match_ratio = fuzz.token_set_ratio(imdb_title, torrent_title)
+    match_ratio = fuzz.ratio(imdb_title, torrent_title)
 
-    # Calculate the proportion of overlapping tokens
-    imdb_tokens = set(imdb_title.lower().split())
-    torrent_tokens = set(torrent_title.lower().split())
-    common_tokens = imdb_tokens.intersection(torrent_tokens)
-    token_overlap_ratio = len(common_tokens) / max(len(imdb_tokens), len(torrent_tokens))
+    # Check if the fuzzy match ratio meets the thresholds
+    return match_ratio >= threshold
 
-    # Check if both the fuzzy match ratio and token overlap ratio meet the thresholds
-    return match_ratio >= threshold and token_overlap_ratio >= token_overlap_threshold
 
 async def filter(torrents: list, name: str, year: int):
     results = []

From 7f19c4c3839b6b46aab3cee3efb283469d7b68f4 Mon Sep 17 00:00:00 2001
From: Dariush <mohandesdariush@gmail.com>
Date: Sat, 16 Nov 2024 14:36:25 +0100
Subject: [PATCH 10/10] improve title matching with better ratios

---
 comet/utils/general.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/comet/utils/general.py b/comet/utils/general.py
index 2bd3b75..7e9cfe6 100644
--- a/comet/utils/general.py
+++ b/comet/utils/general.py
@@ -480,10 +480,15 @@ def match_titles(imdb_title: str, torrent_title: str, threshold: int = 80) -> bo
     bool: True if the titles match, False otherwise.
     """
     # Calculate the fuzzy match ratio
-    match_ratio = fuzz.ratio(imdb_title, torrent_title)
-
-    # Check if the fuzzy match ratio meets the thresholds
-    return match_ratio >= threshold
+    # The plain ratio gives garbage results a very low score, but it also gives only a
+    # mid-range score to some good results. WRatio lifts those mid-range scores past the filter.
+    base_ratio = fuzz.ratio(imdb_title, torrent_title) # strict ratio
+    w_ratio = fuzz.WRatio(imdb_title, torrent_title) # less strict ratio
+    # WRatio is weighted more heavily (1.3 vs 0.7) because the basic ratio alone is too strict.
+    match_ratio = (base_ratio*0.7 + w_ratio*1.3)/2
+
+    # Check if the combined fuzzy match ratio meets the threshold
+    return match_ratio >= threshold 
 
 
 async def filter(torrents: list, name: str, year: int):