Added Napiprojekt provider release info and new options to filter subtitles based on uploader
destpstrzy authored Nov 25, 2024
1 parent 42c051e commit 42d569f
Showing 5 changed files with 99 additions and 19 deletions.
4 changes: 4 additions & 0 deletions bazarr/app/config.py
@@ -239,6 +239,10 @@ def check_parser_binary(value):
Validator('opensubtitlescom.use_hash', must_exist=True, default=True, is_type_of=bool),
Validator('opensubtitlescom.include_ai_translated', must_exist=True, default=False, is_type_of=bool),

# napiprojekt section
Validator('napiprojekt.only_authors', must_exist=True, default=False, is_type_of=bool),
Validator('napiprojekt.only_real_names', must_exist=True, default=False, is_type_of=bool),

# addic7ed section
Validator('addic7ed.username', must_exist=True, default='', is_type_of=str, cast=str),
Validator('addic7ed.password', must_exist=True, default='', is_type_of=str, cast=str),
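
The two validators above register the new napiprojekt options with Dynaconf so they always exist and default to False. A minimal standalone sketch of that behaviour (plain Dynaconf object, not Bazarr's own settings wiring, which is assumed here):

from dynaconf import Dynaconf, Validator

settings = Dynaconf(validators=[
    Validator('napiprojekt.only_authors', must_exist=True, default=False, is_type_of=bool),
    Validator('napiprojekt.only_real_names', must_exist=True, default=False, is_type_of=bool),
])
settings.validators.validate()  # apply defaults for anything not configured

print(settings.napiprojekt.only_authors)     # False
print(settings.napiprojekt.only_real_names)  # False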
10 changes: 6 additions & 4 deletions bazarr/app/get_providers.py
@@ -30,7 +30,6 @@
from sonarr.blacklist import blacklist_log
from utilities.analytics import event_tracker


_TRACEBACK_RE = re.compile(r'File "(.*?providers[\\/].*?)", line (\d+)')


@@ -41,7 +40,7 @@ def time_until_midnight(timezone):
"""
now_in_tz = datetime.datetime.now(tz=timezone)
midnight = now_in_tz.replace(hour=0, minute=0, second=0, microsecond=0) + \
datetime.timedelta(days=1)
datetime.timedelta(days=1)
return midnight - now_in_tz


@@ -254,6 +253,8 @@ def get_providers_auth():
'include_ai_translated': settings.opensubtitlescom.include_ai_translated,
'api_key': 's38zmzVlW7IlYruWi7mHwDYl2SfMQoC1'
},
'napiprojekt': {'only_authors': settings.napiprojekt.only_authors,
'only_real_names': settings.napiprojekt.only_real_names},
'podnapisi': {
'only_foreign': False, # fixme
'also_foreign': False, # fixme
@@ -369,7 +370,7 @@ def provider_throttle(name, exception, ids=None, language=None):
cls = valid_cls

throttle_data = provider_throttle_map().get(name, provider_throttle_map()["default"]).get(cls, None) or \
provider_throttle_map()["default"].get(cls, None)
provider_throttle_map()["default"].get(cls, None)

if throttle_data:
throttle_delta, throttle_description = throttle_data
@@ -379,7 +380,8 @@
throttle_until = datetime.datetime.now() + throttle_delta

if cls_name not in VALID_COUNT_EXCEPTIONS or throttled_count(name):
if cls_name == 'ValueError' and isinstance(exception.args, tuple) and len(exception.args) and exception.args[0].startswith('unsupported pickle protocol'):
if cls_name == 'ValueError' and isinstance(exception.args, tuple) and len(exception.args) and exception.args[
0].startswith('unsupported pickle protocol'):
for fn in subliminal_cache_region.backend.all_filenames:
try:
os.remove(fn)
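
In get_providers_auth() above, the new 'napiprojekt' entry exposes the two settings as that provider's configuration. A minimal sketch of the assumed wiring — subliminal passes each provider's config dict as keyword arguments when it instantiates the provider, so the keys land on the __init__ added in this commit (the values below are hypothetical, standing in for settings.napiprojekt.*):

from subliminal_patch.providers.napiprojekt import NapiProjektProvider

napiprojekt_config = {'only_authors': True, 'only_real_names': False}  # hypothetical values

provider = NapiProjektProvider(**napiprojekt_config)
print(provider.only_authors, provider.only_real_names)  # True False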
6 changes: 5 additions & 1 deletion custom_libs/subliminal/providers/napiprojekt.py
@@ -67,8 +67,10 @@ class NapiProjektProvider(Provider):
server_url = 'http://napiprojekt.pl/unit_napisy/dl.php'
subtitle_class = NapiProjektSubtitle

def __init__(self):
def __init__(self, only_authors=None, only_real_names=None):
self.session = None
self.only_authors = only_authors
self.only_real_names = only_real_names

def initialize(self):
self.session = Session()
@@ -78,6 +80,8 @@ def terminate(self):
self.session.close()

def query(self, language, hash):
if self.only_authors or self.only_real_names:
return None
params = {
'v': 'dreambox',
'kolejka': 'false',
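
The guard added to query() above means that enabling either filter disables hash-based lookups in the bundled upstream provider entirely: the dl.php endpoint returns no uploader metadata, so there is nothing to filter on. A small sketch of the effect, assuming Bazarr's custom_libs copy is the subliminal package on the import path:

from babelfish import Language
from subliminal.providers.napiprojekt import NapiProjektProvider

provider = NapiProjektProvider(only_authors=True)
# The guard returns before self.session is touched, so no initialize() or HTTP request happens.
print(provider.query(Language('pol'), '0' * 32))  # None ('0' * 32 is just a placeholder hash)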
81 changes: 68 additions & 13 deletions custom_libs/subliminal_patch/providers/napiprojekt.py
@@ -1,6 +1,7 @@
# coding=utf-8
from __future__ import absolute_import
import logging
import re

from subliminal.providers.napiprojekt import NapiProjektProvider as _NapiProjektProvider, \
NapiProjektSubtitle as _NapiProjektSubtitle, get_subhash
@@ -40,6 +41,11 @@ class NapiProjektProvider(_NapiProjektProvider):
video_types = (Episode, Movie)
subtitle_class = NapiProjektSubtitle

def __init__(self, only_authors=None, only_real_names=None):
super().__init__()
self.only_authors = only_authors
self.only_real_names = only_real_names

def query(self, language, hash):
params = {
'v': 'dreambox',
@@ -68,8 +74,9 @@ def query(self, language, hash):
def list_subtitles(self, video, languages):
def flatten(l):
return [item for sublist in l for item in sublist]

return [s for s in [self.query(l, video.hashes['napiprojekt']) for l in languages] if s is not None] + \
flatten([self._scrape(video, l) for l in languages])
flatten([self._scrape(video, l) for l in languages])

def download_subtitle(self, subtitle):
if subtitle.content is not None:
@@ -80,7 +87,8 @@ def _scrape(self, video, language):
if language.alpha2 != 'pl':
return []
title, matches = self._find_title(video)
if title == None:

if title is None:
return []
episode = f'-s{video.season:02d}e{video.episode:02d}' if isinstance(
video, Episode) else ''
@@ -89,14 +97,59 @@
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
subtitles = []
for link in soup.find_all('a'):
if 'class' in link.attrs and 'tableA' in link.attrs['class']:
hash = link.attrs['href'][len('napiprojekt:'):]
subtitles.append(
NapiProjektSubtitle(language,
hash,
release_info=str(link.contents[0]),
matches=matches | ({'season', 'episode'} if episode else set())))

# Find all rows with titles and napiprojekt links
rows = soup.find_all("tr", title=True)

for row in rows:
for link in row.find_all('a'):
if 'class' in link.attrs and 'tableA' in link.attrs['class']:
title = row['title']
hash = link.attrs['href'][len('napiprojekt:'):]

data = row.find_all('p')

size = data[1].contents[0] if len(data) > 1 and data[1].contents else ""
length = data[3].contents[0] if len(data) > 3 and data[3].contents else ""
author = data[4].contents[0] if len(data) > 4 and data[4].contents else ""
added = data[5].contents[0] if len(data) > 5 and data[5].contents else ""

if author == "":
match = re.search(r"<b>Autor:</b> (.*?)\(", title)
logger.debug(title)
if match:
author = match.group(1).strip()
else:
author = ""

if self.only_authors:
if author.lower() in ["brak", "automat", "si", "chatgpt", "ai", "robot"]:
continue

if self.only_real_names:
# Keep the entry only if the author looks like a real name: at least two uppercase letters plus a lowercase letter, or two space-separated words
if not (re.match(r'^(?=(?:.*[A-Z]){2})(?=.*[a-z]).*$', author) or
re.match(r'^\w+\s\w+$', author)):
continue

match = re.search(r"<b>Video rozdzielczość:</b> (.*?)<", title)
if match:
resolution = match.group(1).strip()
else:
resolution = ""

match = re.search(r"<b>Video FPS:</b> (.*?)<", title)
if match:
fps = match.group(1).strip()
else:
fps = ""

added_length = "Autor: " + author + " | " + resolution + " | " + fps + " | " + size + " | " + added + " | " + length
subtitles.append(
NapiProjektSubtitle(language,
hash,
release_info=added_length,
matches=matches | ({'season', 'episode'} if episode else set())))

logger.debug(f'Found subtitles {subtitles}')
return subtitles
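
The new scraping loop above builds release_info from each result row: size, duration, author and upload date come from the row's <p> cells (the author falls back to the row's title tooltip), while resolution and FPS are parsed from the tooltip with regexes. The only_authors switch drops entries whose author marks them as absent or machine-generated ("brak" is Polish for "none", "SI" is the Polish abbreviation for AI), and only_real_names keeps authors that look like real names. A self-contained illustration, using a hypothetical title attribute:

import re

# Hypothetical tooltip text, shaped like the title attribute on a napiprojekt.pl result row:
title = ("<b>Autor:</b> Jan Kowalski (2 napisy)<br>"
         "<b>Video rozdzielczość:</b> 1920x1080<br>"
         "<b>Video FPS:</b> 23.976<br>")

m = re.search(r"<b>Autor:</b> (.*?)\(", title)
author = m.group(1).strip() if m else ""        # 'Jan Kowalski'

m = re.search(r"<b>Video rozdzielczość:</b> (.*?)<", title)
resolution = m.group(1).strip() if m else ""    # '1920x1080'

m = re.search(r"<b>Video FPS:</b> (.*?)<", title)
fps = m.group(1).strip() if m else ""           # '23.976'

# only_authors: reject placeholder or machine authors.
print(author.lower() in ["brak", "automat", "si", "chatgpt", "ai", "robot"])  # False

# only_real_names: at least two uppercase letters plus a lowercase one, or two words.
looks_real = bool(re.match(r'^(?=(?:.*[A-Z]){2})(?=.*[a-z]).*$', author) or
                  re.match(r'^\w+\s\w+$', author))
print(looks_real)  # True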
@@ -114,15 +167,17 @@ def _find_title(self, video):
video, Episode) else video.imdb_id

def match_title_tag(
tag): return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs['class'] and 'href' in tag.attrs
tag):
return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs[
'class'] and 'href' in tag.attrs

if imdb_id:
for entry in soup.find_all(lambda tag: tag.name == 'div' and 'greyBoxCatcher' in tag['class']):
if entry.find_all(href=lambda href: href and href.startswith(f'https://www.imdb.com/title/{imdb_id}')):
for link in entry.find_all(match_title_tag):
return link.attrs['href'][len('napisy-'):], \
{'series', 'year', 'series_imdb_id'} if isinstance(
video, Episode) else {'title', 'year', 'imdb_id'}
{'series', 'year', 'series_imdb_id'} if isinstance(
video, Episode) else {'title', 'year', 'imdb_id'}

type = 'episode' if isinstance(video, Episode) else 'movie'
for link in soup.find_all(match_title_tag):
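
_find_title locates the matching napiprojekt.pl title page, preferring an IMDb id match and falling back to a plain title search; match_title_tag picks the anchor tags that link to title pages. A small illustration against a hypothetical HTML fragment:

from bs4 import BeautifulSoup

def match_title_tag(tag):
    return tag.name == 'a' and 'class' in tag.attrs and 'movieTitleCat' in tag.attrs['class'] and 'href' in tag.attrs

# Hypothetical fragment of a napiprojekt.pl search-results page:
html = '<div class="greyBoxCatcher"><a class="movieTitleCat" href="napisy-1234-some-movie-(2020)">Some Movie</a></div>'
soup = BeautifulSoup(html, 'html.parser')
link = soup.find(match_title_tag)
print(link.attrs['href'][len('napisy-'):])  # 1234-some-movie-(2020)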
17 changes: 16 additions & 1 deletion frontend/src/pages/Settings/Providers/list.ts
@@ -320,7 +320,22 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
},
],
},
{ key: "napiprojekt", description: "Polish Subtitles Provider" },
{
key: "napiprojekt",
description: "Polish Subtitles Provider",
inputs: [
{
type: "switch",
key: "only_authors",
name: "Skip subtitles without authors or possibly AI generated",
},
{
type: "switch",
key: "only_real_names",
name: "Download subtitles with real name authors only",
},
],
},
{
key: "napisy24",
description: "Polish Subtitles Provider",
