From de23e39c41600a447a1b0286ed3457fc2a6db781 Mon Sep 17 00:00:00 2001 From: Wei-Cheng Pan Date: Mon, 18 Dec 2023 22:52:50 +0900 Subject: [PATCH] update --- drive/app/jav/_dispatch.py | 63 +++++++++++++++---------- drive/app/jav/_main.py | 8 ++-- drive/app/jav/_sauce.py | 94 ++++++++++++++++++++++++-------------- 3 files changed, 102 insertions(+), 63 deletions(-) diff --git a/drive/app/jav/_dispatch.py b/drive/app/jav/_dispatch.py index bbc287bd..e25c5960 100644 --- a/drive/app/jav/_dispatch.py +++ b/drive/app/jav/_dispatch.py @@ -3,7 +3,7 @@ from ._types import SauceData, JavData -def match_jav_1(name: str) -> JavData | None: +def _match_jav_1(name: str) -> JavData | None: m = re.search(r"(\w{2,6})[-_](\d{2,4}\w?)", name) if not m: return None @@ -18,7 +18,7 @@ def match_jav_1(name: str) -> JavData | None: ) -def match_jav_2(name: str) -> JavData | None: +def _match_jav_2(name: str) -> JavData | None: m = re.search(r"(\d{3,4})(\w{3,6})[-_](\d{3,4}\w?)", name) if not m: return None @@ -33,7 +33,7 @@ def match_jav_2(name: str) -> JavData | None: ) -def match_fc2(name: str) -> JavData | None: +def _match_fc2(name: str) -> JavData | None: m = re.search(r"fc2[-_]ppv[-_](\d+)", name, re.I) if not m: return None @@ -47,7 +47,7 @@ def match_fc2(name: str) -> JavData | None: ) -def match_heydouga(name: str) -> JavData | None: +def _match_heydouga(name: str) -> JavData | None: m = re.search(r"hey(douga)?[-_ ]?(\d+)[-_](\d+)", name, re.I) if not m: return None @@ -62,7 +62,7 @@ def match_heydouga(name: str) -> JavData | None: ) -def match_caribpr(name: str) -> JavData | None: +def _match_caribpr(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{3})-CARIBPR", name, re.I) if not m: return None @@ -75,7 +75,7 @@ def match_caribpr(name: str) -> JavData | None: ) -def match_carib(name: str) -> JavData | None: +def _match_carib(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{3})-CARIB", name, re.I) if not m: return None @@ -88,7 +88,7 @@ def match_carib(name: str) -> JavData | None: ) -def match_1pondo(name: str) -> JavData | None: +def _match_1pondo(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{3})-1PON", name, re.I) if not m: return None @@ -101,7 +101,21 @@ def match_1pondo(name: str) -> JavData | None: ) -def match_unknown(name: str) -> JavData | None: +def _match_heyzo(name: str) -> JavData | None: + m = re.search(r"HEYZO[-_](\d{4})", name, re.I) + if not m: + return None + number = m.group(1) + name = f"HEYZO-{number}" + return JavData( + name=name, + sauce_list=[ + SauceData(name="heyzo", query=number), + ], + ) + + +def _match_unknown(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{3})", name, re.I) if not m: return None @@ -118,7 +132,7 @@ def match_unknown(name: str) -> JavData | None: ) -def match_mesubuta(name: str) -> JavData | None: +def _match_mesubuta(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{3})[-_](\d{2})", name, re.I) if not m: return None @@ -129,7 +143,7 @@ def match_mesubuta(name: str) -> JavData | None: ) -def match_10musume(name: str) -> JavData | None: +def _match_10musume(name: str) -> JavData | None: m = re.search(r"(\d{6})[-_](\d{2})-10MU", name, re.I) if not m: return None @@ -144,30 +158,31 @@ def match_10musume(name: str) -> JavData | None: ) -EXCLUDE_LIST = [ - match_mesubuta, +_EXCLUDE_LIST = [ + _match_mesubuta, ] -INCLUDE_LIST = [ - match_10musume, - match_caribpr, - match_carib, - match_1pondo, - match_heydouga, - match_fc2, - match_jav_1, - match_jav_2, - match_unknown, +_INCLUDE_LIST = [ + _match_10musume, + _match_caribpr, + _match_carib, + _match_1pondo, + _match_heyzo, + _match_heydouga, + _match_fc2, + _match_jav_1, + _match_jav_2, + _match_unknown, ] def get_jav_query(name: str) -> JavData | None: - for ex in EXCLUDE_LIST: + for ex in _EXCLUDE_LIST: rv = ex(name) if rv: return None - for in_ in INCLUDE_LIST: + for in_ in _INCLUDE_LIST: rv = in_(name) if rv: return rv diff --git a/drive/app/jav/_main.py b/drive/app/jav/_main.py index 2c49f056..8ee21afb 100644 --- a/drive/app/jav/_main.py +++ b/drive/app/jav/_main.py @@ -48,7 +48,7 @@ async def _generate(drive: Drive, kwargs: Namespace) -> int: async with ClientSession() as session: root_node = await drive.get_node_by_path(root_path) children = await drive.get_children(root_node) - async for node in process_node_list(session, children): + async for node in _process_node_list(session, children): yaml.safe_dump( [node], sys.stdout, @@ -72,7 +72,7 @@ async def _apply(drive: Drive, kwargs: Namespace) -> int: node = await drive.get_node_by_id(id_) print(f"rename {node.name} -> {value}") - await rename(drive, node, value) + await _rename(drive, node, value) break return 0 @@ -101,7 +101,7 @@ def all_same(m: dict[str, Any]): return 0 -async def process_node_list(session: ClientSession, node_list: list[Node]): +async def _process_node_list(session: ClientSession, node_list: list[Node]): for node in node_list: if node.is_trashed: continue @@ -118,7 +118,7 @@ async def process_node_list(session: ClientSession, node_list: list[Node]): } -async def rename(drive: Drive, node: Node, new_name: str) -> None: +async def _rename(drive: Drive, node: Node, new_name: str) -> None: if node.is_directory: if new_name == node.name: print("skipped") diff --git a/drive/app/jav/_sauce.py b/drive/app/jav/_sauce.py index cf8a07c7..eaeb5169 100644 --- a/drive/app/jav/_sauce.py +++ b/drive/app/jav/_sauce.py @@ -8,7 +8,7 @@ from ._types import JavData -async def fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query: str): async with session.get(f"https://www.javbus.com/ja/{query}") as response: if response.status != 200: return None @@ -18,10 +18,10 @@ async def fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query: title = soup.select_one(".container > h3") if not title: return None - return normalize_title(title.text) + return _normalize_title(title.text) -async def fetch_jav_data_from_javlibrary( +async def _fetch_jav_data_from_javlibrary( session: ClientSession, jav_id: str, query: str ): async with session.get( @@ -37,7 +37,7 @@ async def fetch_jav_data_from_javlibrary( soup = BeautifulSoup(html, "html.parser") title = soup.select_one("#video_title .post-title") if title: - return normalize_title(title.text) + return _normalize_title(title.text) videos = soup.select(".videos .video") for div in videos: @@ -53,12 +53,12 @@ async def fetch_jav_data_from_javlibrary( continue if title.find("(ブルーレイディスク)") >= 0: continue - return normalize_title(title) + return _normalize_title(title) return None -async def fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query: str): async with session.get( "https://javbee.org/search", params={ @@ -73,10 +73,12 @@ async def fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query: title = soup.select_one(".title > a") if not title: return None - return normalize_title(title.text) + return _normalize_title(title.text) -async def fetch_jav_data_from_heydouga(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_heydouga( + session: ClientSession, jav_id: str, query: str +): async with session.get( f"https://www.heydouga.com/moviepages/{query}/index.html", ) as response: @@ -90,10 +92,28 @@ async def fetch_jav_data_from_heydouga(session: ClientSession, jav_id: str, quer return None for span in title.find_all("span"): span.decompose() - return f"{jav_id} {normalize_title(title.text)}" + title = _normalize_title(title.text) + return f"{jav_id} {title}" + + +async def _fetch_jav_data_from_heyzo(session: ClientSession, jav_id: str, query: str): + async with session.get( + f"https://www.heyzo.com/moviepages/{query}/index.html", + ) as response: + if response.status != 200: + return None + html = await response.text(errors="ignore") + soup = BeautifulSoup(html, "html.parser") + title = soup.select_one("#movie > h1") + if not title: + return None + title = _normalize_title(title.text) + title = re.sub(r"\t+", " ", title) + return f"{jav_id} {title}" -async def fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query: str): + +async def _fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query: str): async with session.get( f"https://www.caribbeancom.com/moviepages/{query}/index.html", ) as response: @@ -106,16 +126,17 @@ async def fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query: title = soup.select_one("h1[itemprop=name]") if not title: return None - title = normalize_title(title.text) + title = _normalize_title(title.text) actor = soup.select_one(".movie-spec a[itemprop=actor] > span[itemprop=name]") if not actor: return f"{jav_id} {title}" + actor = _normalize_title(actor.text) - return f"{jav_id} {title} {normalize_title(actor.text)}" + return f"{jav_id} {title} {actor}" -async def fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query: str): async with session.get( f"https://www.caribbeancompr.com/moviepages/{query}/index.html", ) as response: @@ -128,16 +149,16 @@ async def fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query title = soup.select_one(".movie-info .heading") if not title: return None - title = normalize_title(title.text) + title = _normalize_title(title.text) actor = soup.select_one(".movie-spec .spec-content > .spec-item") if not actor: return f"{jav_id} {title}" - return f"{jav_id} {title} {normalize_title(actor.text)}" + return f"{jav_id} {title} {_normalize_title(actor.text)}" -async def fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query: str): m = re.match(r"\d{6}_\d{3}", jav_id) if not m: return None @@ -149,13 +170,15 @@ async def fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query: return None data = await response.json() - title = normalize_title(data["Title"]) - actor = normalize_title(data["Actor"]) + title = _normalize_title(data["Title"]) + actor = _normalize_title(data["Actor"]) return f"{jav_id} {title} {actor}" -async def fetch_jav_data_from_10musume(session: ClientSession, jav_id: str, query: str): +async def _fetch_jav_data_from_10musume( + session: ClientSession, jav_id: str, query: str +): async with session.get( f"https://www.10musume.com/dyn/phpauto/movie_details/movie_id/{jav_id}.json", ) as response: @@ -163,22 +186,23 @@ async def fetch_jav_data_from_10musume(session: ClientSession, jav_id: str, quer return None data = await response.json() - title = normalize_title(data["Title"]) - actor = normalize_title(data["Actor"]) + title = _normalize_title(data["Title"]) + actor = _normalize_title(data["Actor"]) return f"10MU {jav_id} {title} {actor}" -SAUCE_DICT = { - "javbus": fetch_jav_data_from_javbus, - "javlibrary": fetch_jav_data_from_javlibrary, - "javbee": fetch_jav_data_from_javbee, - "javtorrent": fetch_jav_data_from_javbee, - "heydouga": fetch_jav_data_from_heydouga, - "carib": fetch_jav_data_from_carib, - "caribpr": fetch_jav_data_from_caribpr, - "1pondo": fetch_jav_data_from_1pondo, - "10musume": fetch_jav_data_from_10musume, +_SAUCE_DICT = { + "javbus": _fetch_jav_data_from_javbus, + "javlibrary": _fetch_jav_data_from_javlibrary, + "javbee": _fetch_jav_data_from_javbee, + "javtorrent": _fetch_jav_data_from_javbee, + "heydouga": _fetch_jav_data_from_heydouga, + "carib": _fetch_jav_data_from_carib, + "caribpr": _fetch_jav_data_from_caribpr, + "1pondo": _fetch_jav_data_from_1pondo, + "10musume": _fetch_jav_data_from_10musume, + "heyzo": _fetch_jav_data_from_heyzo, } @@ -186,19 +210,19 @@ async def fetch_jav_data(session: ClientSession, jav_query: JavData): queries = ( await _ for _ in as_completed( - as_kv(_.name, SAUCE_DICT[_.name](session, jav_query.name, _.query)) + _as_kv(_.name, _SAUCE_DICT[_.name](session, jav_query.name, _.query)) for _ in jav_query.sauce_list ) ) - rv = {k: v async for k, v in queries if v} + rv = {k: v async for k, v in queries} return rv -async def as_kv[K, V](key: K, value: Awaitable[V]) -> tuple[K, V]: +async def _as_kv[K, V](key: K, value: Awaitable[V]) -> tuple[K, V]: return key, await value -def normalize_title(title: str) -> str: +def _normalize_title(title: str) -> str: title = title.strip() title = title.replace("/", "/") title = title.replace("\n", "")