Skip to content

Commit

Permalink
update
Browse files (browse the repository at this point in the history)
  • Loading branch information
legnaleurc committed Dec 18, 2023
1 parent 1a47596 commit de23e39
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 63 deletions.
63 changes: 39 additions & 24 deletions drive/app/jav/_dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ._types import SauceData, JavData


def match_jav_1(name: str) -> JavData | None:
def _match_jav_1(name: str) -> JavData | None:
m = re.search(r"(\w{2,6})[-_](\d{2,4}\w?)", name)
if not m:
return None
Expand All @@ -18,7 +18,7 @@ def match_jav_1(name: str) -> JavData | None:
)


def match_jav_2(name: str) -> JavData | None:
def _match_jav_2(name: str) -> JavData | None:
m = re.search(r"(\d{3,4})(\w{3,6})[-_](\d{3,4}\w?)", name)
if not m:
return None
Expand All @@ -33,7 +33,7 @@ def match_jav_2(name: str) -> JavData | None:
)


def match_fc2(name: str) -> JavData | None:
def _match_fc2(name: str) -> JavData | None:
m = re.search(r"fc2[-_]ppv[-_](\d+)", name, re.I)
if not m:
return None
Expand All @@ -47,7 +47,7 @@ def match_fc2(name: str) -> JavData | None:
)


def match_heydouga(name: str) -> JavData | None:
def _match_heydouga(name: str) -> JavData | None:
m = re.search(r"hey(douga)?[-_ ]?(\d+)[-_](\d+)", name, re.I)
if not m:
return None
Expand All @@ -62,7 +62,7 @@ def match_heydouga(name: str) -> JavData | None:
)


def match_caribpr(name: str) -> JavData | None:
def _match_caribpr(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{3})-CARIBPR", name, re.I)
if not m:
return None
Expand All @@ -75,7 +75,7 @@ def match_caribpr(name: str) -> JavData | None:
)


def match_carib(name: str) -> JavData | None:
def _match_carib(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{3})-CARIB", name, re.I)
if not m:
return None
Expand All @@ -88,7 +88,7 @@ def match_carib(name: str) -> JavData | None:
)


def match_1pondo(name: str) -> JavData | None:
def _match_1pondo(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{3})-1PON", name, re.I)
if not m:
return None
Expand All @@ -101,7 +101,21 @@ def match_1pondo(name: str) -> JavData | None:
)


def match_unknown(name: str) -> JavData | None:
def _match_heyzo(name: str) -> JavData | None:
    """Recognise a HEYZO release ID inside *name*.

    The search is case-insensitive and looks for ``HEYZO`` followed by a
    ``-`` or ``_`` separator and four digits. Only those four digits are
    captured, even if more digits follow in the input.

    Returns:
        A ``JavData`` named ``HEYZO-<number>`` carrying a single ``heyzo``
        sauce entry whose query is the bare number, or ``None`` when the
        pattern does not occur in *name*.
    """
    m = re.search(r"HEYZO[-_](\d{4})", name, re.I)
    if not m:
        return None
    number = m.group(1)
    # Keep the canonical ID in its own local instead of overwriting the
    # `name` parameter, so the input stays readable for the whole function.
    jav_id = f"HEYZO-{number}"
    return JavData(
        name=jav_id,
        sauce_list=[
            SauceData(name="heyzo", query=number),
        ],
    )


def _match_unknown(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{3})", name, re.I)
if not m:
return None
Expand All @@ -118,7 +132,7 @@ def match_unknown(name: str) -> JavData | None:
)


def match_mesubuta(name: str) -> JavData | None:
def _match_mesubuta(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{3})[-_](\d{2})", name, re.I)
if not m:
return None
Expand All @@ -129,7 +143,7 @@ def match_mesubuta(name: str) -> JavData | None:
)


def match_10musume(name: str) -> JavData | None:
def _match_10musume(name: str) -> JavData | None:
m = re.search(r"(\d{6})[-_](\d{2})-10MU", name, re.I)
if not m:
return None
Expand All @@ -144,30 +158,31 @@ def match_10musume(name: str) -> JavData | None:
)


EXCLUDE_LIST = [
match_mesubuta,
_EXCLUDE_LIST = [
_match_mesubuta,
]


INCLUDE_LIST = [
match_10musume,
match_caribpr,
match_carib,
match_1pondo,
match_heydouga,
match_fc2,
match_jav_1,
match_jav_2,
match_unknown,
_INCLUDE_LIST = [
_match_10musume,
_match_caribpr,
_match_carib,
_match_1pondo,
_match_heyzo,
_match_heydouga,
_match_fc2,
_match_jav_1,
_match_jav_2,
_match_unknown,
]


def get_jav_query(name: str) -> JavData | None:
for ex in EXCLUDE_LIST:
for ex in _EXCLUDE_LIST:
rv = ex(name)
if rv:
return None
for in_ in INCLUDE_LIST:
for in_ in _INCLUDE_LIST:
rv = in_(name)
if rv:
return rv
Expand Down
8 changes: 4 additions & 4 deletions drive/app/jav/_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ async def _generate(drive: Drive, kwargs: Namespace) -> int:
async with ClientSession() as session:
root_node = await drive.get_node_by_path(root_path)
children = await drive.get_children(root_node)
async for node in process_node_list(session, children):
async for node in _process_node_list(session, children):
yaml.safe_dump(
[node],
sys.stdout,
Expand All @@ -72,7 +72,7 @@ async def _apply(drive: Drive, kwargs: Namespace) -> int:

node = await drive.get_node_by_id(id_)
print(f"rename {node.name} -> {value}")
await rename(drive, node, value)
await _rename(drive, node, value)
break
return 0

Expand Down Expand Up @@ -101,7 +101,7 @@ def all_same(m: dict[str, Any]):
return 0


async def process_node_list(session: ClientSession, node_list: list[Node]):
async def _process_node_list(session: ClientSession, node_list: list[Node]):
for node in node_list:
if node.is_trashed:
continue
Expand All @@ -118,7 +118,7 @@ async def process_node_list(session: ClientSession, node_list: list[Node]):
}


async def rename(drive: Drive, node: Node, new_name: str) -> None:
async def _rename(drive: Drive, node: Node, new_name: str) -> None:
if node.is_directory:
if new_name == node.name:
print("skipped")
Expand Down
94 changes: 59 additions & 35 deletions drive/app/jav/_sauce.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ._types import JavData


async def fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query: str):
async with session.get(f"https://www.javbus.com/ja/{query}") as response:
if response.status != 200:
return None
Expand All @@ -18,10 +18,10 @@ async def fetch_jav_data_from_javbus(session: ClientSession, jav_id: str, query:
title = soup.select_one(".container > h3")
if not title:
return None
return normalize_title(title.text)
return _normalize_title(title.text)


async def fetch_jav_data_from_javlibrary(
async def _fetch_jav_data_from_javlibrary(
session: ClientSession, jav_id: str, query: str
):
async with session.get(
Expand All @@ -37,7 +37,7 @@ async def fetch_jav_data_from_javlibrary(
soup = BeautifulSoup(html, "html.parser")
title = soup.select_one("#video_title .post-title")
if title:
return normalize_title(title.text)
return _normalize_title(title.text)

videos = soup.select(".videos .video")
for div in videos:
Expand All @@ -53,12 +53,12 @@ async def fetch_jav_data_from_javlibrary(
continue
if title.find("(ブルーレイディスク)") >= 0:
continue
return normalize_title(title)
return _normalize_title(title)

return None


async def fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query: str):
async with session.get(
"https://javbee.org/search",
params={
Expand All @@ -73,10 +73,12 @@ async def fetch_jav_data_from_javbee(session: ClientSession, jav_id: str, query:
title = soup.select_one(".title > a")
if not title:
return None
return normalize_title(title.text)
return _normalize_title(title.text)


async def fetch_jav_data_from_heydouga(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_heydouga(
session: ClientSession, jav_id: str, query: str
):
async with session.get(
f"https://www.heydouga.com/moviepages/{query}/index.html",
) as response:
Expand All @@ -90,10 +92,28 @@ async def fetch_jav_data_from_heydouga(session: ClientSession, jav_id: str, quer
return None
for span in title.find_all("span"):
span.decompose()
return f"{jav_id} {normalize_title(title.text)}"
title = _normalize_title(title.text)
return f"{jav_id} {title}"


async def _fetch_jav_data_from_heyzo(session: ClientSession, jav_id: str, query: str):
    """Fetch the title of a HEYZO movie page and prefix it with *jav_id*.

    Requests ``https://www.heyzo.com/moviepages/{query}/index.html`` through
    *session* and reads the text of the ``#movie > h1`` element.

    Returns:
        ``"{jav_id} {title}"`` on success, or ``None`` when the response
        status is not 200 or the heading element is missing.
    """
    async with session.get(
        f"https://www.heyzo.com/moviepages/{query}/index.html",
    ) as response:
        if response.status != 200:
            return None

        # errors="ignore" drops undecodable bytes instead of raising.
        html = await response.text(errors="ignore")
        soup = BeautifulSoup(html, "html.parser")
        # Keep the parsed element and the title string in separate names so
        # neither variable changes type halfway through the function.
        heading = soup.select_one("#movie > h1")
        if not heading:
            return None
        title = _normalize_title(heading.text)
        # Replace each run of tabs in the title with a single space.
        title = re.sub(r"\t+", " ", title)
        return f"{jav_id} {title}"

async def fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query: str):

async def _fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query: str):
async with session.get(
f"https://www.caribbeancom.com/moviepages/{query}/index.html",
) as response:
Expand All @@ -106,16 +126,17 @@ async def fetch_jav_data_from_carib(session: ClientSession, jav_id: str, query:
title = soup.select_one("h1[itemprop=name]")
if not title:
return None
title = normalize_title(title.text)
title = _normalize_title(title.text)

actor = soup.select_one(".movie-spec a[itemprop=actor] > span[itemprop=name]")
if not actor:
return f"{jav_id} {title}"
actor = _normalize_title(actor.text)

return f"{jav_id} {title} {normalize_title(actor.text)}"
return f"{jav_id} {title} {actor}"


async def fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query: str):
async with session.get(
f"https://www.caribbeancompr.com/moviepages/{query}/index.html",
) as response:
Expand All @@ -128,16 +149,16 @@ async def fetch_jav_data_from_caribpr(session: ClientSession, jav_id: str, query
title = soup.select_one(".movie-info .heading")
if not title:
return None
title = normalize_title(title.text)
title = _normalize_title(title.text)

actor = soup.select_one(".movie-spec .spec-content > .spec-item")
if not actor:
return f"{jav_id} {title}"

return f"{jav_id} {title} {normalize_title(actor.text)}"
return f"{jav_id} {title} {_normalize_title(actor.text)}"


async def fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query: str):
m = re.match(r"\d{6}_\d{3}", jav_id)
if not m:
return None
Expand All @@ -149,56 +170,59 @@ async def fetch_jav_data_from_1pondo(session: ClientSession, jav_id: str, query:
return None

data = await response.json()
title = normalize_title(data["Title"])
actor = normalize_title(data["Actor"])
title = _normalize_title(data["Title"])
actor = _normalize_title(data["Actor"])

return f"{jav_id} {title} {actor}"


async def fetch_jav_data_from_10musume(session: ClientSession, jav_id: str, query: str):
async def _fetch_jav_data_from_10musume(
session: ClientSession, jav_id: str, query: str
):
async with session.get(
f"https://www.10musume.com/dyn/phpauto/movie_details/movie_id/{jav_id}.json",
) as response:
if response.status != 200:
return None

data = await response.json()
title = normalize_title(data["Title"])
actor = normalize_title(data["Actor"])
title = _normalize_title(data["Title"])
actor = _normalize_title(data["Actor"])

return f"10MU {jav_id} {title} {actor}"


SAUCE_DICT = {
"javbus": fetch_jav_data_from_javbus,
"javlibrary": fetch_jav_data_from_javlibrary,
"javbee": fetch_jav_data_from_javbee,
"javtorrent": fetch_jav_data_from_javbee,
"heydouga": fetch_jav_data_from_heydouga,
"carib": fetch_jav_data_from_carib,
"caribpr": fetch_jav_data_from_caribpr,
"1pondo": fetch_jav_data_from_1pondo,
"10musume": fetch_jav_data_from_10musume,
_SAUCE_DICT = {
"javbus": _fetch_jav_data_from_javbus,
"javlibrary": _fetch_jav_data_from_javlibrary,
"javbee": _fetch_jav_data_from_javbee,
"javtorrent": _fetch_jav_data_from_javbee,
"heydouga": _fetch_jav_data_from_heydouga,
"carib": _fetch_jav_data_from_carib,
"caribpr": _fetch_jav_data_from_caribpr,
"1pondo": _fetch_jav_data_from_1pondo,
"10musume": _fetch_jav_data_from_10musume,
"heyzo": _fetch_jav_data_from_heyzo,
}


async def fetch_jav_data(session: ClientSession, jav_query: JavData):
queries = (
await _
for _ in as_completed(
as_kv(_.name, SAUCE_DICT[_.name](session, jav_query.name, _.query))
_as_kv(_.name, _SAUCE_DICT[_.name](session, jav_query.name, _.query))
for _ in jav_query.sauce_list
)
)
rv = {k: v async for k, v in queries if v}
rv = {k: v async for k, v in queries}
return rv


async def as_kv[K, V](key: K, value: Awaitable[V]) -> tuple[K, V]:
async def _as_kv[K, V](key: K, value: Awaitable[V]) -> tuple[K, V]:
return key, await value


def normalize_title(title: str) -> str:
def _normalize_title(title: str) -> str:
title = title.strip()
title = title.replace("/", "/")
title = title.replace("\n", "")
Expand Down

0 comments on commit de23e39

Please sign in to comment.