Dev #2547

Merged
merged 9 commits into from
Jan 11, 2025

878 changes: 439 additions & 439 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lncrawl/bots/console/integration.py
@@ -108,7 +108,7 @@ def _download_novel():
if not self.search_mode:
raise e
elif not self.confirm_retry():
raise LNException("Cancelled by user")
raise LNException("Cancelled by user") from e

self.app.start_download()
self.app.bind_books()
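Note on the integration.py change: adding `from e` chains the new LNException to the exception that triggered the retry prompt, so the original failure stays visible in the traceback as the direct cause. A minimal standalone sketch of the behaviour (the class and function names here are illustrative, not taken from lncrawl):

```python
class LNException(Exception):
    """Stand-in for lncrawl's LNException, used only for this illustration."""


def risky_search():
    # Hypothetical operation that fails, standing in for the download step.
    raise ValueError("source returned no results")


try:
    try:
        risky_search()
    except ValueError as e:
        # "raise ... from e" keeps the original error as __cause__, so the
        # traceback prints it under "The above exception was the direct
        # cause of the following exception".
        raise LNException("Cancelled by user") from e
except LNException as exc:
    print(exc.__cause__)  # -> source returned no results
```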
206 changes: 0 additions & 206 deletions lncrawl/templates/novelupdates.py

This file was deleted.

2 changes: 1 addition & 1 deletion sources/_index.json

Large diffs are not rendered by default.

6 changes: 0 additions & 6 deletions sources/en/d/dobelyuwai.py
@@ -29,12 +29,6 @@ def read_novel_info(self):
self.novel_cover = None
logger.info("Novel cover: %s", self.novel_cover)

- # try:
- # self.novel_author = soup.select_one('div.entry-content > p:nth-child(2)').text.strip()
- # except Exception as e:
- # logger.warning('Failed to get novel auth. Error: %s', e)
- # logger.info('%s', self.novel_author)

# Removes none TOC links from bottom of page.
toc_parts = soup.select_one("div.entry-content")

78 changes: 18 additions & 60 deletions sources/en/f/fenrirtranslations.py
@@ -1,18 +1,13 @@
# -*- coding: utf-8 -*-
import logging
- from typing import Generator
- from urllib.parse import urlencode, quote_plus

- from bs4 import BeautifulSoup, Tag

- from lncrawl.models import Chapter, SearchResult
- from lncrawl.templates.soup.chapter_only import ChapterOnlySoupTemplate
- from lncrawl.templates.soup.searchable import SearchableSoupTemplate
+ from bs4 import BeautifulSoup

+ from lncrawl.templates.madara import MadaraTemplate
logger = logging.getLogger(__name__)


- class FenrirTranslationsCrawler(SearchableSoupTemplate, ChapterOnlySoupTemplate):
+ class FenrirTranslationsCrawler(MadaraTemplate):
base_url = ["https://fenrirtranslations.com/"]

def initialize(self) -> None:
@@ -23,57 +18,20 @@ def initialize(self) -> None:
]
)

- def select_search_items(self, query: str) -> Generator[Tag, None, None]:
- params = {"s": quote_plus(query.lower()), "post_type": "wp-manga"}
- soup = self.post_soup(f"{self.home_url}?{urlencode(params)}")
- yield from soup.select(".post-title a[href*='/series/']")

- def parse_search_item(self, tag: Tag) -> SearchResult:
- return SearchResult(
- title=tag.text.strip(),
- url=self.absolute_url(tag["href"]),
- )

- def parse_title(self, soup: BeautifulSoup) -> str:
- tag = soup.select_one(".tab-summary .post-content .post-title")
- assert isinstance(tag, Tag)
- return tag.text.strip()

- def parse_cover(self, soup: BeautifulSoup) -> str:
- tag = soup.select_one(".tab-summary .summary_image img")
- assert isinstance(tag, Tag)
- src = tag.get("src")
- return self.absolute_url(src)

- def parse_authors(self, soup: BeautifulSoup) -> Generator[str, None, None]:
- for a in soup.select(".tab-summary .post-content .manga-authors a[href*='/author/']"):
+ def parse_authors(self, soup: BeautifulSoup):
+ for a in soup.select('.manga-authors a[href*="author"]'):
yield a.text.strip()

- def parse_genres(self, soup: BeautifulSoup) -> Generator[str, None, None]:
- for a in soup.select(".tab-summary .post-content a[href*='/genre/']"):
- yield a.text.strip()

- def parse_summary(self, soup: BeautifulSoup) -> str:
- tag = soup.select_one(".tab-summary .post-content div.manga-summary")
- assert isinstance(tag, Tag)
- return tag.text.strip()

- def select_chapter_tags(self, soup: BeautifulSoup) -> Generator[Tag, None, None]:
- novel_url_without_slash = self.novel_url.rstrip("/")
- headers = {
- "Accept": "*/*",
- "Referer": self.novel_url,
- "X-Requested-With": "XMLHttpRequest"
- }
- chapters_soup = self.post_soup(f"{novel_url_without_slash}/ajax/chapters/", headers=headers)
- yield from reversed(chapters_soup.select("section.free li.free-chap a"))

- def parse_chapter_item(self, tag: Tag, id: int) -> Chapter:
- return Chapter(
- id=id,
- title=tag.text.strip(),
- url=self.absolute_url(tag["href"]),
- )

- def select_chapter_body(self, soup: BeautifulSoup) -> Tag:
- return soup.select_one(".entry-content .reading-content")
+ def parse_summary(self, soup):
+ possible_summary = soup.select_one(".manga-summary")
+ return self.cleaner.extract_contents(possible_summary)

+ def select_chapter_tags(self, soup: BeautifulSoup):
+ try:
+ clean_novel_url = self.novel_url.split("?")[0].strip("/")
+ response = self.submit_form(f"{clean_novel_url}/ajax/chapters/")
+ soup = self.make_soup(response)
+ chapters = soup.select(".free ul.main .wp-manga-chapter a")
+ yield from reversed(chapters)
+ except Exception as e:
+ logger.debug("Failed to fetch chapters using ajax: %s", e)
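The rewritten `select_chapter_tags` leans on the Madara theme convention behind MadaraTemplate: the chapter list is not embedded in the novel page but is returned by a POST to `<novel-url>/ajax/chapters/`. A rough standalone sketch of that request pattern using `requests` and BeautifulSoup directly (the novel URL and CSS selector below are illustrative assumptions, not verified against the live site):

```python
import requests
from bs4 import BeautifulSoup

# Illustrative URL; any Madara-based series page follows the same pattern.
novel_url = "https://fenrirtranslations.com/series/example-novel/"

# Madara-style WordPress themes serve the chapter list from an ajax endpoint.
resp = requests.post(
    novel_url.rstrip("/") + "/ajax/chapters/",
    headers={"X-Requested-With": "XMLHttpRequest", "Referer": novel_url},
    timeout=30,
)
resp.raise_for_status()

soup = BeautifulSoup(resp.text, "html.parser")
# The endpoint lists chapters newest-first; reverse them into reading order.
for a in reversed(soup.select("li.wp-manga-chapter a")):
    print(a.text.strip(), a.get("href"))
```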
22 changes: 9 additions & 13 deletions sources/en/n/novelhall.py
@@ -23,10 +23,10 @@ def search_novel(self, query: str):
soup = self.get_soup(self.absolute_url(search_url + quote_plus(query.lower())))

results = []
- for novel in soup.select('.section3 table tbody tr'):
- novel = novel.findAll('a')
+ for novel in soup.select(".section3 table tbody tr"):
+ novel = novel.findAll("a")
novel_link = novel[1]
- latest_chapter = novel[2].text.strip().split('.')
+ latest_chapter = novel[2].text.strip().split(".")
chapter_number = latest_chapter[0]

if chapter_number.isdigit():
@@ -37,8 +37,8 @@
results.append(
{
"title": novel_link.text.strip(),
"url": self.absolute_url(novel_link['href']),
"info": latest_chapter
"url": self.absolute_url(novel_link["href"]),
"info": latest_chapter,
}
)

@@ -60,20 +60,16 @@ def read_novel_info(self):
possible_image = soup.select_one("div.book-img img")
if possible_image:
self.novel_cover = self.absolute_url(possible_image["src"])

- if possible_image['src'] == "":
- logger.warning("Novel cover: unavailable")
- else:
- logger.info("Novel cover: %s", self.novel_cover)
- else:
- logger.info("Novel cover: unavailable")
+ logger.info("Novel cover: %s", self.novel_cover)

author = soup.select("div.book-info div.total.booktag span.blue")[0]
author.select_one("p").extract()
self.novel_author = author.text.replace("Author:", "").strip()
logger.info("Novel author: %s", self.novel_author)

- self.novel_tags = [soup.select_one("div.book-info div.total.booktag a.red").text.strip()]
+ self.novel_tags = [
+ soup.select_one("div.book-info div.total.booktag a.red").text.strip()
+ ]
logger.info("Novel tags: %s", self.novel_tags)

synopsis = soup.select_one(".js-close-wrap")