Skip to content

Commit

Permalink
Merge pull request #2278 from Vuizur/dev
Browse files Browse the repository at this point in the history
Add NovelDeGlace
  • Loading branch information
dipu-bd authored Feb 25, 2024
2 parents fa5057d + 487b867 commit 4609e73
Showing 1 changed file with 95 additions and 0 deletions.
95 changes: 95 additions & 0 deletions sources/fr/noveldeglace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import logging

from lncrawl.core.crawler import Crawler
from lncrawl.core.exeptions import LNException
from lncrawl.models.chapter import Chapter
from lncrawl.models.volume import Volume

# Module-level logger named after this module; used for the debug traces below.
logger = logging.getLogger(__name__)


class NovelDeGlace(Crawler):
    """Crawler for novels published on https://noveldeglace.com/ (French).

    ``read_novel_info`` scrapes the novel page for title, author, cover,
    volumes and chapter links; ``download_chapter_body`` fetches one chapter
    page and returns its content as an HTML string.
    """

    base_url = "https://noveldeglace.com/"
    last_updated = "2024-02-22"
    has_mtl = False

    @staticmethod
    def _text_after_label(text: str) -> str:
        """Return everything after the first colon in *text*, stripped.

        Only the first colon is treated as the label separator, so values
        that themselves contain a colon are kept whole. Returns an empty
        string when *text* has no colon.
        """
        return text.partition(":")[2].strip()

    def read_novel_info(self) -> None:
        """Populate novel metadata, volumes and chapters from the novel page.

        Sets ``novel_title``, ``novel_author`` and ``novel_cover`` when the
        matching elements are present, and appends to ``self.volumes`` and
        ``self.chapters``.

        Raises:
            LNException: if the details container, the tabs container, or
                the chapter lists of a volume row cannot be found.
        """
        logger.debug("Visiting %s", self.novel_url)
        soup = self.get_soup(self.novel_url)

        novel_details = soup.select_one("div.entry-content")
        if not novel_details:
            raise LNException("Failed to find novel details")

        # Each "line_roman" div is a "<strong>Label :</strong> value" pair.
        for div in novel_details.find_all("div", class_="line_roman"):
            strong = div.find("strong")
            if not strong:
                continue
            label = strong.text.strip()
            if label == "Titre complet :":
                # Anything from "RSS"/"CMS" onwards is cut off; presumably
                # trailing link text on the same line -- TODO confirm.
                self.novel_title = (
                    self._text_after_label(div.text)
                    .split("RSS")[0]
                    .split("CMS")[0]
                    .strip()
                )
            elif label == "Auteur :":
                self.novel_author = self._text_after_label(div.text)

        if not self.novel_title:
            logger.debug("Failed to find novel title")

        tabs = soup.select_one("div.su-tabs-panes")
        if not tabs:
            raise LNException("Failed to find chapters")

        volume_id = 0
        for row in tabs.find_all("div", class_="su-row"):
            # NOTE: every row with an image overwrites the cover, so the
            # last such row wins.
            img = row.find("img")
            if img and img.has_attr("src"):
                self.novel_cover = img["src"]
            else:
                logger.debug("Failed to find novel cover")

            uls = row.find_all("ul")
            if not uls:
                raise LNException("Failed to find chapters")

            # A row without a volume heading must not abort the crawl.
            volume_span = row.find("span", class_="roman volume")
            if volume_span:
                volume_title = volume_span.text.strip()
            else:
                logger.debug("Failed to find volume title")
                volume_title = ""

            for ul in uls:  # There is one ul for each arc
                for li in ul.find_all("li"):
                    a = li.find("a")
                    if a and a.has_attr("href"):
                        self.chapters.append(
                            Chapter(
                                id=len(self.chapters) + 1,
                                title=a.text.strip(),
                                url=a["href"],
                                volume=volume_id,
                                volume_title=volume_title,
                            )
                        )
                    else:
                        logger.debug("Failed to find chapter link")

            self.volumes.append(Volume(id=volume_id, title=volume_title))
            volume_id += 1

    def download_chapter_body(self, chapter: Chapter) -> str:
        """Fetch *chapter* and return its content element as an HTML string.

        Looks for ``div.content-tome`` first and falls back to
        ``div.entry-content-chapitre``. The first ``h2`` inside the content
        and any ``div.mistape_caption`` are removed before serialising.

        Raises:
            LNException: if neither content container is present.
        """
        logger.debug("Visiting %s", chapter.url)
        soup = self.get_soup(chapter.url)

        body = soup.select_one("div.content-tome")
        if not body:
            body = soup.select_one("div.entry-content-chapitre")
        # Validate before touching ``body``; otherwise a missing container
        # surfaces as AttributeError instead of the intended LNException.
        if not body:
            raise LNException("Failed to find chapter content")

        if body.h2:
            body.h2.decompose()
        mistape_caption = body.find("div", class_="mistape_caption")
        if mistape_caption:
            mistape_caption.decompose()

        return str(body)

0 comments on commit 4609e73

Please sign in to comment.