From 42a8a7540441ffa2b1d43e7546b09e77b8c11fea Mon Sep 17 00:00:00 2001
From: ACA <aca-0021@proton.me>
Date: Sun, 21 Jan 2024 22:56:36 +0100
Subject: [PATCH 1/4] add faqwiki.py source (https://faqwiki.us/)

---
 sources/en/f/faqwiki.py | 135 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 sources/en/f/faqwiki.py

diff --git a/sources/en/f/faqwiki.py b/sources/en/f/faqwiki.py
new file mode 100644
index 000000000..5b1185cf5
--- /dev/null
+++ b/sources/en/f/faqwiki.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+import logging
+
+from bs4.element import Tag
+from lncrawl.core.crawler import Crawler
+from lncrawl.models import Volume, Chapter, SearchResult
+
+logger = logging.getLogger(__name__)
+
+
+class FaqWiki(Crawler):  # lncrawl source for novels hosted on https://faqwiki.us/
+    base_url = ["https://faqwiki.us/"]
+    has_manga = False
+    has_mtl = True
+
+    def initialize(self) -> None:
+        # Each chapter page carries roughly four or more ads embedded as <img> tags.
+        # No chapter seen so far contains a genuine image, so at worst we lose one.
+        self.cleaner.bad_tags.add("img")
+
+    def read_novel_info(self):
+        """Populate title, cover, metadata, volumes and chapters from the novel's overview page."""
+        soup = self.get_soup(self.novel_url)
+        content = soup.select_one(".entry-content")
+
+        entry_title = soup.select_one("h1.entry-title")
+        assert isinstance(entry_title, Tag)  # this must be here, is part of normal site structure/framework
+        self.novel_title = entry_title.text.strip()
+        # remove suffix from completed novels' title
+        if self.novel_title.endswith(" – All Chapters"):
+            self.novel_title = self.novel_title[0:self.novel_title.find(" – All Chapters")]
+        self.novel_author = "FaqWiki"
+        cover = content.select_one('img[importance="high"]')
+        self.novel_cover = self.absolute_url(cover["src"])
+
+        # remove any optimized image size GET args from novel cover URL
+        if "?" in self.novel_cover:
+            self.novel_cover = self.novel_cover[0:self.novel_cover.find("?")]
+
+        metadata_container = soup.select_one("div.book-review-block__meta-item-value")
+        keywords = {
+            "desc": "Description:",
+            "alt_name": "Alternate Names:",
+            "genre": "Genre:",
+            "author": "Author(s):",
+            "status": "Status:",
+            "original_pub": "Original Publisher:"
+        }
+
+        if metadata_container:
+            metadata = metadata_container.text  # doesn't have line breaks anyway so not splitting here
+            pos_dict = {}
+            for key, sep in keywords.items():
+                pos_dict[key + "_start"] = metadata.find(sep)
+                pos_dict[key] = metadata.find(sep) + len(sep)
+            # NOTE: a label missing from the text makes find() return -1 and skews the slices below
+            self.novel_synopsis = metadata[pos_dict["desc"]:pos_dict["alt_name_start"]].strip()
+            self.novel_tags = metadata[pos_dict["genre"]:pos_dict["author_start"]].strip().split(" ")
+            self.novel_author = metadata[pos_dict["author"]:pos_dict["status_start"]].strip()
+
+        logger.info("Novel title: %s", self.novel_title)
+        logger.info("Novel synopsis: %s", self.novel_synopsis)
+        logger.info("Novel tags: %s", ",".join(self.novel_tags))
+        logger.info("Novel author: %s", self.novel_author)
+        logger.info("Novel cover: %s", self.novel_cover)
+
+        chap_list = soup.select_one('#lcp_instance_0').select("li>a")
+        chap_list.reverse()  # since newest chapter is first child, reverse
+        for idx, a in enumerate(chap_list):
+            if "chapter" not in a.text.strip().lower():
+                continue
+            chap_id = 1 + idx
+            vol_id = 1 + len(self.chapters) // 100
+            vol_title = f"Volume {vol_id}"
+            # chap_id skips numbers whenever a non-chapter link was dropped above, so open a
+            # volume based on the chapters kept so far (the same count vol_id is derived from)
+            if len(self.chapters) % 100 == 0:
+                self.volumes.append(
+                    Volume(id=vol_id, title=vol_title)
+                )
+
+            # chapter name is only (sometimes) present in chapter page, not in overview
+            entry_title = f"Chapter {chap_id}"
+
+            self.chapters.append(
+                Chapter(
+                    id=chap_id,
+                    url=self.absolute_url(a["href"]),
+                    title=entry_title,
+                    volume=vol_id,
+                    volume_title=vol_title
+                ),
+            )
+
+    def download_chapter_body(self, chapter):
+        """Download ``chapter.url`` and return the extracted contents of its entry body."""
+        page = self.get_soup(chapter.url)
+        entry = page.select_one("div.entry-content")
+
+        # run the shared cleaner over the entry first, then extract what is left
+        cleaned = self.cleaner.clean_contents(entry)
+        return self.cleaner.extract_contents(cleaned)
+
+    def search_novel(self, query: str):
+        """Search the site for ``query`` and return the matching novels as SearchResult items."""
+        novel_selector = "article > div > header > h3.entry-title > a"
+        next_selector = "div.nav-links > a.next"
+
+        soup = self.get_soup(f"https://faqwiki.us/?s={query.replace(' ','+')}&post_type=page")
+        # the "nothing found" page carries no results; a page without the heading is
+        # treated as a normal result page instead of raising AttributeError on None
+        page_title = soup.select_one("h1.page-title")
+        if page_title is not None and "nothing found" in page_title.text.strip().lower():
+            return []
+
+        novels = soup.select(novel_selector)
+
+        # loop over all pages via next button and get all novels
+        next_page = soup.select_one(next_selector)
+        while next_page:
+            page_soup = self.get_soup(self.absolute_url(next_page["href"]))
+            novels += page_soup.select(novel_selector)
+            next_page = page_soup.select_one(next_selector)
+
+        results = []
+        needle = query.lower()
+        for novel in novels:
+            title = novel.text.lower()
+            # filter out "fake" novels (links to All, completed & ongoing pages)
+            if "novels" in title:
+                continue
+            # simple but at least won't taint results
+            if needle in title:
+                results.append(SearchResult(title=novel.text, url=novel["href"]))
+        return results

From f8424985b4fb6a2cddc735eb8cf41b9aba8217c2 Mon Sep 17 00:00:00 2001
From: ACA <aca-0021@proton.me>
Date: Thu, 1 Feb 2024 23:09:44 +0100
Subject: [PATCH 2/4] faqwiki: fix downloads for novels with missing cover img

---
 sources/en/f/faqwiki.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sources/en/f/faqwiki.py b/sources/en/f/faqwiki.py
index 5b1185cf5..a82a0892e 100644
--- a/sources/en/f/faqwiki.py
+++ b/sources/en/f/faqwiki.py
@@ -31,10 +31,11 @@ def read_novel_info(self):
             self.novel_title = self.novel_title[0:self.novel_title.find(" – All Chapters")]
         self.novel_author = "FaqWiki"
         cover = content.select_one('img[importance="high"]')
-        self.novel_cover = self.absolute_url(cover["src"])
-
+        # is missing in some rarer cases
+        if cover:
+            self.novel_cover = self.absolute_url(cover["src"])
         # remove any optimized image size GET args from novel cover URL
-        if "?" in self.novel_cover:
+        if self.novel_cover and "?" in self.novel_cover:
             self.novel_cover = self.novel_cover[0:self.novel_cover.find("?")]
 
         metadata_container = soup.select_one("div.book-review-block__meta-item-value")

From c457136d496eb3da6279098d69b2fd7d2994fd30 Mon Sep 17 00:00:00 2001
From: ACA <aca-0021@proton.me>
Date: Sun, 4 Feb 2024 16:18:15 +0100
Subject: [PATCH 3/4] faqwiki: fix downloads for novels with chapters in
 chronological order

---
 sources/en/f/faqwiki.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sources/en/f/faqwiki.py b/sources/en/f/faqwiki.py
index a82a0892e..e58e9f27c 100644
--- a/sources/en/f/faqwiki.py
+++ b/sources/en/f/faqwiki.py
@@ -66,7 +66,10 @@ def read_novel_info(self):
         logger.info("Novel cover: %s", self.novel_cover)
 
         chap_list = soup.select_one('#lcp_instance_0').select("li>a")
-        chap_list.reverse()  # since newest chapter is first child, reverse
+
+        # in rare cases the chapter list is newest to oldest
+        if not chap_list[0].text.lower().replace(' ', '').endswith(("chapter0", "chapter1")):
+            chap_list.reverse()
         for idx, a in enumerate(chap_list):
             if "chapter" not in a.text.strip().lower():
                 continue

From 6c474c06e8a2538e6a2b8eb947c6ecc75d58d920 Mon Sep 17 00:00:00 2001
From: ACA <aca-0021@proton.me>
Date: Tue, 6 Feb 2024 22:40:46 +0100
Subject: [PATCH 4/4] faqwiki: remove conditional chapter reversal & improve
 cover image download

---
 sources/en/f/faqwiki.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/sources/en/f/faqwiki.py b/sources/en/f/faqwiki.py
index e58e9f27c..6194fa828 100644
--- a/sources/en/f/faqwiki.py
+++ b/sources/en/f/faqwiki.py
@@ -30,10 +30,17 @@ def read_novel_info(self):
         if self.novel_title.endswith(" – All Chapters"):
             self.novel_title = self.novel_title[0:self.novel_title.find(" – All Chapters")]
         self.novel_author = "FaqWiki"
-        cover = content.select_one('img[importance="high"]')
+        cover = content.select_one('.wp-block-image img')
         # is missing in some rarer cases
         if cover:
-            self.novel_cover = self.absolute_url(cover["src"])
+            src = str(cover['src'])
+            # may be replaced with JS after load, in such case try and get the real img hidden in data-values
+            if src.startswith("data:"):
+                try:
+                    src = cover["data-ezsrc"]
+                except KeyError:
+                    pass
+            self.novel_cover = self.absolute_url(src)
         # remove any optimized image size GET args from novel cover URL
         if self.novel_cover and "?" in self.novel_cover:
             self.novel_cover = self.novel_cover[0:self.novel_cover.find("?")]
@@ -67,11 +74,8 @@ def read_novel_info(self):
 
         chap_list = soup.select_one('#lcp_instance_0').select("li>a")
 
-        # in rare cases the chapter list is newest to oldest
-        if not chap_list[0].text.lower().replace(' ', '').endswith(("chapter0", "chapter1")):
-            chap_list.reverse()
         for idx, a in enumerate(chap_list):
-            if "chapter" not in a.text.strip().lower():
+            if "chapter" not in a.text.lower():
                 continue
             chap_id = 1 + idx
             vol_id = 1 + len(self.chapters) // 100