Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add qidian,ypshuo parser #743

Merged
merged 2 commits into from
Oct 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions catalog/book/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def lookup_id_type_choices(cls):
IdType.DoubanBook,
IdType.Goodreads,
IdType.GoogleBooks,
IdType.Qidian,
]
return [(i.value, i.label) for i in id_types]

Expand Down
46 changes: 46 additions & 0 deletions catalog/book/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,52 @@ def test_work(self):
self.assertEqual(editions[1].display_title, "黄金时代")


class QidianTestCase(TestCase):
    """Exercises URL recognition and scraping for the Qidian site adapter."""

    databases = "__all__"

    def test_parse(self):
        # Both supported URL shapes must resolve to the same site URL,
        # which is the book.qidian.com/info/ form.
        expected_type = IdType.Qidian
        expected_id = "1010868264"
        www_url = "https://www.qidian.com/book/1010868264/"
        info_url = "https://book.qidian.com/info/1010868264/"

        from_www = SiteManager.get_site_by_url(www_url)
        from_info = SiteManager.get_site_by_url(info_url)

        self.assertEqual(from_www.url, info_url)
        self.assertEqual(from_www.ID_TYPE, expected_type)
        self.assertEqual(from_www.id_value, expected_id)
        self.assertEqual(from_info.url, info_url)

    @use_local_response
    def test_scrape(self):
        # The site starts out not ready and becomes ready once the resource
        # has been fetched.
        site = SiteManager.get_site_by_url("https://book.qidian.com/info/1010868264/")
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        # Identity of the fetched resource.
        resource = site.resource
        self.assertEqual(resource.site_name, SiteName.Qidian)
        self.assertEqual(resource.id_type, IdType.Qidian)
        self.assertEqual(resource.id_value, "1010868264")

        # Metadata carried over to the catalog item.
        item = resource.item
        self.assertEqual(item.display_title, "诡秘之主")
        self.assertEqual(item.author[0], "爱潜水的乌贼")


class YpshuoTestCase(TestCase):
    """Exercises scraping for the Ypshuo site adapter."""

    databases = "__all__"

    @use_local_response
    def test_scrape(self):
        # The site starts out not ready and becomes ready once the resource
        # has been fetched.
        novel_url = "https://www.ypshuo.com/novel/1.html"
        site = SiteManager.get_site_by_url(novel_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        # Identity of the fetched resource.
        resource = site.resource
        self.assertEqual(resource.site_name, SiteName.Ypshuo)
        self.assertEqual(resource.id_type, IdType.Ypshuo)
        self.assertEqual(resource.id_value, "1")

        # Metadata carried over to the catalog item.
        item = resource.item
        self.assertEqual(item.display_title, "诡秘之主")
        self.assertEqual(item.author[0], "爱潜水的乌贼")


class MultiBookSitesTestCase(TestCase):
databases = "__all__"

Expand Down
4 changes: 4 additions & 0 deletions catalog/common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class SiteName(models.TextChoices):
Discogs = "discogs", _("Discogs") # type:ignore[reportCallIssue]
AppleMusic = "apple_music", _("Apple Music") # type:ignore[reportCallIssue]
Fediverse = "fedi", _("Fediverse") # type:ignore[reportCallIssue]
Qidian = "qidian", _("Qidian") # type:ignore[reportCallIssue]
Ypshuo = "ypshuo", _("Ypshuo") # type:ignore[reportCallIssue]


class IdType(models.TextChoices):
Expand Down Expand Up @@ -118,6 +120,8 @@ class IdType(models.TextChoices):
ApplePodcast = "apple_podcast", _("Apple Podcast") # type:ignore[reportCallIssue]
AppleMusic = "apple_music", _("Apple Music") # type:ignore[reportCallIssue]
Fediverse = "fedi", _("Fediverse") # type:ignore[reportCallIssue]
Qidian = "qidian", _("Qidian") # type:ignore[reportCallIssue]
Ypshuo = "ypshuo", _("Ypshuo") # type:ignore[reportCallIssue]


IdealIdTypes = [
Expand Down
2 changes: 2 additions & 0 deletions catalog/sites/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from .google_books import GoogleBooks
from .igdb import IGDB
from .imdb import IMDB
from .qidian import Qidian
from .rss import RSS
from .spotify import Spotify
from .steam import Steam
from .tmdb import TMDB_Movie
from .ypshuo import Ypshuo

# from .apple_podcast import ApplePodcast
54 changes: 54 additions & 0 deletions catalog/sites/qidian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging

from catalog.common import *
from catalog.models import *


@SiteManager.register
class Qidian(AbstractSite):
    """Site adapter for book pages on Qidian (qidian.com).

    Two URL shapes are recognized, ``https://www.qidian.com/book/<id>`` and
    ``https://book.qidian.com/info/<id>``; ``id_to_url`` always produces the
    latter.
    """

    SITE_NAME = SiteName.Qidian
    ID_TYPE = IdType.Qidian
    URL_PATTERNS = [
        r"https://www\.qidian\.com/book/(\d+)",
        r"https://book\.qidian\.com/info/(\d+)",
    ]
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the ``book.qidian.com/info/`` URL for the given book id."""
        return f"https://book.qidian.com/info/{id_value}/"

    def scrape(self):
        """Download the book page and extract its metadata.

        Returns:
            A ``ResourceContent`` whose metadata holds the title, author list,
            description and cover image URL. When the synopsis cannot be
            located in the page, ``localized_description`` is an empty list
            rather than an entry whose text is ``None``.
        """
        content = ProxiedDownloader(self.url).download().html()

        # Fall back to a placeholder title so the resource is still usable
        # when the title node is missing.
        title_elem = content.xpath('//*[@id="bookName"]/text()')
        title = (
            title_elem[0].strip()  # type:ignore
            if title_elem
            else f"Unknown Title {self.id_value}"
        )

        # NOTE(review): absolute XPaths are tied to the current page layout;
        # any markup change on the site will make them match nothing.
        brief_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[3]/div[1]/div/div[1]/div[1]/p/text()"
        )
        brief = (
            "\n".join(p.strip() for p in brief_elem)  # type:ignore
            if brief_elem
            else None
        )
        # Only emit a description entry when there is actual text; an entry
        # with a null/empty "text" is malformed localized content.
        localized_description = [{"lang": "zh-cn", "text": brief}] if brief else []

        # The cover URL is derived from the book id instead of being read
        # from the page.
        img_url = f"https://bookcover.yuewen.com/qdbimg/349573/{self.id_value}"

        author_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[1]/div[2]/h1/span[1]/a/text()"
        )
        authors = [author_elem[0].strip()] if author_elem else None  # type:ignore

        return ResourceContent(
            metadata={
                "localized_title": [{"lang": "zh-cn", "text": title}],
                "author": authors,
                "localized_description": localized_description,
                "cover_image_url": img_url,
            }
        )
39 changes: 39 additions & 0 deletions catalog/sites/ypshuo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json

from catalog.common import *
from catalog.models import *


@SiteManager.register
class Ypshuo(AbstractSite):
    """Site adapter for novel pages on Ypshuo (ypshuo.com).

    Metadata is read from the site's JSON API rather than from the HTML page.
    """

    SITE_NAME = SiteName.Ypshuo
    ID_TYPE = IdType.Ypshuo
    URL_PATTERNS = [
        r"https://www\.ypshuo\.com/novel/(\d+)\.html",
    ]
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the novel page URL for the given Ypshuo id."""
        return f"https://www.ypshuo.com/novel/{id_value}.html"

    def scrape(self):
        """Fetch novel info from the Ypshuo API and build the resource content.

        Returns:
            A ``ResourceContent`` with title, author, description and cover
            URL, plus a Qidian lookup id when the API lists a matching source.
        """
        api_url = f"https://www.ypshuo.com/api/novel/getInfo?novelId={self.id_value}"
        o = BasicDownloader(api_url).download().json()
        data = o["data"]

        # "source" is a JSON document encoded as a string. The API returns
        # null for some fields, so a missing/null/empty value is treated as
        # "no linked sources" instead of crashing in json.loads.
        source = json.loads(data.get("source") or "[]") or []

        lookup_ids = {}
        for site in source:
            # Skip entries that lack the fields needed to build a lookup id.
            if site.get("siteName") == "起点中文网" and site.get("bookId"):
                lookup_ids[IdType.Qidian] = site["bookId"]

        return ResourceContent(
            metadata={
                "localized_title": [{"lang": "zh-cn", "text": data["novel_name"]}],
                "author": [data["author_name"]],
                "localized_description": [
                    {"lang": "zh-cn", "text": data["synopsis"]}
                ],
                "cover_image_url": data["novel_img"],
            },
            lookup_ids=lookup_ids,
        )
6 changes: 6 additions & 0 deletions common/static/scss/_sitelabel.scss
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
white-space: nowrap;
}

// Site label for Qidian: white text on a dark red (#9e252b) background, no border.
.qidian {
border: none;
color: white;
background-color: #9e252b;
}

.douban {
border: none;
color: white;
Expand Down
1,920 changes: 1,920 additions & 0 deletions test_data/https___book_qidian_com_info_1010868264_

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"code":"00","data":{"id":1,"novel_name":"诡秘之主","category_id":1,"novel_img":"https://qidian.qpic.cn/qdbimg/349573/1010868264/300","author_name":"爱潜水的乌贼","synopsis":"蒸汽与机械的浪潮中,谁能触及非凡?历史和黑暗的迷雾里,又是谁在耳语?我从诡秘中醒来,睁眼看见这个世界:枪械,大炮,巨舰,飞空艇,差分机;魔药,占卜,诅咒,倒吊人,封印物……光明依旧照耀,神秘从未远离,这是一段“愚者”的传说。","word_number":4465200,"update_status":1,"update_explain":null,"status":2,"source":"[{\"bookId\":\"1010868264\",\"siteName\":\"起点中文网\",\"bookPage\":\"http://book.qidian.com/info/1010868264\"},{\"bookId\":\"20868264\",\"siteName\":\"创世中文网\",\"bookPage\":\"http://chuangshi.qq.com/bk/ly/20868264.html\"}]","power":"0","point":328,"score":8.7,"scorer":120,"score_1":8,"scorer_1":2,"score_2":10,"scorer_2":1,"score_3":8.7,"scorer_3":117,"create_time":1605427200,"update_time":1605427200,"novel_tags":[{"tag_name":"异世大陆"}],"novel_category":{"cate_name":"玄幻"}}}