Skip to content

Commit

Permalink
update model about merging works
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name authored and alphatownsman committed Apr 9, 2024
1 parent fec56d1 commit 86834ed
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 5 deletions.
42 changes: 37 additions & 5 deletions catalog/book/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.utils.translation import gettext_lazy as _
from loguru import logger as _logger

from catalog.common import (
BaseSchema,
Expand Down Expand Up @@ -163,19 +164,39 @@ def lookup_id_cleanup(cls, lookup_id_type, lookup_id_value):
return detect_isbn_asin(lookup_id_value)
return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)

def merge_to(self, to_item):
    """Merge this edition into *to_item*, re-homing all linked works.

    After the base-class merge, every Work attached to this edition is
    attached to the target edition as well, and this edition's own work
    links are removed so it no longer shows up under those works.
    """
    super().merge_to(to_item)
    linked_works = list(self.works.all())
    for linked_work in linked_works:
        to_item.works.add(linked_work)
    self.works.clear()

def delete(self, using=None, soft=True, *args, **kwargs):
    """Delete this edition, detaching its works first on a soft delete.

    A soft-deleted edition must not keep appearing among its works'
    editions, so the m2m links are cleared before delegating to the
    parent implementation.
    """
    if soft:
        self.works.clear()
    result = super().delete(using, soft, *args, **kwargs)
    return result

def update_linked_items_from_external_resource(self, resource):
    """Attach Works referenced by *resource* to this edition.

    Scans the resource's required and related resource descriptors for
    entries with model "Work". Each is resolved to a Work item, preferring
    the ExternalResource record matched by (id_type, id_value); if no such
    resource exists, falls back to a Work primary-lookup-id match. A
    resolved Work is linked to this edition unless already linked;
    unresolvable entries are logged and skipped.

    NOTE: the span as displayed contained leftover pre-commit diff lines
    and commented-out code; this is the cleaned post-commit logic.
    """
    links = resource.required_resources + resource.related_resources
    for w in links:
        if w.get("model") != "Work":
            continue
        work_res = ExternalResource.objects.filter(
            id_type=w["id_type"], id_value=w["id_value"]
        ).first()
        if work_res:
            work = work_res.item
            if not work:
                # resource row exists but was never resolved to an item
                _logger.warning(f"Unable to find work for {work_res}")
        else:
            _logger.warning(
                f'Unable to find resource for {w["id_type"]}:{w["id_value"]}'
            )
            # fall back to matching a Work by its primary lookup id
            work = Work.objects.filter(
                primary_lookup_id_type=w["id_type"],
                primary_lookup_id_value=w["id_value"],
            ).first()
        if work and work not in self.works.all():
            self.works.add(work)

def get_related_books(self):
works = list(self.works.all())
Expand Down Expand Up @@ -224,6 +245,17 @@ class Work(Item):
# a lazy fix is to remove smaller DoubanBook_Work ids
# but ideally deal with 302 in scrape().

def merge_to(self, to_item):
    """Merge this work into *to_item*, transferring every edition link.

    Mirrors Edition.merge_to from the other side of the m2m relation:
    after the base merge, all editions of this work are added to the
    target work and detached from this one.
    """
    super().merge_to(to_item)
    owned_editions = list(self.editions.all())
    for owned_edition in owned_editions:
        to_item.editions.add(owned_edition)
    self.editions.clear()

def delete(self, using=None, soft=True, *args, **kwargs):
    """Delete this work, detaching its editions first on a soft delete.

    Clearing the m2m links keeps soft-deleted works out of their
    editions' work listings before delegating to the parent delete.
    """
    if soft:
        self.editions.clear()
    result = super().delete(using, soft, *args, **kwargs)
    return result


class Series(Item):
category = ItemCategory.Book
Expand Down
43 changes: 43 additions & 0 deletions catalog/book/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,46 @@ def test_works(self):
self.assertEqual(w3e[1].title, "黄金时代")
e = Edition.objects.get(primary_lookup_id_value=9781662601217)
self.assertEqual(e.title, "Golden Age: A Novel")

@use_local_response
def test_works_merge(self):
    """Merging one Work into another transfers editions and resources.

    Exercises Work.merge_to plus edition/work link cleanup on delete,
    using locally cached site responses.
    """
    # url1 and url4 have the same ISBN, hence they share the same Edition
    # instance, which belongs to 2 Work instances before the merge
    url1 = "https://book.douban.com/subject/1089243/"
    url2 = "https://book.douban.com/subject/2037260/"
    url3 = "https://www.goodreads.com/book/show/59952545-golden-age"
    url4 = "https://www.goodreads.com/book/show/11798823"
    p1 = SiteManager.get_site_by_url(
        url1
    ).get_resource_ready()  # lxml bug may break this
    w1 = p1.item.works.all().first()
    p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
    w2 = p2.item.works.all().first()
    # the two douban editions resolve to the same Work
    self.assertEqual(w1, w2)
    self.assertEqual(p1.item.works.all().count(), 1)
    p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
    w3 = p3.item.works.all().first()
    # the goodreads edition initially gets its own, separate Work
    self.assertNotEqual(w3, w2)
    self.assertEqual(w2.external_resources.all().count(), 1)
    self.assertEqual(w3.external_resources.all().count(), 1)
    w3.merge_to(w2)
    # after merge: w2 owns both external resources and all 3 editions,
    # w3 is left with nothing
    self.assertEqual(w2.external_resources.all().count(), 2)
    self.assertEqual(w3.external_resources.all().count(), 0)
    self.assertEqual(w2.editions.all().count(), 3)
    self.assertEqual(w3.editions.all().count(), 0)
    # url4 shares url1's ISBN, so fetching it reuses the same Edition
    # and must not resurrect a second Work
    p4 = SiteManager.get_site_by_url(url4).get_resource_ready()
    self.assertEqual(p4.item.id, p1.item.id)
    self.assertEqual(p4.item.works.all().count(), 1)
    self.assertEqual(p1.item.works.all().count(), 1)
    w2e = w2.editions.all().order_by("title")
    self.assertEqual(w2e.count(), 3)
    self.assertEqual(w2e[0].title, "Golden Age: A Novel")
    self.assertEqual(w2e[1].title, "Wang in Love and Bondage")
    self.assertEqual(w2e[2].title, "黄金时代")
    w3e = w3.editions.all().order_by("title")
    self.assertEqual(w3e.count(), 0)
    e = Edition.objects.get(primary_lookup_id_value=9781662601217)
    self.assertEqual(e.title, "Golden Age: A Novel")
    # deleting a single edition detaches it from the work
    w2e[1].delete()
    self.assertEqual(w2.editions.all().count(), 2)
    # bulk-deleting the remaining editions leaves the items with no works
    w2e.delete()
    self.assertEqual(p1.item.works.all().count(), 0)

0 comments on commit 86834ed

Please sign in to comment.