move cover download out of scrape()
Your Name authored and alphatownsman committed Jul 27, 2024
1 parent 9763192 commit b4bdc58
Showing 16 changed files with 26 additions and 194 deletions.
14 changes: 13 additions & 1 deletion catalog/common/models.py
@@ -891,12 +891,24 @@ def site_label(self) -> str:
    def update_content(self, resource_content: "ResourceContent"):
        self.other_lookup_ids = resource_content.lookup_ids
        self.metadata = resource_content.metadata
+       if (
+           resource_content.metadata.get("cover_image_url")
+           and not resource_content.cover_image
+       ):
+           from .downloaders import BasicImageDownloader
+
+           (
+               resource_content.cover_image,
+               resource_content.cover_image_extention,
+           ) = BasicImageDownloader.download_image(
+               resource_content.metadata.get("cover_image_url"), self.url
+           )
        if resource_content.cover_image and resource_content.cover_image_extention:
            self.cover = SimpleUploadedFile(
                "temp." + resource_content.cover_image_extention,
                resource_content.cover_image,
            )
-       else:
+       elif resource_content.metadata.get("cover_image_path"):
            self.cover = resource_content.metadata.get("cover_image_path")
        self.scraped_time = timezone.now()
        self.save()
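In short, the download step moves from every site's scrape() into Resource.update_content(). A hedged sketch of the resulting flow; the names site and resource stand in for any concrete site class and its Resource and are illustrative, not from the diff:

content = site.scrape()           # builds ResourceContent; only records metadata["cover_image_url"]
resource.update_content(content)  # the cover is downloaded here, once, via BasicImageDownloader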
19 changes: 5 additions & 14 deletions catalog/sites/apple_music.py
@@ -56,8 +56,8 @@ def id_to_url(cls, id_value):

    def get_locales(self):
        locales = {}
-       for l in PREFERRED_LANGUAGES:
-           match l:
+       for lang in PREFERRED_LANGUAGES:
+           match lang:
                case "zh":
                    locales.update({"zh": ["cn", "tw", "hk", "sg"]})
                case "en":
@@ -94,10 +94,10 @@ def scrape(self):
            brief = album_data.get("modalPresentationDescriptor", {}).get(
                "paragraphText", ""
            )
-           l = detect_language(title + " " + brief)
-           localized_title.append({"lang": l, "text": title})
+           tl = detect_language(title + " " + brief)
+           localized_title.append({"lang": tl, "text": title})
            if brief:
-               localized_desc.append({"lang": l, "text": brief})
+               localized_desc.append({"lang": tl, "text": brief})
            if lang == DEFAULT_CATALOG_LANGUAGE or not matched_content:
                matched_content = content
                break
@@ -155,13 +155,4 @@ def scrape(self):
"cover_image_url": image_url,
}
)
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd
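With the block above removed, scrape() in each site only has to leave the cover URL in the metadata. A minimal, hypothetical sketch of the shape now returned (title and URL are made up; ResourceContent is the class already imported by these site modules):

pd = ResourceContent(
    metadata={
        "title": "Example Album",  # hypothetical value
        "cover_image_url": "https://example.org/cover.jpg",  # fetched later by update_content()
    }
)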
9 changes: 0 additions & 9 deletions catalog/sites/apple_podcast.py
@@ -37,13 +37,4 @@ def scrape(self):
            }
        )
        pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd
9 changes: 0 additions & 9 deletions catalog/sites/bandcamp.py
@@ -102,13 +102,4 @@ def scrape(self):
"cover_image_url": cover_url,
}
pd = ResourceContent(metadata=data)
if data["cover_image_url"]:
imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {data["cover_image_url"]}'
)
return pd
9 changes: 0 additions & 9 deletions catalog/sites/bgg.py
@@ -69,13 +69,4 @@ def scrape(self):
"cover_image_url": cover_image_url,
}
)
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd
18 changes: 0 additions & 18 deletions catalog/sites/discogs.py
@@ -76,15 +76,6 @@ def scrape(self):
        )
        if barcode:
            pd.lookup_ids[IdType.GTIN] = barcode
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd


@@ -122,15 +113,6 @@ def scrape(self):
"cover_image_url": image_url,
}
)
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd


3 changes: 0 additions & 3 deletions catalog/sites/douban_book.py
@@ -236,9 +236,6 @@ def scrape(self):
        if t:
            pd.lookup_ids[t] = n
        pd.lookup_ids[IdType.CUBN] = cubn
-       pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(
-           img_url, self.url
-       )
        return pd


33 changes: 6 additions & 27 deletions catalog/sites/douban_drama.py
@@ -1,4 +1,3 @@
-import logging
import re

from django.core.cache import cache
@@ -10,8 +9,6 @@

from .douban import DoubanDownloader

-_logger = logging.getLogger(__name__)
-

def _cache_key(url):
    return f"$:{url}"
@@ -77,10 +74,10 @@ def scrape(self):
        }
        if data["opening_date"]:
            d = data["opening_date"].split("-")
-           l = len(d) if len(d) < 6 else 6
-           if l > 3:
+           dl = len(d) if len(d) < 6 else 6
+           if dl > 3:
                data["opening_date"] = "-".join(d[:3])
-               data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
+               data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
        actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a")
        data["actor"] = []
        for e in actor_elem:
@@ -101,15 +98,6 @@ def scrape(self):
"url": show_url,
}
]
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd


@@ -213,10 +201,10 @@ def scrape(self):
data["opening_date"] = date_elem[0] if date_elem else None
if data["opening_date"]:
d = data["opening_date"].split("-")
l = len(d) if len(d) < 6 else 6
if l > 3:
dl = len(d) if len(d) < 6 else 6
if dl > 3:
data["opening_date"] = "-".join(d[:3])
data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])

data["location"] = [
s.strip()
@@ -257,13 +245,4 @@ def scrape(self):
data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]

pd = ResourceContent(metadata=data)
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd
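The opening/closing date handling in the two hunks above is easy to misread, so here is a hedged standalone restatement of the same arithmetic; the helper name split_dates is made up for illustration:

def split_dates(s):
    # Hypothetical standalone version of the date logic in douban_drama.py:
    # s may pack an opening and a closing date together, and the closing date
    # may omit leading parts, e.g. "2019-03-01-05-31" means opening 2019-03-01
    # and closing 2019-05-31, with the year borrowed from the opening date.
    d = s.split("-")
    dl = len(d) if len(d) < 6 else 6  # use at most 6 dash-separated parts
    if dl > 3:
        return "-".join(d[:3]), "-".join(d[0 : 6 - dl] + d[3:dl])
    return s, None  # a single date: no closing date

# e.g. split_dates("2019-03-01-05-31") -> ("2019-03-01", "2019-05-31")
# e.g. split_dates("2019-03-01-2019-05-31") -> ("2019-03-01", "2019-05-31")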
12 changes: 1 addition & 11 deletions catalog/sites/douban_movie.py
@@ -37,7 +37,7 @@ def scrape(self):
"\n", ""
) # strip \n bc multi-line string is not properly coded in json by douban
d = json.loads(schema_data) if schema_data else {}
except Exception as e:
except Exception:
d = {}

try:
@@ -245,7 +245,6 @@ def scrape(self):
"TVSeason" if is_series or episodes or season else "Movie"
)

tmdb_season_id = None
if imdb_code:
res_data = search_tmdb_by_imdb_id(imdb_code)
has_movie = (
@@ -302,13 +301,4 @@ def scrape(self):
        ]
        # TODO parse sister seasons
        # pd.metadata['related_resources'] = []
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd
9 changes: 0 additions & 9 deletions catalog/sites/douban_music.py
@@ -133,13 +133,4 @@ def scrape(self):
            pd.lookup_ids[IdType.GTIN] = gtin
        if isrc:
            pd.lookup_ids[IdType.ISRC] = isrc
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd
9 changes: 0 additions & 9 deletions catalog/sites/goodreads.py
@@ -119,15 +119,6 @@ def scrape(self, response=None):
        pd = ResourceContent(metadata=data)
        pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
        pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
-       if data["cover_image_url"]:
-           imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {data["cover_image_url"]}'
-               )
        return pd


Expand Down
9 changes: 0 additions & 9 deletions catalog/sites/igdb.py
@@ -152,13 +152,4 @@ def scrape(self):
            pd.lookup_ids[IdType.Steam] = SiteManager.get_site_cls_by_id_type(
                IdType.Steam
            ).url_to_id(steam_url)
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd
9 changes: 0 additions & 9 deletions catalog/sites/imdb.py
@@ -116,15 +116,6 @@ def scrape_imdb(self):
data["title"] = re.sub(r"#(\d+).(\d+)", r"S\1E\2", data["title"][8:])
pd = ResourceContent(metadata=data)
pd.lookup_ids[IdType.IMDB] = self.id_value
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
_logger.debug(
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
)
return pd

@staticmethod
13 changes: 1 addition & 12 deletions catalog/sites/rss.py
@@ -84,7 +84,7 @@ def validate_url_fallback(cls, url):

    def scrape(self):
        if not self.url:
-           raise ValueError(f"no url avaialble in RSS site")
+           raise ValueError("no url avaialble in RSS site")
        feed = self.parse_feed_from_url(self.url)
        if not feed:
            raise ValueError(f"no feed avaialble in {self.url}")
@@ -108,17 +108,6 @@ def scrape(self):
            }
        )
        pd.lookup_ids[IdType.RSS] = RSS.url_to_id(self.url)
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(
-               pd.metadata["cover_image_url"], feed.get("link") or self.url
-           )
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.warn(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd

def scrape_additional_data(self):
9 changes: 0 additions & 9 deletions catalog/sites/spotify.py
@@ -103,15 +103,6 @@ def scrape(self):
            pd.lookup_ids[IdType.GTIN] = gtin
        if isrc:
            pd.lookup_ids[IdType.ISRC] = isrc
-       if pd.metadata["cover_image_url"]:
-           imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-           try:
-               pd.cover_image = imgdl.download().content
-               pd.cover_image_extention = imgdl.extention
-           except Exception:
-               _logger.debug(
-                   f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-               )
        return pd

