From fc64ff6ca5ab5a58df0d8e199757a27e72a248d1 Mon Sep 17 00:00:00 2001 From: seeleng Date: Mon, 23 Sep 2024 03:25:30 +0800 Subject: [PATCH] feat: add image to process scripts --- backend/src/scrapers/cna/process.py | 2 ++ backend/src/scrapers/guardian/process.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/backend/src/scrapers/cna/process.py b/backend/src/scrapers/cna/process.py index 029d6c4f..67e4547f 100644 --- a/backend/src/scrapers/cna/process.py +++ b/backend/src/scrapers/cna/process.py @@ -43,6 +43,7 @@ class CNAArticle(BaseModel): field_summary: str | None = None description: str | None = None release_date: str + image_url: str async def process(category: str): @@ -78,6 +79,7 @@ async def process(category: str): source=ArticleSource.CNA, body=body.strip(), date=article.release_date, + image_url=article.image_url, ) with Session(engine) as session: session.add(article_orm) diff --git a/backend/src/scrapers/guardian/process.py b/backend/src/scrapers/guardian/process.py index 9ef73856..e6be8a71 100644 --- a/backend/src/scrapers/guardian/process.py +++ b/backend/src/scrapers/guardian/process.py @@ -16,6 +16,7 @@ class GuardianArticleFields(BaseModel): model_config = ConfigDict(extra="allow") bodyText: str trailText: str | None = None + thumbnail: str | None = None class GuardianArticle(BaseModel): @@ -37,6 +38,7 @@ class GuardianArticle(BaseModel): source=ArticleSource.GUARDIAN, body=article.fields.bodyText, date=article.webPublicationDate, + image_url=article.fields.thumbnail or "", ) with Session(engine) as session: session.add(article_orm)