From af4ef0f303defc8916ccb1d0a5ec26da7b344501 Mon Sep 17 00:00:00 2001 From: Shun Liang Date: Tue, 5 Nov 2024 00:57:13 +0000 Subject: [PATCH] Check ytdlp response with Pydantic --- src/yt2doc/media/media_info_extractor.py | 52 +++++++++++++++--------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/yt2doc/media/media_info_extractor.py b/src/yt2doc/media/media_info_extractor.py index 93cdf81..325255f 100644 --- a/src/yt2doc/media/media_info_extractor.py +++ b/src/yt2doc/media/media_info_extractor.py @@ -5,12 +5,32 @@ from pathlib import Path +from pydantic import BaseModel, Field + from yt2doc.media import interfaces logger = logging.getLogger(__file__) +class YtDLPResponse(BaseModel): + video_id: str = Field(alias="id") + webpage_url: str + title: str + description: str + chapters: typing.Optional[typing.Sequence[interfaces.MediaChapter]] = None + + +class YtDLPPlaylistEntry(BaseModel): + url: str + title: str + + +class YtDLPPlaylistResponse(BaseModel): + title: str + entries: typing.Sequence[YtDLPPlaylistEntry] + + def _length(chapter: interfaces.MediaChapter) -> float: return chapter.end_time - chapter.start_time @@ -55,19 +75,13 @@ def extract_video_info(self, video_url: str) -> interfaces.MediaInfo: with yt_dlp.YoutubeDL(ydl_opts) as ydl: response = ydl.extract_info(video_url, download=False) - video_id = response["id"] - title = response["title"] - chapter_objects = response.get("chapters") or [] - chapters = _merge_short_chapters( - [interfaces.MediaChapter(**chapter) for chapter in chapter_objects] - ) - description = response["description"] + parsed_response = YtDLPResponse(**response) return interfaces.MediaInfo( - video_id=video_id, - title=title, - chapters=chapters, - description=description, + video_id=parsed_response.video_id, + title=parsed_response.title, + chapters=_merge_short_chapters(parsed_response.chapters or []), + description=parsed_response.description, ) def extract_audio(self, video_url: str) -> Path: @@ -86,8 +100,8 @@ def extract_audio(self, video_url: str) -> Path: with yt_dlp.YoutubeDL(ydl_opts) as ydl: response = ydl.extract_info(video_url, download=True) - video_id = response["id"] - audio_path = self.temp_dir / f"{video_id}.m4a" + parsed_response = YtDLPResponse(**response) + audio_path = self.temp_dir / f"{parsed_response.video_id}.m4a" return audio_path def extract_playlist_info(self, playlist_url: str) -> interfaces.YtPlaylistInfo: @@ -99,14 +113,14 @@ def extract_playlist_info(self, playlist_url: str) -> interfaces.YtPlaylistInfo: with yt_dlp.YoutubeDL(ydl_opts) as ydl: playlist_info = ydl.extract_info(playlist_url, download=False) - title: str = playlist_info["title"] - entries = playlist_info["entries"] + parsed_playlist_info = YtDLPPlaylistResponse(**playlist_info) + video_urls = [ - entry["url"] - for entry in entries - if entry["title"] not in ["[Private video]", "[Deleted video]"] + entry.url + for entry in parsed_playlist_info.entries + if entry.title not in ["[Private video]", "[Deleted video]"] ] return interfaces.YtPlaylistInfo( - title=title, + title=parsed_playlist_info.title, video_urls=video_urls, )