Support extra opts to yt-dlp

shun-liang · Dec 11, 2024 · 0a642c5 · 0a642c5
1 parent 201ec29
commit 0a642c5
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 4 deletions.
diff --git a/src/yt2doc/cli.py b/src/yt2doc/cli.py
@@ -1,3 +1,4 @@
+import ast
 import tempfile
 import typing
 import logging
@@ -23,11 +24,21 @@
 logger = logging.getLogger(__file__)
 
 
+class MalformedYtDlpOpts(Exception):
+    """Raised when --yt-dlp-extra-opts is not a usable dict representation."""
+
+
 class WhisperBackend(str, Enum):
+    # Whisper backends by name; the str mixin makes every member a str instance.
     faster_whisper = "faster_whisper"
     whisper_cpp = "whisper_cpp"
 
 
+def _is_dict_of_str_any(
+    value: typing.Any,
+) -> typing.TypeGuard[typing.Dict[str, typing.Any]]:
+    """Tell whether ``value`` is a dict whose keys are all strings.
+
+    Only the keys are inspected; values may be anything. An empty dict
+    qualifies.
+    """
+    if not isinstance(value, dict):
+        return False
+    for candidate_key in value:
+        if not isinstance(candidate_key, str):
+            return False
+    return True
+
+
 def main(
     video_url: typing.Optional[str] = typer.Option(
         None, "--video", "--audio", help="URL of the video to extract"
@@ -103,6 +114,11 @@ def main(
             help="Ignore original chapters from the source",
         ),
     ] = False,
+    yt_dlp_extra_opts_str: typing.Optional[str] = typer.Option(
+        None,
+        "--yt-dlp-extra-opts",
+        help="Extra opts to yt-dlp as a string representation of a dictionary",
+    ),
     show_version: typing.Annotated[
         bool,
         typer.Option(
@@ -159,6 +175,21 @@ def main(
             "whisper_cpp_model": whisper_cpp_model.resolve().as_posix(),
         }
 
+    # Turn the --yt-dlp-extra-opts string into a dict of yt-dlp options.
+    if yt_dlp_extra_opts_str is None:
+        yt_dlp_extra_opts = {}
+    else:
+        try:
+            # literal_eval only evaluates Python literals, never arbitrary code.
+            yt_dlp_extra_opts = ast.literal_eval(yt_dlp_extra_opts_str)
+        except (ValueError, SyntaxError, TypeError) as e:
+            # Malformed input (e.g. an unclosed brace) raises SyntaxError, and
+            # an unhashable dict key raises TypeError; report all of them as
+            # MalformedYtDlpOpts and keep the original error as the cause.
+            raise MalformedYtDlpOpts(
+                f"{type(e).__name__} when trying to parse yt-dlp-extra-opts: {e}"
+            ) from e
+
+    # A valid literal that is not a str-keyed dict (a list, a number, ...) is
+    # rejected as well.
+    if not _is_dict_of_str_any(yt_dlp_extra_opts):
+        raise MalformedYtDlpOpts(
+            "yt-dlp-extra-opts is not a string representation of a dictionary"
+        )
+
     with tempfile.TemporaryDirectory() as temp_dir_name:
         temp_dir = Path(temp_dir_name)
         yt2doc = get_yt2doc(
@@ -173,6 +204,7 @@ def main(
             llm_server=llm_server,
             llm_api_key=llm_api_key,
             temp_dir=temp_dir,
+            yt_dlp_options=yt_dlp_extra_opts,
         )
 
         if video_url:

diff --git a/src/yt2doc/factories.py b/src/yt2doc/factories.py
@@ -6,7 +6,7 @@
 from wtpsplit import SaT
 from openai import OpenAI
 
-from yt2doc.media.media_info_extractor import MediaInfoExtractor
+from yt2doc.media.media_info_extractor import YtDlpMediaInfoExtractor
 from yt2doc.transcription.transcriber import Transcriber
 from yt2doc.transcription import interfaces as transcription_interfaces
 from yt2doc.extraction.file_cache import FileCache
@@ -38,6 +38,7 @@ def get_yt2doc(
     llm_server: str,
     llm_api_key: str,
     temp_dir: Path,
+    yt_dlp_options: typing.Dict[str, typing.Any],
 ) -> Yt2Doc:
     DEFAULT_CACHE_PATH.mkdir(exist_ok=True)
     file_cache = FileCache(
@@ -74,7 +75,10 @@ def get_yt2doc(
             add_table_of_contents=add_table_of_contents,
         )
 
-    media_info_extractor = MediaInfoExtractor(temp_dir=temp_dir)
+    media_info_extractor = YtDlpMediaInfoExtractor(
+        temp_dir=temp_dir,
+        extra_opts=yt_dlp_options or {},
+    )
     transcriber = Transcriber(
         temp_dir=temp_dir,
         whisper_adapter=whisper_adapter,

diff --git a/src/yt2doc/media/media_info_extractor.py b/src/yt2doc/media/media_info_extractor.py
@@ -64,13 +64,15 @@ def _merge_short_chapters(
     return merged_chapters
 
 
-class MediaInfoExtractor:
-    def __init__(self, temp_dir: Path):
+class YtDlpMediaInfoExtractor:
+    def __init__(self, temp_dir: Path, extra_opts: typing.Dict[str, typing.Any]):
+        # extra_opts is unpacked last into the option dicts passed to
+        # yt_dlp.YoutubeDL, so an entry here overrides a built-in option of
+        # the same name (e.g. "quiet").
         self.temp_dir = temp_dir
+        self.extra_opts = extra_opts
 
     def extract_media_info(self, video_url: str) -> interfaces.MediaInfo:
         ydl_opts = {
             "quiet": True,
+            **self.extra_opts,
         }
 
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@@ -99,6 +101,7 @@ def extract_audio(self, video_url: str) -> Path:
                     "preferredcodec": "m4a",
                 }
             ],
+            **self.extra_opts,
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             response = ydl.extract_info(video_url, download=True)