Skip to content

Commit

Permalink
Support extra opts to yt-dlp
Browse files Browse the repository at this point in the history
  • Loading branch information
shun-liang committed Dec 11, 2024
1 parent 201ec29 commit 0a642c5
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 4 deletions.
32 changes: 32 additions & 0 deletions src/yt2doc/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ast
import tempfile
import typing
import logging
Expand All @@ -23,11 +24,21 @@
logger = logging.getLogger(__file__)


class MalformedYtDlpOpts(Exception):
    """Signal that the ``--yt-dlp-extra-opts`` value is unusable.

    Raised when the option string cannot be parsed as a Python literal, or
    when the parsed value is not a dictionary with string keys.
    """


class WhisperBackend(str, Enum):
    """Names of the available Whisper backend implementations.

    The ``str`` mixin makes each member compare equal to its plain string
    value, so a member can be used wherever the raw name is expected.
    """

    faster_whisper = "faster_whisper"
    whisper_cpp = "whisper_cpp"


def _is_dict_of_str_any(
value: typing.Any,
) -> typing.TypeGuard[typing.Dict[str, typing.Any]]:
return isinstance(value, dict) and all(isinstance(key, str) for key in value)


def main(
video_url: typing.Optional[str] = typer.Option(
None, "--video", "--audio", help="URL of the video to extract"
Expand Down Expand Up @@ -103,6 +114,11 @@ def main(
help="Ignore original chapters from the source",
),
] = False,
yt_dlp_extra_opts_str: typing.Optional[str] = typer.Option(
None,
"--yt-dlp-extra-opts",
help="Extra opts to yt-dlp as a string representation of a dictionary",
),
show_version: typing.Annotated[
bool,
typer.Option(
Expand Down Expand Up @@ -159,6 +175,21 @@ def main(
"whisper_cpp_model": whisper_cpp_model.resolve().as_posix(),
}

# Turn the optional --yt-dlp-extra-opts string into a dict of yt-dlp options.
# The string is expected to be a Python dict literal, e.g. "{'quiet': False}".
if yt_dlp_extra_opts_str is None:
    yt_dlp_extra_opts = {}
else:
    try:
        # literal_eval only evaluates literals, never arbitrary expressions.
        yt_dlp_extra_opts = ast.literal_eval(yt_dlp_extra_opts_str)
    except (
        ValueError,
        TypeError,
        SyntaxError,
        MemoryError,
        RecursionError,
    ) as e:
        # literal_eval is documented to raise any of these on malformed
        # input (e.g. SyntaxError for an unbalanced "{"); report them all
        # uniformly and keep the original error as the cause.
        raise MalformedYtDlpOpts(
            f"{type(e).__name__} when trying to parse yt-dlp-extra-opts: {e}"
        ) from e

# A valid literal that is not a str-keyed dict (e.g. a list) is also rejected.
if not _is_dict_of_str_any(yt_dlp_extra_opts):
    raise MalformedYtDlpOpts(
        "yt-dlp-extra-opts is not a string representation of a dictionary"
    )

with tempfile.TemporaryDirectory() as temp_dir_name:
temp_dir = Path(temp_dir_name)
yt2doc = get_yt2doc(
Expand All @@ -173,6 +204,7 @@ def main(
llm_server=llm_server,
llm_api_key=llm_api_key,
temp_dir=temp_dir,
yt_dlp_options=yt_dlp_extra_opts,
)

if video_url:
Expand Down
8 changes: 6 additions & 2 deletions src/yt2doc/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from wtpsplit import SaT
from openai import OpenAI

from yt2doc.media.media_info_extractor import MediaInfoExtractor
from yt2doc.media.media_info_extractor import YtDlpMediaInfoExtractor
from yt2doc.transcription.transcriber import Transcriber
from yt2doc.transcription import interfaces as transcription_interfaces
from yt2doc.extraction.file_cache import FileCache
Expand Down Expand Up @@ -38,6 +38,7 @@ def get_yt2doc(
llm_server: str,
llm_api_key: str,
temp_dir: Path,
yt_dlp_options: typing.Dict[str, typing.Any],
) -> Yt2Doc:
DEFAULT_CACHE_PATH.mkdir(exist_ok=True)
file_cache = FileCache(
Expand Down Expand Up @@ -74,7 +75,10 @@ def get_yt2doc(
add_table_of_contents=add_table_of_contents,
)

media_info_extractor = MediaInfoExtractor(temp_dir=temp_dir)
media_info_extractor = YtDlpMediaInfoExtractor(
temp_dir=temp_dir,
extra_opts=yt_dlp_options or {},
)
transcriber = Transcriber(
temp_dir=temp_dir,
whisper_adapter=whisper_adapter,
Expand Down
7 changes: 5 additions & 2 deletions src/yt2doc/media/media_info_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,15 @@ def _merge_short_chapters(
return merged_chapters


class MediaInfoExtractor:
def __init__(self, temp_dir: Path):
class YtDlpMediaInfoExtractor:
def __init__(self, temp_dir: Path, extra_opts: typing.Dict[str, typing.Any]):
self.temp_dir = temp_dir
self.extra_opts = extra_opts

def extract_media_info(self, video_url: str) -> interfaces.MediaInfo:
ydl_opts = {
"quiet": True,
**self.extra_opts,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
Expand Down Expand Up @@ -99,6 +101,7 @@ def extract_audio(self, video_url: str) -> Path:
"preferredcodec": "m4a",
}
],
**self.extra_opts,
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
response = ydl.extract_info(video_url, download=True)
Expand Down

0 comments on commit 0a642c5

Please sign in to comment.