Skip to content

Commit

Permalink
feat: improve customizability
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhenShuo2021 committed Dec 12, 2024
1 parent 99c0ca5 commit 616f519
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 64 deletions.
20 changes: 15 additions & 5 deletions README.en.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ The keys are stored in a secure folder with access control, and encryption mater
You can also extend V2DL. The example below demonstrates how to use a custom default config and plug in your own web automation script.

```py
import v2dl
import logging
from v2dl import V2DLApp

custom_defaults = {
"static_config": {
Expand Down Expand Up @@ -170,9 +169,20 @@ class CustomBot:
</html>
"""

app = v2dl.V2DLApp("custom_bot", custom_defaults)
app.register_bot("custom_bot", lambda config: CustomBot(config))
app.run()
class ExtendedV2DL(V2DLApp):
def _setup_runtime_config(self, config_manager, args):
super()._setup_runtime_config(config_manager, args)
config_manager.set("runtime_config", "user_agent", "my_custom_ua")
print("Custom config in action!")


bot_name = "my awesome bot"
command_line_args = {"url": "https://www.v2ph.com/album/foo", "force_download": True}

app = ExtendedV2DL()
app.register_bot(bot_name, CustomBot)
app.set_bot(bot_name)
app.run(command_line_args)
```

## Additional Notes
Expand Down
20 changes: 15 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ Cookies 登入比帳號密碼更容易成功。
你可以擴展 V2DL,以下是一個使用自訂預設值,並且替換網頁自動化套件的範例

```py
import v2dl
import logging
from v2dl import V2DLApp

custom_defaults = {
"static_config": {
Expand Down Expand Up @@ -172,9 +171,20 @@ class CustomBot:
</html>
"""

app = v2dl.V2DLApp("custom_bot", custom_defaults)
app.register_bot("custom_bot", lambda config: CustomBot(config))
app.run()
class ExtendedV2DL(V2DLApp):
def _setup_runtime_config(self, config_manager, args):
super()._setup_runtime_config(config_manager, args)
config_manager.set("runtime_config", "user_agent", "my_custom_ua")
print("Custom config in action!")


bot_name = "my awesome bot"
command_line_args = {"url": "https://www.v2ph.com/album/foo", "force_download": True}

app = ExtendedV2DL()
app.register_bot(bot_name, CustomBot)
app.set_bot(bot_name)
app.run(command_line_args)
```

## 補充
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest

from v2dl.common.const import DEFAULT_CONFIG, SELENIUM_AGENT
from v2dl.common.const import DEFAULT_CONFIG, HEADERS, SELENIUM_AGENT
from v2dl.config import Config, ConfigManager
from v2dl.utils.factory import ServiceType, create_download_service

Expand All @@ -24,6 +24,7 @@ def _create_download_service(service_type):
max_worker=5,
rate_limit=400,
logger=mock_logger,
headers=HEADERS,
service_type=service_type,
)

Expand All @@ -41,6 +42,7 @@ def real_config(tmp_path, real_download_service, real_args, mock_logger) -> Conf
config_manager.get("static_config", "max_worker"),
config_manager.get("static_config", "rate_limit"),
mock_logger,
HEADERS,
)

# setup static_config
Expand Down
156 changes: 107 additions & 49 deletions v2dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,83 +18,141 @@
class V2DLApp:
def __init__(
self,
bot_type: str = "drissionpage",
default_config: dict[str, dict[str, Any]] = common.const.DEFAULT_CONFIG,
) -> None:
self.bot_type = bot_type
self.bot_registered: dict[str, Any] = {}
self.default_config = default_config
self.default_cli_values = {"url": None, "url_file": None, "account": False}
self.registered_bot: dict[str, Any] = {}

def run(self, args: Namespace | dict[Any, Any] | None = None) -> int:
args = self.__prepare_arguments(args)
config_instance = self.setup(args)
web_bot_instance = self.get_bot(config_instance)
scraper = core.ScrapeManager(config_instance, web_bot_instance)
scraper.start_scraping()
if not config_instance.static_config.no_history:
scraper.final_process()
scraper.log_final_status()
def run(self, args: Namespace | dict[Any, Any] | list[Any] | None = None) -> int:
"""The interface to run the full V2DL
return 0
Args:
args (Namespace | dict[Any, Any] | list[Any] | None, optional): The command line
input for setup method. Defaults to None.
Returns:
int: The runtime status
"""
try:
args = self.parse_arguments_wrapper(args)
conf = self.setup(args)
bot = self.get_bot(conf)
scraper = core.ScrapeManager(conf, bot)
scraper.start_scraping()
if not conf.static_config.no_history:
scraper.final_process()
scraper.log_final_status()

return 0

except Exception as e:
raise RuntimeError(f"Runtime error of V2DL: {e}") from e

def setup(self, args: Namespace) -> config.Config:
    """Build the Config dataclass from command line inputs.

    A custom Namespace object may be passed in place of the parsed command
    line for advanced use.

    Args:
        args (Namespace): The variables from the command line. Can be replaced
            with a custom Namespace; see cli/option.py for the required fields.

    Returns:
        config.Config: The Config dataclass.
    """
    # Check the inputs before any configuration object is created.
    self._check_cli_inputs(args)

    manager = config.ConfigManager(self.default_config)
    manager.load_all({"args": args})

    # Runtime-only values are filled in after everything else has been loaded.
    self._setup_runtime_config(manager, args)

    config_instance = manager.initialize_config()
    return config_instance

def get_bot(self, conf: config.Config) -> Any:
    """Return the web automation bot.

    The custom bot is used only when the bot_name attribute has been set and
    that name is present in registered_bot; in every other case the default
    bot is returned.
    """
    has_custom_bot = hasattr(self, "bot_name") and self.bot_name in self.registered_bot

    # No usable custom bot: fall back to the default bot, configured in config.
    if not has_custom_bot:
        return web_bot.get_bot(conf)

    # The registered entry is called with the Config to produce the bot.
    bot_factory = self.registered_bot[self.bot_name]
    return bot_factory(conf)

def set_bot(self, bot_name: str) -> None:
    """Store the name of the custom bot that get_bot should look up.

    Args:
        bot_name (str): The name of the custom bot.
    """
    self.bot_name = bot_name

def register_bot(self, bot_name: str, bot: Any) -> None:
    """Register a custom bot under a name.

    Args:
        bot_name (str): The name of the custom bot. Pass the same name to
            set_bot to select it.
        bot (Any): A callable (a class or a factory function) that takes the
            Config object and returns the web automation bot to be used;
            get_bot invokes it as ``bot(conf)``.

    Raises:
        TypeError: If bot is not callable.
    """
    # get_bot calls the registered object, so reject unusable entries here
    # instead of failing later in the middle of a run.
    if not callable(bot):
        raise TypeError(
            f"Bot registered as {bot_name!r} must be callable, got {type(bot).__name__}"
        )
    self.registered_bot[bot_name] = bot

def parse_arguments_wrapper(
self, args: Namespace | dict[Any, Any] | list[Any] | None
) -> Namespace:
"""Process CLI input for Config setup
Convert input variable to namespace for parse_args. If input is Namespace, returns itself.
If input is a dict or list, convert the and pass it to the parse_args. Otherwise, calls the
default CLI interface.
"""

def init_attr(args: dict[Any, Any]) -> Namespace:
"""Initialize attribute with value None"""
mock_input = ["placeholder"]
default_args = vars(cli.parse_arguments(mock_input))
return Namespace(**{key: args.get(key) for key in default_args})

if isinstance(args, Namespace):
return args
elif isinstance(args, dict):
return init_attr(args)
elif isinstance(args, list):
return cli.parse_arguments(args)
elif args is None:
return cli.parse_arguments()
else:
raise ValueError(f"Unsupported CLI input type {type(args)}")

def _check_cli_inputs(self, args: Namespace) -> None:
"""Check command line inputs in advance"""
if args.version:
print(version.__version__) # noqa: T201
sys.exit(0)

if args.bot_type == "selenium":
utils.check_module_installed()

config_manager = config.ConfigManager(self.default_config)
config_manager.load_all({"args": args})

# prepare logger
def _setup_runtime_config(
self,
config_manager: config.ConfigManager,
args: Namespace,
headers: dict[str, str] = common.const.HEADERS,
user_agent: str = common.const.SELENIUM_AGENT,
) -> None:
"""Initialize instances and assign to runtime config"""
logger = common.setup_logging(
config_manager.get("runtime_config", "log_level"),
log_path=config_manager.get("path", "system_log"),
logger_name=version.__package_name__,
)

# prepare runtime_config
download_service, download_function = utils.create_download_service(
args,
config_manager.get("static_config", "max_worker"),
config_manager.get("static_config", "rate_limit"),
logger,
headers,
utils.ServiceType.ASYNC,
)
config_manager.set("runtime_config", "url", args.url)
config_manager.set("runtime_config", "download_service", download_service)
config_manager.set("runtime_config", "download_function", download_function)
config_manager.set("runtime_config", "logger", logger)
config_manager.set("runtime_config", "user_agent", common.const.SELENIUM_AGENT)

return config_manager.initialize_config()

def get_bot(self, config_instance: config.Config) -> Any:
if self.bot_type in self.bot_registered:
return self.bot_registered[self.bot_type](config_instance)
return web_bot.get_bot(config_instance)

def set_bot(self, bot: str) -> None:
self.bot_type = bot

def register_bot(self, bot_type: str, factory: Any) -> None:
self.bot_registered[bot_type] = factory

def parse_arguments(self) -> Namespace:
return cli.parse_arguments()

def __prepare_arguments(self, args: Namespace | dict[Any, Any] | None) -> Namespace:
"""Process CLI input for Config setup
If input is None, it uses the argparse to parse inputs from command line. Otherwise, the
inputs will be cast into a Namespace variable. Note these variables has the highest priority
to the Config setup.
"""
if isinstance(args, Namespace):
return args
elif isinstance(args, dict):
return Namespace(**args)
else:
return self.parse_arguments()
config_manager.set("runtime_config", "user_agent", user_agent)
4 changes: 2 additions & 2 deletions v2dl/cli/option.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _format_action_invocation(self, action): # type: ignore
return ", ".join(parts)


def parse_arguments() -> argparse.Namespace:
def parse_arguments(args: list[str] | None = None) -> argparse.Namespace:
parser = argparse.ArgumentParser(
description="V2PH scraper.",
formatter_class=CustomHelpFormatter,
Expand Down Expand Up @@ -173,4 +173,4 @@ def parse_arguments() -> argparse.Namespace:
help="Set log level (1~5)",
)

return parser.parse_args()
return parser.parse_args(args)
4 changes: 2 additions & 2 deletions v2dl/utils/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
VideoDownloadAPI,
)
from .multitask import AsyncService, BaseTaskService, ThreadingService
from ..common.const import HEADERS


@dataclass
Expand Down Expand Up @@ -98,6 +97,7 @@ def create_download_service(
max_worker: int,
rate_limit: int,
logger: Logger,
headers: dict[str, str],
service_type: ServiceType = ServiceType.ASYNC,
) -> tuple[BaseTaskService, Callable[..., Any]]:
"""Create runtime configuration with integrated download service and function."""
Expand All @@ -110,7 +110,7 @@ def create_download_service(

download_api = DownloadAPIFactory.create(
service_type=service_type,
headers=HEADERS,
headers=headers,
rate_limit=rate_limit,
force_download=args.force_download,
logger=logger,
Expand Down

0 comments on commit 616f519

Please sign in to comment.