Skip to content

Commit

Permalink
refactor: move functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhenShuo2021 committed Nov 11, 2024
1 parent 6bfed38 commit 570520a
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 88 deletions.
1 change: 1 addition & 0 deletions v2dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@
"FileProcessingError",
"DownloadError",
"get_bot",
"__version__",
]
86 changes: 0 additions & 86 deletions v2dl/config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import argparse
import logging
import os
import platform
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -151,86 +148,3 @@ def _merge_config(base: dict[str, Any], custom: dict[str, Any]) -> dict:
else:
base[key] = value
return base


def parse_arguments() -> tuple[argparse.Namespace, int]:
    """Parse CLI arguments for the V2PH scraper.

    Returns:
        A tuple ``(args, log_level)`` where ``args`` is the parsed
        :class:`argparse.Namespace` and ``log_level`` is the resolved
        ``logging`` level implied by ``-q``/``-v``/``--log-level``.
    """

    def formatter(prog: str) -> argparse.HelpFormatter:
        # Widen the help column so long option names stay on one line.
        return argparse.HelpFormatter(prog, max_help_position=36)

    parser = argparse.ArgumentParser(description="V2PH scraper.", formatter_class=formatter)

    # Exactly one input source must be given: a URL, a URL-list file,
    # account-management mode, or the version flag.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("url", nargs="?", help="URL to scrape")
    input_group.add_argument(
        "-i",
        "--input-file",
        metavar="PATH",
        help="Path to txt file containing URL list to be downloaded",
    )
    input_group.add_argument("-a", "--account", action="store_true", help="Manage account")
    input_group.add_argument("--version", action="store_true", help="Show package version")

    parser.add_argument(
        "--bot",
        dest="bot_type",
        default="drission",
        type=str,
        choices=["selenium", "drission"],
        required=False,
        help="Type of bot to use (default: drission)",
    )

    parser.add_argument(
        "--chrome-args",
        type=str,
        help="Override Chrome arguments (example: --chrome-args='--arg1//--arg2//--arg3')",
    )
    parser.add_argument(
        "--user-agent",
        type=str,
        help="Override user-agent (example: --user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64)...')",
    )

    parser.add_argument("--dry-run", action="store_true", help="Dry run without downloading")
    parser.add_argument("--no-skip", action="store_true", help="Do not skip downloaded files")
    parser.add_argument("--terminate", action="store_true", help="Terminate chrome after scraping")
    parser.add_argument(
        "--use-default-chrome-profile",
        action="store_true",
        help="Use default chrome profile. Using default profile with an operating chrome is not valid",
    )

    log_group = parser.add_mutually_exclusive_group()
    log_group.add_argument("-q", "--quiet", action="store_true", help="Quiet mode")
    log_group.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
    log_group.add_argument(
        "--log-level", default=None, type=int, choices=range(1, 6), help="Set log level (1~5)"
    )

    args = parser.parse_args()

    # Resolve the effective logging level.  -q / -v / --log-level are
    # mutually exclusive, so the branch order here never hides a flag.
    if args.quiet:
        log_level = logging.ERROR
    elif args.verbose:
        log_level = logging.DEBUG
    elif args.log_level is not None:
        # Map 1..5 onto ascending severity.  Bug fix: the original
        # mapped both 3 and 4 to WARNING, making level 4 dead; 4 now
        # maps to ERROR so the scale is strictly increasing.
        log_level_mapping = {
            1: logging.DEBUG,
            2: logging.INFO,
            3: logging.WARNING,
            4: logging.ERROR,
            5: logging.CRITICAL,
        }
        log_level = log_level_mapping.get(args.log_level, logging.INFO)
    else:
        log_level = logging.INFO

    # '//' is the separator so individual Chrome flags may themselves
    # contain spaces or commas; None when the option was not given.
    args.chrome_args = args.chrome_args.split("//") if args.chrome_args else None

    return args, log_level


def check_input_file(input_path: str) -> None:
    """Verify that *input_path* exists, aborting the program if it does not.

    An empty/None path is skipped (nothing to validate).  A path that
    does not point to an existing regular file logs an error and exits
    with status 1; otherwise an informational message is logged.
    """
    if not input_path:
        # Bug fix: the original fell through to the success branch here
        # and logged "Input file None exists and is accessible."
        return
    if not os.path.isfile(input_path):
        logging.error("Input file %s does not exist.", input_path)
        sys.exit(1)
    logging.info("Input file %s exists and is accessible.", input_path)
77 changes: 77 additions & 0 deletions v2dl/option.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import argparse
import logging


def parse_arguments() -> tuple[argparse.Namespace, int]:
    """Parse CLI arguments for the V2PH scraper.

    Returns:
        A tuple ``(args, log_level)`` where ``args`` is the parsed
        :class:`argparse.Namespace` and ``log_level`` is the resolved
        ``logging`` level implied by ``-q``/``-v``/``--log-level``.
    """

    def formatter(prog: str) -> argparse.HelpFormatter:
        # Widen the help column so long option names stay on one line.
        return argparse.HelpFormatter(prog, max_help_position=36)

    parser = argparse.ArgumentParser(description="V2PH scraper.", formatter_class=formatter)

    # Exactly one input source must be given: a URL, a URL-list file,
    # account-management mode, or the version flag.
    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument("url", nargs="?", help="URL to scrape")
    input_group.add_argument(
        "-i",
        "--input-file",
        metavar="PATH",
        help="Path to txt file containing URL list to be downloaded",
    )
    input_group.add_argument("-a", "--account", action="store_true", help="Manage account")
    input_group.add_argument("--version", action="store_true", help="Show package version")

    parser.add_argument(
        "--bot",
        dest="bot_type",
        default="drission",
        type=str,
        choices=["selenium", "drission"],
        required=False,
        help="Type of bot to use (default: drission)",
    )

    parser.add_argument(
        "--chrome-args",
        type=str,
        help="Override Chrome arguments (example: --chrome-args='--arg1//--arg2//--arg3')",
    )
    parser.add_argument(
        "--user-agent",
        type=str,
        help="Override user-agent (example: --user-agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64)...')",
    )

    parser.add_argument("--dry-run", action="store_true", help="Dry run without downloading")
    parser.add_argument("--no-skip", action="store_true", help="Do not skip downloaded files")
    parser.add_argument("--terminate", action="store_true", help="Terminate chrome after scraping")
    parser.add_argument(
        "--use-default-chrome-profile",
        action="store_true",
        help="Use default chrome profile. Using default profile with an operating chrome is not valid",
    )

    log_group = parser.add_mutually_exclusive_group()
    log_group.add_argument("-q", "--quiet", action="store_true", help="Quiet mode")
    log_group.add_argument("-v", "--verbose", action="store_true", help="Verbose mode")
    log_group.add_argument(
        "--log-level", default=None, type=int, choices=range(1, 6), help="Set log level (1~5)"
    )

    args = parser.parse_args()

    # Resolve the effective logging level.  -q / -v / --log-level are
    # mutually exclusive, so the branch order here never hides a flag.
    if args.quiet:
        log_level = logging.ERROR
    elif args.verbose:
        log_level = logging.DEBUG
    elif args.log_level is not None:
        # Map 1..5 onto ascending severity.  Bug fix: the original
        # mapped both 3 and 4 to WARNING, making level 4 dead; 4 now
        # maps to ERROR so the scale is strictly increasing.
        log_level_mapping = {
            1: logging.DEBUG,
            2: logging.INFO,
            3: logging.WARNING,
            4: logging.ERROR,
            5: logging.CRITICAL,
        }
        log_level = log_level_mapping.get(args.log_level, logging.INFO)
    else:
        log_level = logging.INFO

    # '//' is the separator so individual Chrome flags may themselves
    # contain spaces or commas; None when the option was not given.
    args.chrome_args = args.chrome_args.split("//") if args.chrome_args else None

    return args, log_level
1 change: 1 addition & 0 deletions v2dl/utils/security_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def encrypt_master_key(self, master_key: bytes) -> tuple[bytes, bytes, bytes]:

# Derive the encryption key using scrypt
import time

t = time.time()
derived_key = argon2id.kdf(
self.KEY_BYTES,
Expand Down
9 changes: 9 additions & 0 deletions v2dl/utils/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
import re
import sys
import threading
import time
from dataclasses import dataclass
Expand Down Expand Up @@ -329,3 +330,11 @@ def get_image_extension(url: str) -> str:
else:
# 如果沒找到,返回預設值
return "jpg"


def check_input_file(input_path: str) -> None:
    """Verify that *input_path* exists, aborting the program if it does not.

    An empty/None path is skipped (nothing to validate).  A path that
    does not point to an existing regular file logs an error and exits
    with status 1; otherwise an informational message is logged.
    """
    if not input_path:
        # Bug fix: the original fell through to the success branch here
        # and logged "Input file None exists and is accessible."
        return
    if not os.path.isfile(input_path):
        logging.error("Input file %s does not exist.", input_path)
        sys.exit(1)
    logging.info("Input file %s exists and is accessible.", input_path)
5 changes: 3 additions & 2 deletions v2dl/v2dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@
import sys

from .account_cli import cli
from .config import Config, ConfigManager, RuntimeConfig, check_input_file, parse_arguments
from .config import Config, ConfigManager, RuntimeConfig
from .const import DEFAULT_CONFIG
from .error import ScrapeError
from .logger import setup_logging
from .option import parse_arguments
from .scrapper import ScrapeHandler
from .utils.utils import ThreadingService
from .utils.utils import ThreadingService, check_input_file
from .version import __version__
from .web_bot import get_bot

Expand Down

0 comments on commit 570520a

Please sign in to comment.