From 448d2eb89d0c3fa48c8baaf69a36ead0d3a69a18 Mon Sep 17 00:00:00 2001 From: ZhenShuo2021 <98386542+ZhenShuo2021@users.noreply.github.com> Date: Wed, 11 Dec 2024 23:55:15 +0800 Subject: [PATCH] docs: update template --- README.en.md | 83 +++++++++++++++++++++++++------------------------- README.md | 85 ++++++++++++++++++++++++---------------------------- config.yaml | 38 ++++++++++++----------- 3 files changed, 99 insertions(+), 107 deletions(-) diff --git a/README.en.md b/README.en.md index 412f298..a6128a8 100644 --- a/README.en.md +++ b/README.en.md @@ -11,20 +11,23 @@ # V2PH Downloader + V2PH Downloader ## Features -📦 Plug-and-play: No extra dependencies required -🌐 Cross-platform: Supports all platforms -🔄 Dual engines: Supports both DrissionPage and Selenium automation options -🛠️ Convenient: Supports multiple accounts for auto-login and switching, supports cookies/password login -⚡️ Fast: High-efficiency download with asynchronous event loop -🧩 Customizable: Offers many configuration options -🔑 Secure: Uses PyNaCL as encryption backend. +📦 Plug-and-play: No extra dependencies required +🌐 Cross-platform: Supports all platforms +🔄 Dual engines: Supports both DrissionPage and Selenium automation options +🛠️ Convenient: Supports multiple accounts for auto-login and switching, supports cookies/password login +⚡️ Fast: High-efficiency download with asynchronous event loop +🧩 Customizable: Offers many configuration options +🔑 Secure: Uses PyNaCL as encryption backend. ## Installation + Requirements: + 1. Chrome browser installed 2. Python version > 3.10 3. Install via pip @@ -34,10 +37,12 @@ pip install v2dl ``` ## Usage + On first run, login to V2PH with one of the two methods: 1. Account Management Interface Use `v2dl -a` to enter the account management interface. + ```sh v2dl -a ``` @@ -46,6 +51,7 @@ v2dl -a Due to strict bot detection on login pages, you can trigger the login page by randomly downloading an album, then manually log in if errors occur. 
### First Download Attempt + v2dl supports various download methods, including downloading a single album, a list of albums, starting from a specific album, or reading all pages from a text file. ```sh @@ -60,9 +66,11 @@ v2dl -i "/path/to/urls.txt" ``` ## Configuration + The program looks for a `config.yaml` file in the system configuration directory. Refer to the example in the root directory. You can modify settings like scroll length, scroll step, and rate limit: + - download_dir: Set download location, defaults to system download folder. - download_log: Tracks downloaded album URLs, skipped if duplicated; defaults to system configuration directory. - system_log: Location for program logs; defaults to system configuration directory. @@ -70,21 +78,24 @@ You can modify settings like scroll length, scroll step, and rate limit: - chrome/exec_path: Path to Chrome executable. System configuration directory locations: + - Windows: `C:\Users\xxx\AppData\v2dl` - Linux, macOS: `/Users/xxx/.config/v2dl` ### Cookies + Cookies login is often more successful than using username/password. Use an extension (e.g., [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm)) to export cookies in Netscape format, and input the exported cookie file path in the account manager tool. -> [!NOTE] +> [!NOTE] > Exported cookies must include `frontend-rmt/frontend-rmu`. -> [!NOTE] +> [!NOTE] > Cookies are sensitive information; use high-quality extensions and remove or restrict access after exporting. ### Parameters + - url: URL of the target to download. - -i: URL list in a text file, one URL per line. - -a: Enter the account management tool. 
@@ -118,48 +129,34 @@ The keys are stored in a secure folder with access control, and encryption mater ```py import v2dl import logging -from collections import namedtuple - -your_custom_config = { - "download": { - "min_scroll_length": 500, - "max_scroll_length": 1000, - "min_scroll_step": 150, - "max_scroll_step": 250, - "rate_limit": 400, - "download_dir": "v2dl", - }, - "paths": { - "download_log": "downloaded_albums.txt", - "system_log": "v2dl.log", - }, - "chrome": { - "profile_path": "v2dl_chrome_profile", - "exec_path": { - "Linux": "/usr/bin/google-chrome", - "Darwin": "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", - "Windows": r"C:\Program Files\Google\Chrome\Application\chrome.exe", - }, - }, + + +custom_defaults = { + "static_config": { + "min_scroll_length": 1000, + "max_scroll_length": 2000, + # ... + } } -your_named_tuple = namedtuple("url", "input_file", "bot_type", ...) -args = your_named_tuple(url="http://v2ph.com/...", input_file="txt_file", bot_type="drission", ...) 
+def custom_setup(custom_defaults): + print("Using custom setup!") -# Initialize -log_level = logging.INFO -logger = v2dl.common.setup_logging(logging.INFO, log_path=app_config.paths.system_log) + config_manager = config.ConfigManager(custom_defaults) + config_manager.set("runtime_config", "url", custom_url) + config_manager.set("runtime_config", "download_service", custom_service) + config_manager.set("runtime_config", "download_function", custom_function) + config_manager.set("runtime_config", "logger", custom_logger) + config_manager.set("runtime_config", "user_agent", custom_ua) -app_config = v2dl.common.BaseConfigManager(your_custom_config) -runtime_config = create_runtime_config(args, app_config, logger, log_level) + return v2dl.config.ConfigManager() -# Start scraping -web_bot_ = v2dl.web_bot.get_bot(runtime_config, app_config) -scraper = v2dl.core.ScrapeManager(runtime_config, app_config, web_bot_) -scraper.start_scraping() +v2dl.setup = custom_setup +v2dl.main(custom_defaults=custom_defaults) ``` ## Additional Notes + 1. Rapid page switching or fast downloads may trigger blocks. Current settings balance speed and block prevention. 2. Block likelihood depends on network environment. Avoid using VPN for safer downloads. 3. Use cautiously to avoid overloading the website's resources. diff --git a/README.md b/README.md index f3b5fc5..ac82567 100644 --- a/README.md +++ b/README.md @@ -13,20 +13,21 @@ [English](https://github.com/ZhenShuo2021/V2PH-Downloader/blob/main/README.en.md) # V2PH Downloader -微圖坊下載器 +微圖坊下載器 ## 特色 -📦 開箱即用:不用下載額外依賴 -🌐 跨平台:全平台支援 -🔄 雙引擎:支援 DrissionPage 和 Selenium 兩種自動化選項 -🛠️ 方便:支援多帳號自動登入自動切換,支援 cookies/帳號密碼登入兩種方式 -⚡️ 快速:採用非同步事件迴圈的高效下載 -🧩 自訂:提供多種自定義參數選項 -🔑 安全:使用 PyNaCL 作為加密後端 +📦 開箱即用:不用下載額外依賴 +🌐 跨平台:全平台支援 +🔄 雙引擎:支援 DrissionPage 和 Selenium 兩種自動化選項 +🛠️ 方便:支援多帳號自動登入自動切換,支援 cookies/帳號密碼登入兩種方式 +⚡️ 快速:採用非同步事件迴圈的高效下載 +🧩 自訂:提供多種自定義參數選項 +🔑 安全:使用 PyNaCL 作為加密後端 ## 安裝 + 基本需求為 1. 
電腦已安裝 Chrome 瀏覽器 @@ -38,10 +39,12 @@ pip install v2dl ``` ## 使用方式 + 首次執行時需要登入 V2PH 的帳號,有兩種方式 1. 帳號管理介面 使用 `v2dl -a` 進入帳號管理介面。 + ```sh v2dl -a ``` @@ -49,8 +52,8 @@ v2dl -a 2. 手動登入 帳號登入頁面的機器人偵測比較嚴格,可以隨機下載一個相簿啟動程式,遇到登入頁面程式報錯後手動登入。 - ### 嘗試第一次下載 + v2dl 支援多種下載方式,可以下載單一相簿,也可以下載相簿列表,也支援從相簿中間開始下載,以及讀取文字文件中的所有頁面。 ```sh @@ -65,6 +68,7 @@ v2dl -i "/path/to/urls.txt" ``` ## 設定 + 會尋找系統設定目錄中是否存在 `config.yaml`,格式請參照根目錄的範例。 裡面可以修改捲動長度、捲動步長與速率限制等設定: @@ -76,21 +80,24 @@ v2dl -i "/path/to/urls.txt" - chrome/exec_path: 系統的 Chrome 程式位置。 系統設定目錄位置: + - Windows: `C:\Users\xxx\AppData\v2dl` - Linux, macOS: `/Users/xxx/.config/v2dl` ### Cookies + Cookies 登入比帳號密碼更容易成功。 使用方式是用擴充套件(如 [Cookie-Editor](https://chromewebstore.google.com/detail/cookie-editor/hlkenndednhfkekhgcdicdfddnkalmdm))導出 cookies,建議選擇 Netscape 格式,並且在帳號管理工具中輸入導出的 cookie 文件位置。 -> [!NOTE] +> [!NOTE] > 導出的 Cookies 必須包含 frontend-rmt/frontend-rmu 項目。 -> [!NOTE] -> Cookies 為機密資訊,請選擇選擇[下載數量高](https://news.cnyes.com/news/id/5584471)的擴充功能套件,並且導出完成後建議將套件移除或限制存取。 +> [!NOTE] +> Cookies 為機密資訊,請選擇[下載數量高](https://news.cnyes.com/news/id/5584471)的擴充功能套件,並且導出完成後建議將套件移除或限制存取。 ### 參數 + - url: 下載目標的網址。 - -i: 下載目標的 URL 列表文字文件,每行一個 URL。 - -a: 進入帳號管理工具。 @@ -124,48 +131,34 @@ Cookies 登入比帳號密碼更容易成功。 ```py import v2dl import logging -from collections import namedtuple - -your_custom_config = { - "download": { - "min_scroll_length": 500, - "max_scroll_length": 1000, - "min_scroll_step": 150, - "max_scroll_step": 250, - "rate_limit": 400, - "download_dir": "v2dl", - }, - "paths": { - "download_log": "downloaded_albums.txt", - "system_log": "v2dl.log", - }, - "chrome": { - "profile_path": "v2dl_chrome_profile", - "exec_path": { - "Linux": "/usr/bin/google-chrome", - "Darwin": "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", - "Windows": r"C:\Program Files\Google\Chrome\Application\chrome.exe", - }, - }, + +custom_defaults = { + "static_config": { + "min_scroll_length": 1000, + "max_scroll_length": 2000, + # ... 
+ } } -your_named_tuple = namedtuple("url", "input_file", "bot_type", ...) -args = your_named_tuple(url="http://v2ph.com/...", input_file="txt_file", bot_type="drission", ...) +def custom_setup(custom_defaults): + print("Using custom setup!") + -# Initialize -log_level = logging.INFO -logger = v2dl.common.setup_logging(logging.INFO, log_path=app_config.paths.system_log) + config_manager = config.ConfigManager(custom_defaults) + config_manager.set("runtime_config", "url", custom_url) + config_manager.set("runtime_config", "download_service", custom_service) + config_manager.set("runtime_config", "download_function", custom_function) + config_manager.set("runtime_config", "logger", custom_logger) + config_manager.set("runtime_config", "user_agent", custom_ua) -app_config = v2dl.common.BaseConfigManager(your_custom_config) -runtime_config = create_runtime_config(args, app_config, logger, log_level) + return v2dl.config.ConfigManager() -# Start scraping -web_bot_ = v2dl.web_bot.get_bot(runtime_config, app_config) -scraper = v2dl.core.ScrapeManager(runtime_config, app_config, web_bot_) -scraper.start_scraping() +v2dl.setup = custom_setup +v2dl.main(custom_defaults=custom_defaults) ``` ## 補充 + 1. 換頁或者下載速度太快都可能觸發封鎖,目前的設定已經均衡下載速度和避免封鎖了。 2. 會不會被封鎖也有一部分取決於網路環境,不要開 VPN 下載比較安全。 3. 
謹慎使用,不要又把網站搞到關掉了,難得有資源收錄完整的。 diff --git a/config.yaml b/config.yaml index 5518fd1..7f128cf 100644 --- a/config.yaml +++ b/config.yaml @@ -1,23 +1,25 @@ -download: - min_scroll_length: 200 - max_scroll_length: 1000 - min_scroll_step: 30 - max_scroll_step: 80 - rate_limit: 400 - download_dir: "download" +static_config: + min_scroll_length: 500 + max_scroll_length: 1500 + max_worker: 4 + rate_limit: 300 + no_history: true + language: "fr" + exact_dir: true + download_dir: "~/Downloads/v2dl-yaml" + force_download: true + chrome_args: + - "--no-sandbox" + dry_run: false + terminate: true -paths: - download_log: "downloaded_albums.txt" - system_log: "v2ph.log" +path_config: + history_file: "~/Downloads/v2dl-yaml/yaml_history.log" + download_log: "~/Downloads/v2dl-yaml/yaml_download.log" + system_log: "~/Downloads/v2dl-yaml/yaml_system.log" + chrome_profile_path: "~/Downloads/v2dl-yaml/yaml_chrome_profile" -chrome: - profile_path: "v2dl_chrome_profile" - exec_path: - Linux: "/usr/bin/google-chrome" - Darwin: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" - Windows: r"C:\Program Files\\Google\\Chrome\\Application\\chrome.exe" - -encryption: +encryption_config: key_bytes: 32 salt_bytes: 16 nonce_bytes: 24