Skip to content

Commit

Permalink
feat: B站二维码、Cookie登录实现
Browse files Browse the repository at this point in the history
  • Loading branch information
NanmiCoder committed Dec 3, 2023
1 parent a90b411 commit 94b5030
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 11 deletions.
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
# 仓库描述

**小红书爬虫**，**抖音爬虫**，**快手爬虫**...。
目前能抓取小红书、抖音、快手的视频、图片、评论、点赞、转发等信息。
**小红书爬虫**，**抖音爬虫**，**快手爬虫**，**B站爬虫**...。
目前能抓取小红书、抖音、快手、B站的视频、图片、评论、点赞、转发等信息。

原理:利用[playwright](https://playwright.dev/)搭桥,保留登录成功后的上下文浏览器环境,通过执行JS表达式获取一些加密参数
通过使用此方式,免去了复现核心加密JS代码,逆向难度大大降低。
Expand All @@ -21,11 +21,11 @@
## 功能列表
| 平台 | Cookie 登录 | 二维码登录 | 手机号登录 | 关键词搜索 | 指定视频/帖子 ID 爬取 | 登录状态缓存 | 数据保存 | IP 代理池 | 滑块验证码 |
|:---:|:---------:|:-----:|:-----:|:-----:|:-------------:|:------:|:----:|:------:|:-----:|
| 小红书 | ||| || | |||
| 抖音 | ||| || | |||
| 快手 | | || || | |||
| B 站 | | || || | |||
| 微博 | ||| || | |||
| 小红书 | ||| || | |||
| 抖音 | ||| || | |||
| 快手 | ||| || | |||
| B 站 | | || || | |||
| 微博 | ||| || | |||


## 使用方法
Expand Down
67 changes: 64 additions & 3 deletions media_platform/bilibili/login.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,74 @@ def __init__(self,
self.cookie_str = cookie_str

async def begin(self):
    """Run the Bilibili login routine selected by ``self.login_type``.

    Supported values are ``"qrcode"``, ``"phone"`` and ``"cookie"``; each is
    routed to the matching ``login_by_*`` coroutine on this object.

    Raises:
        ValueError: if ``self.login_type`` is none of the supported values.
    """
    utils.logger.info("Begin login Bilibili ...")

    # Map each supported login type to the coroutine that implements it.
    login_handlers = {
        "qrcode": self.login_by_qrcode,
        "phone": self.login_by_mobile,
        "cookie": self.login_by_cookies,
    }
    handler = login_handlers.get(self.login_type)
    if handler is None:
        raise ValueError("Invalid Login Type Currently only supported qrcode or phone or cookie ...")
    await handler()

@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self) -> bool:
    """Report whether the browser context currently holds Bilibili login cookies.

    The context is considered logged in when either the ``SESSDATA`` or the
    ``DedeUserID`` cookie has a non-empty value.

    The ``retry`` decorator re-invokes this coroutine while it returns
    ``False``: up to 20 attempts, one second apart. Once the attempts are
    exhausted a ``RetryError`` is raised to the caller.

    Returns:
        ``True`` when a login cookie is present, ``False`` otherwise.
    """
    browser_cookies = await self.browser_context.cookies()
    # Only the second element of the converted pair (name -> value mapping) is needed.
    _, cookies_by_name = utils.convert_cookies(browser_cookies)
    login_marker = cookies_by_name.get("SESSDATA", "") or cookies_by_name.get("DedeUserID")
    return bool(login_marker)

async def login_by_qrcode(self):
    """Log in to Bilibili by QR code and keep the browser's login state.

    Steps: click the page's login entry, locate the QR-code image, display it
    in an executor, then poll ``check_login_state`` until login cookies show
    up. The process exits via ``sys.exit()`` when the QR code cannot be found
    or when polling ends with ``RetryError``. On success the coroutine sleeps
    a few seconds to let the page redirect.
    """
    utils.logger.info("Begin login bilibili by qrcode ...")

    # Open the login dialog by clicking the login entry on the page.
    await self.context_page.locator(
        "xpath=//div[@class='right-entry__outside go-login-btn']//div"
    ).click()

    # Fetch the QR-code image shown in the scan box.
    qrcode_b64 = await utils.find_login_qrcode(
        self.context_page,
        selector="//div[@class='login-scan-box']//img",
    )
    if not qrcode_b64:
        utils.logger.info("login failed , have not found qrcode please check ....")
        sys.exit()

    # Display the QR code in the default executor; the returned future is
    # intentionally not awaited so polling can start right away.
    loop = asyncio.get_running_loop()
    loop.run_in_executor(None, utils.show_qrcode, qrcode_b64)

    utils.logger.info("Waiting for scan code login, remaining time is 20s")
    try:
        await self.check_login_state()
    except RetryError:
        utils.logger.info("Login bilibili failed by qrcode login method ...")
        sys.exit()

    # Give the page time to finish its post-login redirect.
    redirect_delay = 5
    utils.logger.info(f"Login successful then wait for {redirect_delay} seconds redirect ...")
    await asyncio.sleep(redirect_delay)

async def login_by_mobile(self):
    """Placeholder for phone-number login; it currently performs no action."""
    return None

async def login_by_cookies(self):
    """Log in by injecting the cookies from ``self.cookie_str`` into the browser context.

    Every name/value pair parsed from ``self.cookie_str`` is registered for
    the ``.bilibili.com`` domain with path ``/``. All cookies are submitted
    in a single ``add_cookies`` call instead of one awaited call per cookie.
    When no cookies are parsed, no call is made.
    """
    utils.logger.info("Begin login bilibili by cookie ...")
    parsed_cookies = utils.convert_str_cookie_to_dict(self.cookie_str)
    cookies = [
        {
            'name': name,
            'value': value,
            'domain': ".bilibili.com",
            'path': "/",
        }
        for name, value in parsed_cookies.items()
    ]
    # Skip the browser call entirely when there is nothing to add, matching
    # the original loop, which made no call for an empty cookie string.
    if cookies:
        await self.browser_context.add_cookies(cookies)
2 changes: 1 addition & 1 deletion media_platform/kuaishou/login.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,6 @@ async def login_by_cookies(self):
await self.browser_context.add_cookies([{
'name': key,
'value': value,
'domain': ".douyin.com",
'domain': ".kuaishou.com",
'path': "/"
}])

0 comments on commit 94b5030

Please sign in to comment.