forked from lewis-007/MediaCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9785abb
commit c5b64fd
Showing
14 changed files
with
671 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Author : [email protected] | ||
# @Time : 2023/12/23 15:40 | ||
# @Desc : | ||
from .core import WeiboCrawler | ||
from .login import WeiboLogin | ||
from .client import WeiboClient |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Author : [email protected] | ||
# @Time : 2023/12/23 15:40 | ||
# @Desc : 微博爬虫 API 请求 client | ||
|
||
import asyncio | ||
import json | ||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union | ||
from urllib.parse import urlencode | ||
|
||
import httpx | ||
from playwright.async_api import BrowserContext, Page | ||
|
||
from tools import utils | ||
|
||
from .exception import DataFetchError | ||
from .field import SearchType | ||
|
||
|
||
class WeiboClient:
    """Async HTTP client for the Weibo mobile web API (``https://m.weibo.cn``).

    Every call opens a short-lived ``httpx.AsyncClient``, sends the request
    with the headers held on this instance and unwraps the JSON envelope
    returned by the server (``{"ok": 1, "data": ...}``).
    """

    def __init__(
            self,
            timeout=10,
            proxies=None,
            *,
            headers: Dict[str, str],
            playwright_page: Page,
            cookie_dict: Dict[str, str],
    ):
        """Store the connection settings and the login state.

        :param timeout: timeout passed to every httpx request
        :param proxies: proxy configuration forwarded to ``httpx.AsyncClient``
        :param headers: headers sent with every request; ``update_cookies``
            rewrites its ``Cookie`` entry in place
        :param playwright_page: Playwright page kept on the instance (not used
            by the methods of this class)
        :param cookie_dict: cookies as a name -> value mapping
        """
        self.proxies = proxies
        self.timeout = timeout
        self.headers = headers
        self._host = "https://m.weibo.cn"
        self.playwright_page = playwright_page
        self.cookie_dict = cookie_dict

    async def request(self, method, url, **kwargs) -> Any:
        """Send one HTTP request and return the ``data`` field of the reply.

        :param method: HTTP method name, e.g. ``"GET"`` or ``"POST"``
        :param url: absolute URL to request
        :param kwargs: extra keyword arguments forwarded to
            ``httpx.AsyncClient.request`` (headers, content, ...)
        :return: the ``data`` member of the JSON reply, or ``{}`` if absent
        :raises DataFetchError: when the reply's ``ok`` field is not ``1``
        """
        async with httpx.AsyncClient(proxies=self.proxies) as client:
            response = await client.request(
                method, url, timeout=self.timeout,
                **kwargs
            )
        # NOTE: a body that is not valid JSON makes response.json() raise;
        # that error is propagated to the caller unchanged.
        data: Dict = response.json()
        if data.get("ok") != 1:
            utils.logger.error(f"[WeiboClient.request] request {method}:{url} err, res:{data}")
            raise DataFetchError(data.get("msg", "unknown error"))
        return data.get("data", {})

    async def get(self, uri: str, params=None) -> Dict:
        """Issue a GET request against the Weibo host.

        :param uri: path below the host, starting with ``/``
        :param params: optional dict of query parameters; it is URL-encoded
            and appended to ``uri``. Any non-dict value is ignored.
        :return: the unwrapped ``data`` payload (see ``request``)
        """
        final_uri = uri
        if isinstance(params, dict):
            final_uri = f"{uri}?{urlencode(params)}"
        return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=self.headers)

    async def post(self, uri: str, data: dict) -> Dict:
        """Issue a POST request with ``data`` serialized as compact JSON.

        :param uri: path below the host, starting with ``/``
        :param data: JSON-serializable request body
        :return: the unwrapped ``data`` payload (see ``request``)
        """
        json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
        # The body is already serialized, so it is sent as raw content;
        # httpx reserves ``data=`` for form fields and deprecates passing
        # str/bytes through it.
        return await self.request(method="POST", url=f"{self._host}{uri}",
                                  content=json_str, headers=self.headers)

    async def pong(self) -> bool:
        """Check whether the current login state is still valid.

        The actual check is not implemented yet: no request is sent, so this
        method currently always returns ``False``.

        :return: ``True`` if logged in, ``False`` otherwise
        """
        utils.logger.info("[WeiboClient.pong] Begin pong weibo...")
        ping_flag = False
        try:
            # TODO: perform a real logged-in request here and set ping_flag.
            pass
        except Exception as e:
            utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
            ping_flag = False
        return ping_flag

    async def update_cookies(self, browser_context: BrowserContext):
        """Refresh the stored cookies from a Playwright browser context.

        The cookie string replaces the ``Cookie`` header used for subsequent
        requests and the cookie dict replaces ``self.cookie_dict``.

        :param browser_context: Playwright context whose cookies are read
        """
        cookie_str, cookie_dict = utils.convert_cookies(await browser_context.cookies())
        self.headers["Cookie"] = cookie_str
        self.cookie_dict = cookie_dict

    async def get_note_by_keyword(
            self,
            keyword: str,
            page: int = 1,
            search_type: SearchType = SearchType.DEFAULT
    ) -> Dict:
        """Search notes (posts) by keyword.

        :param keyword: keyword to search Weibo for
        :param page: pagination parameter - the current page number
        :param search_type: kind of search, see the ``SearchType`` enum in
            weibo/field.py
        :return: the unwrapped ``data`` payload of the search reply
        """
        uri = "/api/container/getIndex"
        # The search type and keyword are packed into a single container id;
        # urlencode() in get() escapes the embedded "=" and "&".
        containerid = f"100103type={search_type.value}&q={keyword}"
        params = {
            "containerid": containerid,
            "page_type": "searchall",
            "page": page,
        }
        return await self.get(uri, params)
Oops, something went wrong.