-
Notifications
You must be signed in to change notification settings - Fork 197
/
search.py
113 lines (106 loc) · 3.85 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import json
import re
import requests
from one import OneNote
from xhs_utils.xhs_util import (
get_headers,
get_search_data,
get_params,
js,
check_cookies,
)
class Search:
    """Keyword search client for the notes-search endpoint at ``search_url``.

    Attributes set by ``__init__``:
        cookies: Cookies sent with every request; ``cookies["a1"]`` is also
            passed to the ``get_xs`` JS routine.
        search_url: Full URL of the search endpoint.
        headers: Request headers from ``get_headers()``; the ``x-s`` / ``x-t``
            entries are overwritten before every request.
        params: Value returned by ``get_params()`` (not used by this class).
        oneNote: ``OneNote`` helper used to save each note that is found.
    """

    # Path handed to the ``get_xs`` JS routine together with the payload.
    _SEARCH_API = "/api/sns/web/v1/search/notes"
    # The payload is edited as serialized text, so these patterns only match
    # fields whose current value is a JSON string.
    _KEYWORD_FIELD = re.compile(r'"keyword":".*?"')
    _PAGE_FIELD = re.compile(r'"page":".*?"')

    def __init__(self, cookies=None):
        """Create a client, loading cookies via ``check_cookies()`` if omitted."""
        self.cookies = check_cookies() if cookies is None else cookies
        self.search_url = "https://edith.xiaohongshu.com/api/sns/web/v1/search/notes"
        self.headers = get_headers()
        self.params = get_params()
        self.oneNote = OneNote(self.cookies)

    @classmethod
    def _build_payload(cls, template, query):
        """Serialize *template* compactly and set its ``keyword`` field to *query*."""
        payload = json.dumps(template, separators=(",", ":"))
        # Encode the query as a JSON string literal so quotes and backslashes
        # stay valid JSON; ensure_ascii=False keeps non-ASCII text verbatim.
        keyword_field = '"keyword":' + json.dumps(str(query), ensure_ascii=False)
        # A callable replacement is inserted literally, whereas a replacement
        # string would have its backslashes parsed as escapes / group refs.
        return cls._KEYWORD_FIELD.sub(lambda _match: keyword_field, payload)

    @classmethod
    def _with_page(cls, payload, page):
        """Return *payload* with its ``page`` field set to *page* (as a string)."""
        return cls._PAGE_FIELD.sub(f'"page":"{page}"', payload)

    def _fetch_page(self, payload):
        """Refresh the ``x-s`` / ``x-t`` headers, POST *payload*, return the JSON."""
        # The header values come from the external ``get_xs`` JS routine
        # (presumably a request signature; see xhs_utils to confirm).
        ret = js.call("get_xs", self._SEARCH_API, payload, self.cookies["a1"])
        self.headers["x-s"], self.headers["x-t"] = ret["X-s"], str(ret["X-t"])
        response = requests.post(
            self.search_url,
            headers=self.headers,
            cookies=self.cookies,
            data=payload.encode("utf-8"),
        )
        return response.json()

    def get_search_note(self, query, number):
        """Collect up to *number* note ids matching *query*.

        Args:
            query: Search keyword.
            number: Maximum number of note ids to collect.

        Returns:
            A list of note ids, shorter than *number* when the search runs
            out of results (a message is printed in that case).
        """
        payload = self._build_payload(get_search_data(), query)
        page = 0
        note_ids = []
        while len(note_ids) < number:
            page += 1
            payload = self._with_page(payload, page)
            res = self._fetch_page(payload)
            try:
                result = res["data"]
                items = result["items"]
            except (KeyError, TypeError):
                # Response carries no result list: nothing more to read.
                print(f"搜索结果数量为 {len(note_ids)}, 不足 {number}")
                break
            for note in items:
                note_ids.append(note["id"])
                if len(note_ids) >= number:
                    break
            # Check has_more only AFTER consuming this page, otherwise the
            # notes on the final page would be dropped.
            if not result.get("has_more") and len(note_ids) < number:
                print(f"搜索结果数量为 {len(note_ids)}, 不足 {number}")
                break
        return note_ids

    def handle_note_info(self, query, number, sort, need_cover=False):
        """Search for *query* and save up to *number* notes via ``self.oneNote``.

        Args:
            query: Search keyword.
            number: Maximum number of notes to save.
            sort: Value stored in the payload's ``sort`` field.
            need_cover: Forwarded unchanged to ``save_one_note_info``.
        """
        template = get_search_data()
        template["sort"] = sort
        payload = self._build_payload(template, query)
        page = 0
        index = 0
        while index < number:
            page += 1
            payload = self._with_page(payload, page)
            res = self._fetch_page(payload)
            try:
                result = res["data"]
                items = result["items"]
            except (KeyError, TypeError):
                # Response carries no result list: nothing more to read.
                print(f"搜索结果数量为 {index}, 不足 {number}")
                break
            for note in items:
                index += 1
                self.oneNote.save_one_note_info(
                    self.oneNote.detail_url + note["id"], need_cover, "", "datas_search"
                )
                if index >= number:
                    break
            if not result.get("has_more") and index < number:
                print(f"搜索结果数量为 {index}, 不足 {number}")
                break
        print(f"搜索结果全部下载完成,共 {index} 个笔记")

    def main(self, info):
        """Run ``handle_note_info`` from a dict of settings, with covers enabled.

        Args:
            info: Mapping with the keys ``"query"``, ``"number"`` and ``"sort"``.
        """
        query = info["query"]
        number = info["number"]
        sort = info["sort"]
        self.handle_note_info(query, number, sort, need_cover=True)
if __name__ == "__main__":
    # Keywords to search for.
    queries = ["自拍"]
    search = Search()
    for query in queries:
        # Number of notes to fetch for this keyword (the top N results).
        number = 2000
        # Sort order -- general: comprehensive, popularity_descending: most
        # popular first, time_descending: newest first.
        sort = "general"
        info = dict(query=query, number=number, sort=sort)
        search.main(info)