-
Notifications
You must be signed in to change notification settings - Fork 20
/
feishu_downloader.py
224 lines (195 loc) · 10.4 KB
/
feishu_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
import configparser, locale, os, re, subprocess, time
from concurrent.futures import as_completed, ThreadPoolExecutor
import requests
from tqdm import tqdm
locale.setlocale(locale.LC_CTYPE,"chinese")
# 读取配置文件
config = configparser.ConfigParser(interpolation=None)
config.read('config.ini', encoding='utf-8')
# 获取配置文件中的cookie
minutes_cookie = config.get('Cookies', 'cookie')
# 获取下载设置
space_name = int(config.get('下载设置', '所在空间'))
list_size = int(config.get('下载设置', '每次检查的妙记数量'))
vc_max_num = int(config.get('下载设置', '保留妙记的最大数量'))
check_interval = int(config.get('下载设置', '检查妙记的时间间隔(单位s,太短容易报错)'))
download_type = int(config.get('下载设置', '文件类型'))
subtitle_only = config.get('下载设置', '是否只下载字幕文件(是/否)')=='是'
# 获取保存路径
save_path = config.get('下载设置', '保存路径(不填则默认为当前路径/data)')
if not save_path:
save_path = './data'
# 获取字幕格式设置
subtitle_params = {'add_speaker': config.get('下载设置', '字幕是否包含说话人(是/否)')=='是',
'add_timestamp': config.get('下载设置', '字幕是否包含时间戳(是/否)')=='是',
'format': 3 if config.get('下载设置', '字幕格式(srt/txt)')=='srt' else 2
}
# 获取代理设置
use_proxy = config.get('代理设置', '是否使用代理(是/否)')
proxy_address = config.get('代理设置', '代理地址')
if use_proxy == '是':
proxies = {
'http': proxy_address,
'https': proxy_address,
}
else:
proxies = None
class FeishuDownloader:
def __init__(self, cookie):
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
'cookie': cookie,
'bv-csrf-token': cookie[cookie.find('bv_csrf_token=') + len('bv_csrf_token='):cookie.find(';', cookie.find('bv_csrf_token='))],
'referer': f'https://meetings.feishu.cn/minutes/me',
'content-type': 'application/x-www-form-urlencoded'
}
if len(self.headers.get('bv-csrf-token')) != 36:
raise Exception("minutes_cookie中不包含bv_csrf_token,请确保从请求`list?size=20&`中获取!")
self.all_minutes = []
self.minutes_num = 0
self.meeting_time_dict = {} # 会议文件名称和会议时间的对应关系
self.subtitle_type = 'srt' if subtitle_params['format']==3 else 'txt'
def get_minutes(self):
"""
批量获取妙记信息
"""
get_rec_url = f'https://meetings.feishu.cn/minutes/api/space/list?&size={list_size}&space_name={space_name}'
resp = requests.get(url=get_rec_url, headers=self.headers, proxies=proxies)
# 如果resp.json()['data']中没有list字段,则说明cookie失效
if 'list' not in resp.json()['data']:
raise Exception("minutes_cookie失效,请重新获取!")
self.all_minutes = list(reversed(resp.json()['data']['list'])) # 按时间正序排列的妙记信息(从旧到新)
self.minutes_num = len(self.all_minutes)
def check_minutes(self):
"""
检查需要下载的妙记
"""
# 从文件中读取已下载的妙记id
downloaded_minutes = set()
if os.path.exists('minutes.txt'):
with open('minutes.txt', 'r') as f:
downloaded_minutes = set(line.strip() for line in f)
# 获取云端所有妙记
self.get_minutes()
print(f"云端现有 {self.minutes_num} 个妙记")
# 过滤需要下载的妙记
need_download_minutes = [
minutes for minutes in self.all_minutes
if minutes['object_token'] not in downloaded_minutes and
(download_type == 2 or minutes['object_type'] == download_type)
]
print(f"需要下载 {len(need_download_minutes)} 个妙记")
# 如果有需要下载的妙记则进行下载
if need_download_minutes:
self.download_minutes(need_download_minutes)
# 将下载的妙记id写入记录
with open('minutes.txt', 'a') as f:
for minutes in need_download_minutes:
f.write(minutes['object_token']+'\n')
print(f"成功下载了 {len(need_download_minutes)} 个妙记,等待 {check_interval} 秒后再次检查...")
def download_minutes(self, minutes_list):
"""
使用aria2批量下载妙记
"""
with ThreadPoolExecutor(max_workers=10) as executor:
with open('links.temp', 'w', encoding='utf-8') as file:
futures = [executor.submit(self.get_minutes_url, minutes) for minutes in minutes_list]
for future in as_completed(futures):
video_url = future.result()[0]
file_name = future.result()[1]
video_name = file_name
file.write(f'{video_url}\n out={save_path}/{file_name}/{video_name}.mp4\n')
if not subtitle_only:
headers_option = ' '.join(f'--header="{k}: {v}"' for k, v in self.headers.items())
proxy_cmd = ""
if proxies is not None:
proxy_cmd = f'--all-proxy={proxies["http"]}'
cmd = f'aria2c -c --input-file=links.temp {headers_option} --continue=true --auto-file-renaming=true --console-log-level=warn {proxy_cmd} -s16 -x16 -k1M'
subprocess.run(cmd, shell=True)
# 删除临时文件
os.remove('links.temp')
# 修改会议妙记的创建时间
for file_name, start_time in self.meeting_time_dict.items():
os.utime(f'{save_path}/{file_name}', (start_time, start_time))
if not subtitle_only:
os.utime(f'{save_path}/{file_name}/{file_name}.mp4', (start_time, start_time))
os.utime(f'{save_path}/{file_name}/{file_name}.{self.subtitle_type}', (start_time, start_time))
self.meeting_time_dict = {}
def get_minutes_url(self, minutes):
"""
获取妙记视频下载链接;写入字幕文件。
"""
# 获取妙记视频的下载链接
video_url_url = f'https://meetings.feishu.cn/minutes/api/status?object_token={minutes["object_token"]}&language=zh_cn&_t={int(time.time() * 1000)}'
resp = requests.get(url=video_url_url, headers=self.headers, proxies=proxies)
video_url = resp.json()['data']['video_info']['video_download_url']
# 获取妙记字幕
subtitle_url = f'https://meetings.feishu.cn/minutes/api/export'
subtitle_params['object_token'] = minutes['object_token']
resp = requests.post(url=subtitle_url, params=subtitle_params, headers=self.headers, proxies=proxies)
resp.encoding = 'utf-8'
# 获取妙记标题
file_name = minutes['topic']
rstr = r'[\/\\\:\*\?\"\<\>\|]'
file_name = re.sub(rstr, '_', file_name) # 将标题中的特殊字符替换为下划线
# 如果妙记来自会议,则将会议起止时间作为文件名的一部分
if minutes['object_type'] == 0:
# 根据会议的起止时间和标题来设置文件名
start_time = time.strftime("%Y年%m月%d日%H时%M分", time.localtime(minutes['start_time'] / 1000))
stop_time = time.strftime("%Y年%m月%d日%H时%M分", time.localtime(minutes['stop_time'] / 1000))
file_name = start_time+"至"+stop_time+file_name
else:
create_time = time.strftime("%Y年%m月%d日%H时%M分", time.localtime(minutes['create_time'] / 1000))
file_name = create_time+file_name
subtitle_name = file_name
# 创建文件夹
if not os.path.exists(f'{save_path}/{file_name}'):
os.makedirs(f'{save_path}/{file_name}')
# 写入字幕文件
with open(f'{save_path}/{file_name}/{subtitle_name}.{self.subtitle_type}', 'w', encoding='utf-8') as f:
f.write(resp.text)
# 如果妙记来自会议,则记录会议起止时间
if minutes['object_type'] == 0:
self.meeting_time_dict[file_name] = minutes['start_time']/1000
return video_url, file_name
def delete_minutes(self, num):
"""
删除指定数量的最早几个妙记
"""
old_all_minutes = self.all_minutes
successed_num = 0
unsuccessed_num = 0
for index in tqdm(old_all_minutes[:num+unsuccessed_num], desc='删除妙记'):
old_minutes_num = self.minutes_num
# 将该妙记放入回收站
delete_url = f'https://meetings.feishu.cn/minutes/api/space/delete'
params = {'object_tokens': index['object_token'],
'is_destroyed': 'false',
'language': 'zh_cn'}
requests.post(url=delete_url, params=params, headers=self.headers, proxies=proxies)
# 将该妙记彻底删除
requests.post(url=delete_url, params=params.update({'is_destroyed': 'true'}), headers=self.headers, proxies=proxies)
time.sleep(3)
self.get_minutes()
if self.minutes_num == old_minutes_num:
print(f"删除 http://meetings.feishu.cn/minutes/{index['object_token']} 失败,可能是没有该妙记的权限")
unsuccessed_num += 1
else:
successed_num += 1
if successed_num == num:
break
print(f"成功删除 {successed_num} 个妙记,跳过 {unsuccessed_num} 个妙记")
if __name__ == '__main__':
if not minutes_cookie:
raise Exception("cookie不能为空!")
# 定时检查是否有要下载的妙记
while True:
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
downloader = FeishuDownloader(minutes_cookie)
# 检查是否存在需要下载的妙记
downloader.check_minutes()
# 如果云端的妙记数量超过了最大限制,则删除最早的几个妙记
if downloader.minutes_num > vc_max_num:
print(f"删除最早的 {downloader.minutes_num - vc_max_num} 个妙记")
downloader.delete_minutes(downloader.minutes_num - vc_max_num)
time.sleep(check_interval)