-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
59 lines (49 loc) · 1.65 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
"""切割 tg history json 文件 为较小的多个文件"""
import decimal
import pathlib
import ijson
import json
from loguru import logger
# Input: the complete export, looked up as "result.json" in the same
# directory as this script (the script path is resolved first).
whole_file_path = pathlib.Path(__file__).resolve().with_name("result.json")

# Index used to name the next output file (result_<file_count>.json); starts at 1.
file_count = 1

# Header fields written into every output file under the keys
# "name", "type" and "id".
# NOTE(review): nothing in this file assigns these from the export, so the
# output files carry these placeholder values -- confirm whether they should
# be filled in by hand or read from the source JSON.
group_title, group_type, group_id = "", "", 0
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``decimal.Decimal`` values as floats.

    Any other object the stock encoder cannot handle is passed on to
    ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, o):
        """Return a JSON-serialisable replacement for *o*.

        Args:
            o: The object the standard encoder could not serialise.

        Returns:
            ``float(o)`` when *o* is a ``decimal.Decimal``.

        Raises:
            TypeError: Raised by the base class for every other type.
        """
        if not isinstance(o, decimal.Decimal):
            # Not ours to handle: let the base class report the error.
            return super().default(o)
        return float(o)
# Maximum number of messages stored in a single output file.
MESSAGES_PER_FILE = 16000


def _write_chunk(index, chunk):
    """Write one batch of messages to ``result_<index>.json``.

    The file is created relative to the current working directory and holds
    the module-level header fields (``group_title``, ``group_type``,
    ``group_id``) together with the given messages.

    Args:
        index: Number used in the output file name.
        chunk: List of message objects to store under the "messages" key.
    """
    data = {
        "name": group_title,
        "type": group_type,
        "id": group_id,
        "messages": chunk,
    }
    with open(f"result_{index}.json", "w", encoding="utf-8") as f:
        # DecimalEncoder converts the Decimal values found in parsed
        # messages into plain floats so json can serialise them.
        json.dump(data, f, ensure_ascii=False, indent=4, cls=DecimalEncoder)
    logger.info(f"file {index} done")


message_count = 0
message_list = []

# Stream the export one message at a time instead of loading it whole.
# The file is opened in binary mode because ijson parses bytes; the
# with-block guarantees the handle is closed, also when parsing fails.
with open(whole_file_path, "rb") as source:
    messages = ijson.items(source, "messages.item")
    for message in messages:
        # Entries with a negative id are skipped and not counted.
        if message["id"] < 0:
            continue
        message_count += 1
        message_list.append(message)
        # Flush a full batch to disk and start collecting the next one.
        if message_count % MESSAGES_PER_FILE == 0:
            logger.info(f"message count: {message_count}, creating file {file_count}")
            _write_chunk(file_count, message_list)
            file_count += 1
            message_list = []

logger.info(f"total message count: {message_count}")

# Write whatever is left over after the last full batch.
if message_list:
    logger.info(f"creating file {file_count}")
    _write_chunk(file_count, message_list)