-
Notifications
You must be signed in to change notification settings - Fork 8
/
dealMPList.js
110 lines (105 loc) · 3.68 KB
/
dealMPList.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
const fs = require('fs-extra');
const path = require('path');
const URL = require('url');
const chalk = require('chalk');
const Queue = require('./lib/Queue');
const getMdArticle = require('./lib/getMdArticle');
/**
 * Reads the cached article-list file of an official account, queues every
 * listed article for processing by `getMdArticle`, and resolves with the
 * processed articles.
 *
 * The list file is `<cachePath>/<data.nickname>.txt`. Each line is parsed as
 * JSON and must be either an array of list entries or an object whose `list`
 * property is such an array; lines shorter than 10 characters (after trimming)
 * are ignored. For each entry the article URL is read from
 * `entry.app_msg_ext_info.content_url`.
 *
 * @param {{nickname: string}} data - Account info; `nickname` names the list file.
 * @param {string} cachePath - Directory that contains the list file.
 * @param {string} [jsonFilePath] - When truthy, the resulting articles are also
 *     written to this path as JSON.
 * @param {{start: Function, text: string}} [spinner] - Progress spinner. Optional;
 *     progress reporting is skipped when it is not supplied.
 * @param {{listFilter?: Function}} [options] - `listFilter(entry)` must return a
 *     truthy value for an entry to be kept. The whole object is also forwarded
 *     to every queued task.
 * @returns {Promise<Array>} Resolves with the queue results that have a truthy
 *     `content`. Rejects with a string message when the input is invalid, or
 *     with the underlying error when the queue or the JSON write fails.
 */
function self(data, cachePath, jsonFilePath, spinner, options) {
    return new Promise((resolve, reject) => {
        if (!data.nickname) {
            return reject('没有公众号名称');
        }
        const txtPath = path.join(cachePath, `${data.nickname}.txt`);
        if (!fs.existsSync(txtPath)) {
            return reject('找不到抓取后的文章存储文件');
        }
        const content = fs.readFileSync(txtPath, {encoding: 'utf8'});
        if (!content) {
            return reject('抓取后的列表文件打开失败');
        }
        const lines = content.split('\n');
        // Defensive only: split() on a non-empty string always yields >= 1 element.
        if (!lines.length) {
            return reject('列表为空');
        }

        const listFilter = options && typeof options.listFilter === 'function' ? options.listFilter : null;
        // NOTE(review): the second argument is presumably the queue's concurrency — confirm in ./lib/Queue.
        const queue = new Queue(getMdArticle, 2);
        // Number of list entries seen before filtering.
        let count = 0;

        lines.forEach(rawLine => {
            // A malformed line is logged and skipped so it cannot abort the whole run.
            try {
                const line = rawLine.trim();
                if (line.length < 10) {
                    return;
                }
                const json = JSON.parse(line);
                const list = Array.isArray(json.list) ? json.list : json;
                // Only arrays contribute to the total; otherwise `count` would become NaN.
                if (!Array.isArray(list)) {
                    return;
                }
                count += list.length;
                list.forEach(entry => {
                    // A falsy filter result (undefined, false, ...) drops the entry.
                    if (listFilter && !listFilter(entry)) {
                        return;
                    }
                    const info = entry && entry.app_msg_ext_info;
                    // Skip entries without a usable URL instead of throwing, which
                    // would discard every remaining entry on this line.
                    if (!info || typeof info.content_url !== 'string' || !info.content_url) {
                        return;
                    }
                    // The stored URL is HTML-entity encoded; decode `&amp;` so the
                    // query string parses into the expected keys.
                    const url = info.content_url.replace(/&amp;/g, '&');
                    const {mid} = URL.parse(url, true).query;
                    queue.add([mid, url, options]);
                });
            } catch (e) {
                console.log(e);
            }
        });

        if (listFilter) {
            console.log(
                `\n共获取了 ${chalk.yellow.bold(count)} 篇文章,过滤后为 ${chalk.yellow.bold(queue.getLength())} 篇`
            );
            if (spinner) {
                spinner.start('开始解析文章列表');
            }
        }
        queue.on('progress', (curLength, total) => {
            if (spinner) {
                spinner.text = `开始解析文章列表,进度 ${chalk.yellow.bold(curLength)}/${chalk.green.bold(total)}`;
            }
        });

        queue
            .run()
            .then(results => {
                const articles = results.filter(item => item && item.content);
                if (jsonFilePath) {
                    fs.writeJSONSync(jsonFilePath, articles);
                }
                return articles;
            })
            // Chained so that a failure while writing the JSON file also rejects
            // the returned promise instead of leaving it pending.
            .then(resolve, reject);
    });
}
// The module's sole export is the list-processing function itself, so
// `require()` of this file returns a callable.
module.exports = self;
// self(
// {
// nickname: 'list'
// },
// path.join(__dirname),
// path.join(__dirname, 'all.json')
// ).then(
// (data) => {
// console.log(data);
// },
// (e) => {
// console.log(e);
// }
// );