-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
75 lines (67 loc) · 2.46 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');
const async = require('async');
const arrange = require('./arrange');
const log4js = require('log4js').getLogger("begin crawler");
// Base search URL for one genre; the percent-encoded `genre` value decodes to
// the Chinese word for "romance". The URL ends in `p=` so that a 1-based page
// number can be appended directly.
let loveURL = "http://dianying.fm/search/?genre=%E7%88%B1%E6%83%85&p=";
// Download the image at `uri` and save it to `filename`.
//   uri:      image URL to fetch
//   filename: local path the response body is written to
//   cb:       called exactly once when the transfer ends; its first argument
//             is the error on failure and undefined on success
let download = function (uri, filename, cb) {
    let settled = false;
    // Both the request stream and the file stream can report a failure, and a
    // destroyed file stream still emits 'close'; make sure `cb` fires once.
    const finish = (err) => {
        if (settled) return;
        settled = true;
        cb(err);
    };
    // The HEAD response itself is not inspected; only a transport error
    // reported for it stops the download.
    request.head(uri, function (err, res, body) {
        if (err) return finish(err);
        const target = fs.createWriteStream(filename);
        target.on('error', finish);
        target.on('close', () => finish());
        request(uri, {
            sendImmediately: false
        }).on('error', (requestErr) => {
            finish(requestErr);
            // pipe() does not close the destination when the source fails,
            // so release the file handle explicitly.
            target.destroy();
        }).pipe(target);
    });
};
// Pull the image `src` values out of a fetched search-result page.
//   body:    HTML text of the page
//   returns: array of `src` attribute values of the <img> elements matched
//            under the `fm-result-list` list, in document order; empty when
//            there is no body or nothing matches
let getSrc = function (body) {
    // Nothing to parse, e.g. the page request failed and produced no body.
    if (!body) return [];
    let $ = cheerio.load(body);
    let list = [];
    $('ul[class="fm-result-list"]').find('li > div > a > img').each(function (index, element) {
        let src = $(element).attr('src');
        // attr() yields undefined for an <img> without a src attribute; skip
        // it so callers only ever receive usable strings.
        if (src) list.push(src);
    });
    return list;
}
// Work out where a crawled image is stored locally.
//   src:     image URL
//   returns: path inside the `source` directory next to this script; the file
//            name is the last `/`-separated segment of `src`, cut at its
//            first `-`
let getPosterPath = function (src) {
    const segments = src.split('/');
    const lastSegment = segments[segments.length - 1];
    const [fileName] = lastSegment.split('-');
    return path.join(__dirname, 'source', fileName);
}
// Entry point: crawl the requested number of search-result pages, download
// every image found on them, then hand off to `arrange.arrange`.
//
// Command-line arguments (all three are required):
//   process.argv[2]  number of result pages to crawl (non-negative integer)
//   process.argv[3]  forwarded unchanged to arrange.arrange (the usage
//   process.argv[4]  message describes them as width and height)
let begin = () => {
    let downLoads = 0;
    const pageCount = parseInt(process.argv[2], 10);
    // Every argument is needed, so a single missing one is enough to bail out.
    // A page count that is not a non-negative number would make `new Array`
    // throw, so it is rejected here as well.
    if (!process.argv[3] || !process.argv[4] || Number.isNaN(pageCount) || pageCount < 0) {
        log4js.warn(`请从命令行中输入页数,宽度, 高度 forExample: node dingying.js 3 4 5 `);
        process.exit(0);
    }
    // One empty slot per page; only the index is used, to build the page URL.
    let loop = new Array(pageCount);
    // At most two pages are fetched at a time; images within a page are
    // downloaded one after another.
    async.eachOfLimit(loop, 2, (page, index, cbPage) => {
        request(loveURL + (index + 1), function (error, response, body) {
            if (error) {
                // Log and skip a page that could not be fetched so the
                // remaining pages are still crawled.
                log4js.error(error);
                return cbPage();
            }
            let list = getSrc(body);
            async.eachSeries(list, (src, cbSrc) => {
                // An entry without a URL cannot be turned into a file path.
                if (!src) return cbSrc();
                log4js.info(src, "start ...");
                let posterPath = getPosterPath(src);
                download(src, posterPath, function (err) {
                    if (err) {
                        log4js.error(err);
                    } else {
                        // Only completed downloads count towards the total.
                        downLoads++;
                    }
                    // Never forward the error: one bad image must not abort
                    // the rest of the page.
                    return cbSrc();
                });
            }, (err) => {
                return cbPage();
            });
        });
    }, (err) => {
        if (err) log4js.error(err);
        log4js.info("downLoads url total: ", downLoads);
        arrange.arrange(process.argv[3], process.argv[4], (err, info) => {
            if (err) console.error(err, info);
            // NOTE(review): the 2 s delay presumably lets pending output or
            // file writes settle before the forced exit — confirm.
            setTimeout(function () {
                log4js.info("finish: success");
                process.exit();
            }, 2000);
        });
    });
}
// Start the crawl as soon as the script is loaded.
begin();