-
Notifications
You must be signed in to change notification settings - Fork 0
/
fiction.js
123 lines (113 loc) · 3.54 KB
/
fiction.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
const http = require('http'),
superagent = require('superagent'), //客户端请求代理模块类似request
cheerio = require('cheerio'), //nodejs中的jq
async = require('async'), //解决同时过多请求
eventproxy = require('eventproxy'), //将串行等待变成并行等待,提升多异步协作场景下的执行效率
fs = require('fs'),
mongoose = require('mongoose'), //mongoDB插件
Schema = mongoose.Schema
// Open the MongoDB connection to the local `fictionBox` database.
// The callback only reports the outcome. On failure the underlying error is
// logged together with the message so the cause can be diagnosed.
mongoose.connect(
  'mongodb://localhost:27017/fictionBox',
  err => {
    if (err) {
      console.error('数据库连接失败!', err)
      return
    }
    console.log('数据库连接成功!')
  }
)
require('superagent-charset')(superagent)
// Field layout of one stored document: a title and its text, both strings.
const fictionFields = {
  title: String,
  content: String
}

// Schema built from the field layout above.
// (The identifier spelling `ficiton` is kept as-is: it is a module-level name.)
const ficiton = new Schema(fictionFields)

// Model registered under the name 'Fiction'; used to persist scraped pages.
const reptileFiction = mongoose.model('Fiction', ficiton)
// Module-level crawler state.

// eventproxy instance shared by the crawler.
const ep = new eventproxy()

// Receives the article links found on the page at `url`.
const fictionUrls = []

// Page whose `.dirconone li a` links are crawled.
const url = 'http://www.quanshuwang.com/book/44/44683'
/**
 * Boots the crawler's HTTP front end on port 3020.
 *
 * Each incoming request triggers one crawl: the page at `url` is downloaded,
 * the links matching `.dirconone li a` are collected into `fictionUrls`, and
 * the linked pages are then fetched strictly one after another and stored in
 * MongoDB through the `reptileFiction` model.
 */
function start() {
  // True while a crawl is running. Any further request arriving in the
  // meantime (e.g. a browser also asking for /favicon.ico) is rejected instead
  // of starting a duplicate crawl that would store every page twice.
  let crawling = false

  /**
   * Downloads one chapter page and saves every title/content pair found on it.
   *
   * @param {string} chapterUrl - address of the page to fetch.
   * @param {Function} callback - node-style `(err, result)`. It is invoked only
   *   after the page has been fetched, all of its documents have been written,
   *   and a random pause below one second has elapsed. This is what lets a
   *   serial caller keep chapters in order while throttling requests.
   */
  function reptileMove(chapterUrl, callback) {
    // Random pause in milliseconds (0-999) applied before reporting back.
    const delay = parseInt((Math.random() * 30000000) % 1000, 10)
    const finish = (err, result) => setTimeout(() => callback(err, result), delay)

    superagent
      .get(chapterUrl)
      .charset('gbk')
      .end((err, sres) => {
        if (err || !sres || typeof sres.text !== 'string') {
          // A single unreachable page is skipped so the rest can still be crawled.
          console.error(`章节抓取失败: ${chapterUrl}`, err)
          finish(null, null)
          return
        }

        // sres.text holds the decoded HTML of the page.
        const $ = cheerio.load(sres.text)
        const content = $('.mainContenr')
        const title = $('.jieqi_title')
        const docs = []
        for (let i = 0; i < content.length; i++) {
          docs.push({
            title: title.eq(i).text(),
            content: content.eq(i).text()
          })
        }

        // Write the documents one by one and wait for every write. A database
        // error is handed to the caller instead of being thrown from inside an
        // asynchronous callback, which would terminate the process.
        async.eachSeries(
          docs,
          (doc, saved) => {
            reptileFiction.create(doc, saveErr => {
              if (!saveErr) {
                console.log('保存成功!')
              }
              saved(saveErr)
            })
          },
          saveErr => finish(saveErr, `${chapterUrl}Call back content`)
        )
      })
  }

  /**
   * HTTP request handler: starts a crawl and answers the client right away.
   *
   * @param {http.IncomingMessage} req - incoming request (not inspected).
   * @param {http.ServerResponse} res - response; always ended by this handler.
   */
  function onRequest(req, res) {
    const reply = (status, message) => {
      res.writeHead(status, { 'Content-Type': 'text/plain; charset=utf-8' })
      res.end(message)
    }

    if (crawling) {
      reply(409, '抓取正在进行中')
      return
    }
    crawling = true

    superagent
      .get(url)
      .charset('gbk')
      .end((err, pres) => {
        if (err || !pres || typeof pres.text !== 'string') {
          crawling = false
          console.error('目录页抓取失败!', err)
          reply(502, '目录页抓取失败!')
          return
        }

        const $ = cheerio.load(pres.text)
        const links = $('.dirconone li a')
        const chapterUrls = []
        for (let i = 0; i < links.length; i++) {
          const href = links.eq(i).attr('href')
          // Anchors without an href cannot be fetched.
          if (href) {
            chapterUrls.push(href)
          }
        }

        // Keep the module-level list in step with the crawl that is running,
        // rather than letting it grow with duplicates on every request.
        fictionUrls.length = 0
        chapterUrls.forEach(chapterUrl => fictionUrls.push(chapterUrl))

        // The crawl can take a long time, so acknowledge the request now.
        reply(202, `开始抓取 ${chapterUrls.length} 个章节`)

        // mapSeries (rather than mapLimit) keeps the chapters in order: the
        // next page is requested only after the previous one has been stored.
        // It is slower, but the stored order matches the index page.
        async.mapSeries(chapterUrls, reptileMove, crawlErr => {
          crawling = false
          if (crawlErr) {
            console.error('抓取中断:', crawlErr)
            return
          }
          console.log('全部章节抓取完成!')
        })
      })
  }

  http.createServer(onRequest).listen(3020)
  console.log('app started at port 3020...')
}
start()