forked from ping/newsrack
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build-index.js
36 lines (31 loc) · 1.1 KB
/
build-index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// ref https://lunrjs.com/guides/index_prebuilding.html
//
// Reads the whole of stdin as UTF-8 text so that it can be parsed once the
// stream has ended.
var lunr = require('lunr');
var stdin = process.stdin;
var stdout = process.stdout;

// Chunks received from stdin, in arrival order.
var buffer = [];

stdin.resume();
stdin.setEncoding('utf8');

// Collect every chunk; nothing is parsed until the stream ends.
stdin.on('data', function (chunk) {
  buffer.push(chunk);
});
// Ref https://github.com/olivernn/lunr.js/blob/aa5a878f62a6bba1e8e5b95714899e17e8150b38/lib/stop_word_filter.js#L43
//
// Additional stop word filter that drops the token "li", so that "<li>"
// markup present in the indexed text is excluded from the index.
//
// Declared with `var` on purpose: a bare assignment would create an implicit
// global and throw a ReferenceError if this script ever ran in strict mode.
var customStopWordFilter = lunr.generateStopWordFilter(['li']);

// Register the filter with lunr under a fixed label.
// NOTE(review): presumably required so the pipeline can be serialized with
// the index and restored by whoever loads it - confirm against the lunr docs.
lunr.Pipeline.registerFunction(customStopWordFilter, 'customStopWordFilter');
// Once stdin is exhausted: parse the collected text as JSON, build a lunr
// index over the resulting documents and write the serialized index to stdout.
stdin.on('end', function onInputEnd() {
  // Override the tokenizer separator so that, besides whitespace and "-",
  // the characters "/", "<", ">" and the typographic apostrophe also split
  // tokens.
  lunr.tokenizer.separator = /[\s\-\/<>’]+/;

  var rawJson = buffer.join('');
  var documents = JSON.parse(rawJson);

  var idx = lunr(function configureIndex() {
    // lunr invokes this function with the index builder as `this`.
    var builder = this;

    builder.ref('id');
    ['title', 'articles', 'tags', 'category'].forEach(function (fieldName) {
      builder.field(fieldName);
    });

    builder.metadataWhitelist = ['position'];

    // Insert the custom filter directly ahead of lunr's stock stop word filter.
    builder.pipeline.before(lunr.stopWordFilter, customStopWordFilter);

    documents.forEach(function (doc) {
      builder.add(doc);
    });
  });

  stdout.write(JSON.stringify(idx));
});