-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdf.js
109 lines (86 loc) · 3.11 KB
/
rdf.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
var fs = require('fs');
var _ = require('lodash');
var N3 = require('n3');
var db = require("seraph")("http://localhost:7474");
// Ask the seraph client to create, if missing, an index and a uniqueness
// constraint on the `wnid` property of `Synset` nodes. Both calls share one
// callback that logs a failure and otherwise does nothing.
function logSchemaError(err, res) {
  if (err) {
    console.log(err);
  }
}
db.index.createIfNone('Synset', 'wnid', logSchemaError);
db.constraints.uniqueness.createIfNone('Synset', 'wnid', logSchemaError);
/**
 * Manual smoke test: saves one hard-coded sample Synset node through the
 * module-level `db` client and hands the (err, result) outcome to
 * `console.log`.
 */
function testSaveNode() {
  var sampleSynset = {
    label: 'Synset',
    wnid: '100021007',
    pos: 'n',
    value: 'matter',
    lang: 'eng'
  };
  db.save(sampleSynset, console.log);
}
// Debug switch: uncomment to run only the single-node save test.
// return testSaveNode();

// Bookkeeping shared by ConsumerStream (which updates it) and printStats (which reports it).
var predicates = {}; // a map - just to see what predicates are there in the dataset
var counter = 0; // triples handed to the consumer
var txCounter = 0; // label triples queued for saving

// Alternative input kept for reference:
// var rdfStream = fs.createReadStream('./../wordnet-3.0-rdf/rdf/rdf/basic/void.ttl');
// download the n-triple data from http://wordnet-rdf.princeton.edu direct link: http://wordnet-rdf.princeton.edu/wn31.nt.gz
var rdfStream = fs.createReadStream('./../data/wordnet.nt');
var streamParser = N3.StreamParser();

// file bytes -> N3 parser -> consumer; pipe() returns its destination, so the calls chain.
rdfStream.pipe(streamParser).pipe(new ConsumerStream());
/**
 * Build an object-mode Writable that consumes parsed triples and stores every
 * `label` triple as a `Synset` node through seraph batches.
 *
 * Relies on module-level state: `db` (seraph client), `counter` (triples
 * seen), `txCounter` (label triples queued) and `predicates` (last object
 * seen per predicate name).
 *
 * Each triple is expected to look like:
 *   {
 *     "subject":   "http://wordnet-rdf.princeton.edu/wn31/100021007-n",
 *     "predicate": "http://www.w3.org/2000/01/rdf-schema#label",
 *     "object":    "\"matter\"@eng",
 *     "graph":     ""
 *   }
 *
 * @returns {stream.Writable} the writable to pipe parsed triples into
 */
function ConsumerStream() {
  // Number of queued saves after which the current batch is committed.
  var BATCH_SIZE = 5000;

  // Resolve the constructor first: `new require('stream').Writable(...)` would
  // apply `new` to `require`, not to `Writable`.
  var Writable = require('stream').Writable;

  var tx = db.batch();
  var writer = new Writable({ objectMode: true });

  /**
   * Split an N-Triples literal such as `"matter"@eng` into text and language.
   * The tag is only read from what follows the closing quote, so an `@`
   * inside the text (or a literal without any tag) falls back to 'eng'.
   */
  function parseLabelLiteral(literal) {
    var closingQuote = literal.lastIndexOf('"');
    var suffix = literal.slice(closingQuote + 1);
    return {
      value: literal.slice(1, closingQuote),
      lang: (suffix.charAt(0) === '@' && suffix.slice(1)) || 'eng'
    };
  }

  writer._write = function (triple, encoding, done) {
    counter += 1;

    // Predicate name is whatever follows the last '#'.
    var pred = triple.predicate.slice(triple.predicate.lastIndexOf('#') + 1);
    predicates[pred] = triple.object;
    // console.log('%s (%s) ==[%s]==> %s', wnid, pos, pred, triple.object)

    // Only label triples become nodes.
    if (pred !== 'label') {
      done();
      return;
    }

    // Last path segment of the subject, e.g. "100021007-n": id, dash, part of speech.
    var urlid = triple.subject.slice(triple.subject.lastIndexOf('/') + 1);
    var label = parseLabelLiteral(triple.object);

    tx.save({
      label: 'Synset',
      wnid: urlid.slice(0, -2),
      pos: urlid.slice(-1),
      value: label.value,
      lang: label.lang
    });

    if (++txCounter % BATCH_SIZE === 0) {
      // Hold back `done` until the commit returns so the stream does not
      // accept more triples while a batch is in flight.
      tx.commit(function(err, results) {
        console.log(err || "COMMITTED, total: %s", txCounter);
        tx = db.batch();
        done();
      });
    } else {
      done();
    }
  };

  // Flush whatever is still queued once the input is exhausted.
  writer.on('finish', function() {
    console.log('stream finished');
    tx.commit(function(err, results) {
      console.log(err || "COMMITTED, total: %s", txCounter);
    });
  });

  writer.on('error', console.error);

  return writer;
}
/**
 * Log a summary of the run: how many triples were processed, how many
 * synsets were queued for creation, the `predicates` map itself, and the
 * list of its keys. Reads the module-level `counter`, `txCounter` and
 * `predicates`.
 */
function printStats() {
  // One entry per console.log call, in output order.
  var reportLines = [
    ["N-triples processed: %s", counter],
    ["Created synsets: %s", txCounter],
    [predicates],
    [_.keys(predicates)]
  ];
  reportLines.forEach(function (logArgs) {
    console.log.apply(console, logArgs);
  });
}
// Print the run statistics whenever the process exits.
process.on('exit', printStats);

// Ctrl-C: just exit. process.exit() emits 'exit', so the listener above
// prints the statistics; calling printStats() here as well would print
// them twice.
process.on('SIGINT', function() {
  process.exit();
});