-
Notifications
You must be signed in to change notification settings - Fork 2
/
couchdb.lisp
84 lines (74 loc) · 2.58 KB
/
couchdb.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
(in-package :wordnet)
;;; TODO: this is essentially the same as solr.lisp, but for CouchDB.
;;; Both files can be unified.
(defun merge-duplicate-keys (alist)
  "Return a fresh alist in which every key of ALIST occurs exactly once.

Keys are compared with EQL (the default hash-table test).  A key that occurs
once keeps its value unchanged.  A key that occurs several times is mapped to
a list of its values, most recently seen value first.  The order of the
entries in the result follows the hash-table traversal and is unspecified.

Caveat: a stored value that is itself a list (including NIL) cannot be told
apart from an accumulated list, so later values for that key are consed onto
it directly."
  (let ((table (make-hash-table)))
    (loop for entry in alist
          for name = (car entry)
          for item = (cdr entry)
          do (multiple-value-bind (seen foundp) (gethash name table)
               (setf (gethash name table)
                     (cond
                       ;; First occurrence: store the bare value.
                       ((not foundp) item)
                       ;; Already a list: prepend the new value.
                       ((listp seen) (cons item seen))
                       ;; Second occurrence of an atom: start a list.
                       (t (list item seen))))))
    (alexandria:hash-table-alist table)))
#|
Sample CouchDB chillax server creation:
(defparameter *server* (make-instance 'chillax:yason-server
:host *server-url*
:port 443
:securep t
:username *username*
:password *password*
:object-as-alist-p t
:parse-object-key-fun (lambda (string) (intern string *package*))))
(defparameter *cloudant-db* (chillax:ensure-db *server* "wn"))
|#
(defun load-nomlex-couchdb (blocksize db)
  "Bulk-post one document per NOMLEX nominalization to DB.

Iterates over the cursor returned by GET-TRIPLES for the triples whose
predicate is rdf:type and whose object is nomlex:Nominalization.  The subject
of each triple is converted with NOMLEX-TO-ALIST, EQUAL-duplicate pairs are
removed, repeated keys are merged with MERGE-DUPLICATE-KEYS, and the result is
buffered.  The buffer is sent with CHILLAX:BULK-POST-DOCUMENTS as soon as it
holds BLOCKSIZE documents, and once more at the end for any remainder.
Progress is written to *DEBUG-IO*.

Returns the result of the final bulk post, or NIL when no documents were left
to send."
  (let ((current 0)      ; documents in the current, not yet posted batch
        (total 0)        ; documents already posted in full batches
        (block-tmp nil)  ; pending documents, most recent first
        (query (get-triples :p !rdf:type :o !nomlex:Nominalization)))
    (do ((a-triple (cursor-next-row query)
                   (cursor-next-row query)))
        ((null a-triple)
         ;; Send the remainder, but do not issue an empty bulk request.
         (when block-tmp
           (chillax:bulk-post-documents db block-tmp)))
      (format *debug-io* "Processing ~a [~a/~a ~a]~%"
              (part->string (subject a-triple)) current blocksize total)
      (push (merge-duplicate-keys
             (remove-duplicates
              (nomlex-to-alist (subject a-triple))
              :test #'equal))
            block-tmp)
      (incf current)
      ;; CURRENT was just incremented, so >= flushes at exactly BLOCKSIZE
      ;; documents (a strict > would let each batch grow to BLOCKSIZE + 1).
      (when (>= current blocksize)
        (chillax:bulk-post-documents db block-tmp)
        (setf total (+ total current)
              current 0
              block-tmp nil)))))
(defun load-synsets-couchdb (blocksize db)
  "Bulk-post one document per synset to DB.

Runs the query stored in \"all-synsets.sparql\" and, for each result row,
derives the synset id by stripping the \"wn30en:synset-\" prefix from the
concise string form of the row's first element.  Each id is converted with
SYNSET-TO-ALIST (reusing the plan parsed once from \"synset-words.sparql\"),
EQUAL-duplicate pairs are removed, repeated keys are merged with
MERGE-DUPLICATE-KEYS, and the result is buffered.  The buffer is sent with
CHILLAX:BULK-POST-DOCUMENTS as soon as it holds BLOCKSIZE documents, and once
more at the end for any remainder.  Progress is written to *DEBUG-IO*.

Returns the result of the final bulk post, or NIL when no documents were left
to send."
  (let ((current 0)      ; documents in the current, not yet posted batch
        (total 0)        ; documents already posted in full batches
        (block-tmp nil)  ; pending documents, most recent first
        ;; Parsed once here and passed to every SYNSET-TO-ALIST call.
        (plan-words (sparql:parse-sparql (query-string "synset-words.sparql")))
        (synsets (sparql:run-sparql
                  (sparql:parse-sparql (query-string "all-synsets.sparql"))
                  :results-format :lists)))
    (dolist (p synsets)
      (let ((id (cl-ppcre:regex-replace
                 "^wn30en:synset-"
                 (part->string (car p) :format :concise)
                 "")))
        (format *debug-io* "Processing ~a [~a/~a ~a]~%"
                id current blocksize total)
        (push (merge-duplicate-keys
               (remove-duplicates
                (synset-to-alist id :plan plan-words)
                :test #'equal))
              block-tmp)
        (incf current)
        ;; CURRENT was just incremented, so >= flushes at exactly BLOCKSIZE
        ;; documents (a strict > would let each batch grow to BLOCKSIZE + 1).
        (when (>= current blocksize)
          (chillax:bulk-post-documents db block-tmp)
          (setf total (+ total current)
                current 0
                block-tmp nil))))
    ;; Send the remainder, but do not issue an empty bulk request.
    (when block-tmp
      (chillax:bulk-post-documents db block-tmp))))