forked from Dans-labs/semaf-client
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathjGraph.py
174 lines (157 loc) · 7.21 KB
/
jGraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
from xml.dom import minidom
import json
from Semaf import Semaf
from rdflib import Graph, URIRef, Literal, BNode, plugin, Namespace
from rdflib.serializer import Serializer
from collections import defaultdict, OrderedDict
import requests
class jGraph():
    """Turn a (JSON) document into RDF statements held in an rdflib ``Graph``.

    The document is walked by :meth:`rotate` / :meth:`rotatelist`; every key
    is converted into a predicate URI by :meth:`SetRef`, either through the
    crosswalk mappings loaded with :meth:`load_crosswalks` or by appending
    the key to ``RootRef``.
    """

    def __init__(self, thisobject=None, RootRef=None, crosswolksfile=None, thisformat='json', debug=False):
        """Initialise the converter state.

        Args:
            thisobject: JSON text (``str``/``bytes``) to parse, an already
                parsed object, or ``None`` for an empty document.
            RootRef: base URI used for keys that have no crosswalk mapping.
            crosswolksfile: accepted for compatibility; not used here (call
                :meth:`load_crosswalks` explicitly).
            thisformat: accepted for compatibility; not used here.
            debug: accepted for compatibility; not used here.
        """
        self.stats = {}
        self.json = {}
        # json.loads(None) raises TypeError, so the default argument must be
        # handled explicitly; already-parsed objects are taken as they are.
        if thisobject is None:
            self.context = {}
        elif isinstance(thisobject, (str, bytes, bytearray)):
            self.context = json.loads(thisobject)
        else:
            self.context = thisobject
        self.RootRef = RootRef
        self.dictcontent = []   # flat log of every key/value visited
        self.mappings = {}      # key -> URI, filled by load_crosswalks()
        self.locator = {}
        self.namespaces = {}    # namespace URI -> prefix, filled by rotate()
        self.EnrichFlag = False  # True adds placeholder vocabulary statements
        self.crosswalks = {}    # URI -> key, filled by SetRef()
        self.cmdiloc = {}       # scratch cache used by rotate()
        # Default graph
        self.g = Graph()

    def setNamespaces(self):
        """Bind the fixed prefixes and every collected namespace to the graph."""
        fixed_prefixes = (
            ('cmdi', "%s" % self.RootRef),
            ('cmdidoc', "%s/#" % self.RootRef),
            ('keywords', "%s/Keyword#" % self.RootRef),
            ('citation', "https://dataverse.org/schema/citation"),
            ('schema', "https://dataverse.org/schema/"),
            ('dcterms', "http://purl.org/dc/terms/"),
        )
        for prefix, uri in fixed_prefixes:
            self.g.bind(prefix, Namespace(uri))
        # Namespaces discovered while walking the document get a trailing "/".
        for nsname, prefix in self.namespaces.items():
            self.g.bind(prefix, "%s/" % Namespace(nsname))

    def SetRef(self, value):
        """Return the reference URI for ``value``.

        A loaded crosswalk mapping wins; otherwise ``value`` is appended to
        ``RootRef``. The reverse lookup (URI -> value) is recorded in
        ``self.crosswalks``.
        """
        # NOTE(review): the source had no indentation; the reverse lookup is
        # assumed to be recorded for mapped and unmapped keys alike.
        if value in self.mappings:
            ref_url = self.mappings[value]
        else:
            ref_url = "%s%s" % (self.RootRef, value)
        self.crosswalks[ref_url] = value
        return ref_url

    def load_crosswalks(self, crossfile):
        """Load ``key,uri`` lines from ``crossfile`` into ``self.mappings``.

        Blank lines and lines without a comma are skipped; surrounding
        whitespace (including the line terminator) is removed so the stored
        URI never carries a trailing newline.

        Returns:
            The updated ``self.mappings`` dictionary.
        """
        with open(crossfile, encoding='utf-8') as fh:
            for line in fh:
                fields = line.strip().split(',')
                if len(fields) < 2 or not fields[0].strip():
                    continue
                self.mappings[fields[0].strip()] = fields[1].strip()
        return self.mappings

    def rotatelist(self, thislist, pk, DEBUG=None):
        """Add statements for a list of dictionaries found under key ``pk``.

        Every non-empty dict element gets its own blank node. String values
        become literals on that node, list values become one blank node per
        item. Values of any other type are only logged in
        ``self.dictcontent``. Elements that are not dicts are ignored.

        Args:
            thislist: the list to convert.
            pk: parent key the list was stored under.
            DEBUG: when truthy, print the statements collected per element.
        """
        for position, entry in enumerate(thislist):
            # Empty dicts are skipped: there is no key to link them with.
            if not isinstance(entry, dict) or not entry:
                continue
            root = self.SetRef(pk)
            complexstatements = {}
            entry_node = BNode()
            predicate = None
            for k, v in entry.items():
                ref = self.SetRef(k)
                predicate = URIRef(ref)
                self.dictcontent.append({"list": root, ref: v, 'type': type(v), 'sort': position})
                if isinstance(v, str):
                    complexstatements[predicate] = v
                    self.g.add((entry_node, predicate, Literal(v)))
                elif isinstance(v, list):
                    complexarray = []
                    for item in v:
                        complexarray.append({ref: item, URIRef("%s#Vocabulary" % ref): "url"})
                        # One blank node per array item
                        item_node = BNode()
                        self.g.add((item_node, predicate, Literal(item)))
                        if self.EnrichFlag:
                            self.g.add((item_node, URIRef("%s#Vocabulary" % ref), Literal('vocabulary name')))
                            self.g.add((item_node, URIRef("%s#VocabularyURL" % ref), Literal("http link to concept URI for %s" % item)))
                        # Attach the item node to the element node
                        self.g.add((entry_node, predicate, item_node))
                    complexstatements[predicate] = complexarray
            if DEBUG:
                print(complexstatements)
            # NOTE(review): the element node is linked to the parent through
            # the LAST key of the element, as the original statement order
            # suggests - confirm this is the intended predicate.
            self.g.add((URIRef(root), predicate, entry_node))
        return

    def rotate(self, thisdict, pk, DEBUG=None):
        """Walk ``thisdict`` recursively and add its content to the graph.

        Nested dicts are descended into with an XPath-like parent key
        (``parent/child``), lists are handed to :meth:`rotatelist`, and all
        other values become literal statements on ``SetRef(pk)``.

        Args:
            thisdict: dict (or list) to convert.
            pk: parent key of ``thisdict``; falsy at the top level.
            DEBUG: when truthy, print progress information.

        Returns:
            ``self.dictcontent``, the flat log of visited key/value pairs.
        """
        self.cmdiloc = {}
        if isinstance(thisdict, list):
            # The original branch used undefined names and always raised
            # NameError; lists are handled the same way as nested lists.
            self.rotatelist(thisdict, pk, DEBUG)
            self.setNamespaces()
            return self.dictcontent

        for k, v in thisdict.items():
            if isinstance(v, dict):
                fullXpath = "%s/%s" % (pk, k) if pk else k
                # NOTE(review): assumed to run for every nested dict, not
                # only when pk is falsy (indentation was lost in the source).
                self.namespaces[self.SetRef(pk)] = k.lower()
                if DEBUG:
                    print("XPath %s [%s]" % (fullXpath, k))
                self.rotate(v, fullXpath, DEBUG)
                root = self.SetRef(pk)
                document_node = URIRef(self.RootRef)
                self.g.add((document_node, URIRef(root), URIRef(self.SetRef(k))))
                self.locator[root] = document_node
                continue

            if isinstance(v, list):
                if DEBUG:
                    print(k)
                self.rotatelist(v, k, DEBUG)
                continue

            root = self.SetRef(pk)
            ref = self.SetRef(k)
            # Scratch cache of the most recent scalar per reference. The
            # original looked up the literal key 'root', which is never set
            # and raised KeyError when a key repeated after a recursion.
            if ref in self.cmdiloc:
                cached = self.cmdiloc[ref]
                if isinstance(cached, list):
                    cached.append({ref: v})
                else:
                    self.cmdiloc[ref] = {ref: v}
            else:
                self.cmdiloc = {ref: v}

            self.dictcontent.append({"parent": root, ref: v, 'type': type(v)})
            # Add statement
            self.locator[URIRef(ref)] = BNode()
            self.g.add((URIRef(root), URIRef(ref), Literal(v)))
        self.setNamespaces()
        return self.dictcontent

    def statements(self, limit=False, DEBUG=False):
        """Return every triple of the graph as a ``[subject, predicate, object]`` list.

        ``limit`` and ``DEBUG`` are accepted but not used by this method.
        """
        return [[subj, pred, obj] for subj, pred, obj in self.g]

    def graph_to_turtle(self, DEBUG=False):
        """Serialize the graph in N3 notation and return it as text.

        ``serialize`` may hand back ``bytes`` or ``str``; bytes are decoded
        as UTF-8 instead of being passed through ``str()``, which would wrap
        the text in ``b'...'`` and escape its newlines.
        """
        serialized = self.g.serialize(format='n3')
        if isinstance(serialized, (bytes, bytearray)):
            return bytes(serialized).decode('utf-8')
        return str(serialized)

    def dataset_upload(self, ROOT, DATAVERSE_ID, API_TOKEN, filename):
        """POST ``filename`` as JSON-LD to the datasets endpoint of a dataverse.

        Args:
            ROOT: base URL of the server.
            DATAVERSE_ID: identifier placed in ``api/dataverses/<id>/datasets``.
            API_TOKEN: value sent in the ``X-Dataverse-key`` header.
            filename: path of the file whose bytes form the request body.

        Returns:
            The response body as text.
        """
        headers = {"X-Dataverse-key": API_TOKEN, 'Content-Type': 'application/json-ld'}
        url = "%s/%s" % (ROOT, "api/dataverses/%s/datasets" % DATAVERSE_ID)
        # Context manager so the file handle is closed after the request.
        with open(filename, 'rb') as payload:
            r = requests.post(url, data=payload, headers=headers)
        return r.text