This repository has been archived by the owner on Nov 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdodo.py
111 lines (94 loc) · 3.42 KB
/
dodo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# encoding=utf8
from doit import get_var
from roald import Roald
import logging
import logging.config
logging.config.fileConfig('logging.cfg')
logger = logging.getLogger(__name__)
import data_ub_tasks
# Settings shared by the task definitions in this file. The whole dict is
# handed to several data_ub_tasks helpers, which may read keys that are not
# referenced directly in this file.
config = {
    # Read through doit's get_var, so the default can be overridden by
    # passing a variable with the same name to doit.
    'dumps_dir': get_var('dumps_dir', '/opt/data.ub/www/default/dumps'),
    'dumps_dir_url': get_var('dumps_dir_url', 'http://data.ub.uio.no/dumps'),
    # Not read in this file; presumably consumed by the data_ub_tasks
    # helpers (e.g. the Fuseki task) -- confirm there.
    'graph': 'http://data.ub.uio.no/mrtermer',
    'fuseki': 'http://127.0.0.1:3031/ds',
    # Stem used for the concept URI format and for every generated file name.
    'basename': 'mrtermer',
    'git_user': 'ubo-bot',
    # NOTE(review): this value looks like an e-mail-obfuscation placeholder
    # rather than a real address -- confirm against the original repository.
    'git_email': '[email protected]',
}
def task_fetch_core():
    """Yield the doit task dicts that refresh the source files under ``src/``.

    Yields, in order:

    1. a dict with ``basename`` ``'fetch'`` and ``name`` ``None`` that carries
       the documentation string for the fetch tasks,
    2. whatever ``data_ub_tasks.git_pull_task_gen(config)`` returns,
    3. one ``data_ub_tasks.fetch_remote_gen(...)`` result per remote file,
       each called with ``['fetch_core:git-pull']`` as its third argument
       (presumably the task dependencies -- confirm in data_ub_tasks).
    """
    # (remote URL, local path) pairs, in the order their tasks are yielded.
    # NOTE(review): the last URL is served through rawgit.com, which was
    # announced as shutting down -- confirm it still resolves.
    sources = (
        ('https://app.uio.no/ub/emnesok/data/mr/idtermer.txt',
         'src/idtermer.txt'),
        ('https://app.uio.no/ub/emnesok/data/mr/idsteder.txt',
         'src/idsteder.txt'),
        ('https://app.uio.no/ub/emnesok/data/mr/idformer.txt',
         'src/idformer.txt'),
        ('https://rawgit.com/scriptotek/data_ub_ontology/master/ub-onto.ttl',
         'src/ub-onto.ttl'),
    )

    yield {
        'doc': 'Fetch remote files that have changed',
        'basename': 'fetch',
        'name': None,
    }

    yield data_ub_tasks.git_pull_task_gen(config)

    for remote_url, local_path in sources:
        yield data_ub_tasks.fetch_remote_gen(
            remote_url, local_path, ['fetch_core:git-pull'])
def task_build():
    """Return the doit task dict that builds the distribution files.

    The task has a single Python action, ``build_dist``. Its ``file_dep``
    lists the three ``src/id*.txt`` files, ``src/ub-onto.ttl`` and
    ``<basename>.scheme.ttl``; its ``targets`` are ``<basename>.json`` and the
    MARC21 XML, Turtle and N-Triples files under ``dist/``.
    """
    def build_dist(task):
        """Load the sources with Roald and write every distribution file.

        ``task`` is not used in the body; the parameter name is kept because
        doit passes the task object to actions that declare it (per the doit
        documentation -- confirm before renaming).
        """
        basename = config['basename']

        logger.info('Building new dist')
        vocabulary = Roald()
        vocabulary.load('src/', format='roald2', language='en')
        vocabulary.set_uri_format('http://data.ub.uio.no/%s/c{id}' % basename)

        # Roald's own JSON serialisation, written next to this file.
        vocabulary.save('%s.json' % basename)
        logger.info('Wrote %s.json', basename)

        # MARC21 XML export.
        vocabulary.export(
            'dist/%s.marc21.xml' % basename,
            format='marc21',
            vocabulary_code='noubomr',
            created_by='NoOU',
        )
        logger.info('Wrote dist/%s.marc21.xml', basename)

        # RDF/SKOS export: prepared once, then serialised in two formats.
        extra_files = ['%s.scheme.ttl' % basename, 'src/ub-onto.ttl']
        prepared = vocabulary.prepare_export(
            format='rdfskos', include=extra_files)
        rdf_outputs = (
            ('dist/%s.ttl', 'Wrote dist/%s.ttl', 'turtle'),
            ('dist/%s.nt', 'Wrote dist/%s.nt', 'nt'),
        )
        for path_template, log_message, rdf_format in rdf_outputs:
            prepared.write(path_template % basename, format=rdf_format)
            logger.info(log_message, basename)

    name = config['basename']
    dependencies = [
        'src/idtermer.txt',
        'src/idsteder.txt',
        'src/idformer.txt',
        'src/ub-onto.ttl',
        '%s.scheme.ttl' % name,
    ]
    outputs = [
        '%s.json' % name,
        'dist/%s.marc21.xml' % name,
        'dist/%s.ttl' % name,
        'dist/%s.nt' % name,
    ]
    return {
        'doc': 'Build distribution files (RDF/SKOS + MARC21XML) from source files',
        'actions': [build_dist],
        'file_dep': dependencies,
        'targets': outputs,
    }
def task_git_push():
    """Return what ``data_ub_tasks.git_push_task_gen`` produces for ``config``."""
    push_task = data_ub_tasks.git_push_task_gen(config)
    return push_task
def task_publish_dumps():
    """Return the task from ``data_ub_tasks.publish_dumps_task_gen``.

    The helper receives ``config['dumps_dir']`` and the MARC21 XML, Turtle
    and N-Triples file names derived from ``config['basename']``. The names
    carry no directory prefix.
    """
    dumps_dir = config['dumps_dir']
    dump_files = [
        template % config['basename']
        for template in ('%s.marc21.xml', '%s.ttl', '%s.nt')
    ]
    return data_ub_tasks.publish_dumps_task_gen(dumps_dir, dump_files)
def task_fuseki():
    """Return what ``data_ub_tasks.fuseki_task_gen`` produces for ``config``."""
    fuseki_task = data_ub_tasks.fuseki_task_gen(config)
    return fuseki_task