Skip to content

Commit

Permalink
yaml configurations, external rdf files support
Browse files Browse the repository at this point in the history
  • Loading branch information
jarno-knaw committed Jul 24, 2024
1 parent cbacb75 commit 5e13889
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 73 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@
**/*.loaded
.idea

data/countries.ttl
data/countries.config
data/*.ttl
data/*.trig
data/*.config
data/*.yaml
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- Added support for YAML configurations.
- Allow specifying external config/rdf files.
- Added support for trig files.

## [v2.15-1.1.0]

### Changed

- Rewrote `entrypoint.sh` as a Python script
- Use GraphDB instead of Fuseki
- Rewrote `entrypoint.sh` as a Python script.
- Use GraphDB instead of Fuseki.
- On start, check if vocabularies exist in GraphDB instead of keeping track of local .loaded files.

## [v2.15-1.0.0]
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUN /usr/bin/env pip install -r /var/www/requirements.txt
# Configure Skosmos
COPY skosmos-repository.ttl /var/www/
COPY entrypoint.sh /var/www/
COPY ./src /var/www/src
COPY entrypoint.py /var/www/
COPY config-docker-compose.ttl /var/www/html/
ENTRYPOINT ["/var/www/entrypoint.sh"]
172 changes: 104 additions & 68 deletions entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,108 @@
#!/usr/bin/env python3

import time
import os
import shutil
import glob
import re
import requests
import urllib.request
from pathlib import Path
from rdflib import ConjunctiveGraph

import yaml

from src.exceptions import InvalidConfigurationException, UnknownAuthenticationTypeException
from src.graphdb import add_vocabulary, get_loaded_vocabs, setup_graphdb

def get_graph(data_dir, vocab_name):

def get_graph(fp):
    """
    Get the sparql graph from the given vocabulary config.
    :param fp: The vocabulary config, a file pointer (or any iterable of
        lines). Lines may be str or bytes.
    :return: The graph URI without the surrounding angle brackets, or None
        when no line mentions sparqlGraph.
    """
    for line in fp:
        # Binary sources hand us bytes. Decode leniently: we only need the
        # (ASCII) graph URI, so a stray invalid byte elsewhere must not abort
        # the lookup or leave us with bytes that str operations choke on.
        if isinstance(line, bytes):
            line = line.decode("utf-8", errors="replace")
        if "sparqlGraph" not in line:
            continue

        # Preferred form: `... sparqlGraph <uri>` with any whitespace between.
        match = re.search(r"sparqlGraph\s+<([^>]*)>", line)
        if match:
            return match.group(1)

        # Fallback for values that are not written as <uri>: take the second
        # whitespace-separated token, as before.
        tokens = line.split()
        if len(tokens) > 1:
            return tokens[1].strip("<>")
    return None


def append_file(source, dest):
    """
    Append source to dest file.
    :param source: A file pointer to a source file. It may yield str lines
        (text mode) or bytes lines (binary mode / HTTP response).
    :param dest: The path of the destination file.
    :return:
    :raises UnicodeDecodeError: When a bytes line is not valid UTF-8.
    """
    # Write with an explicit encoding so the result does not depend on the
    # locale of the container the script runs in.
    with open(dest, "a", encoding="utf-8") as df:
        for line in source:
            # Decode bytes explicitly; a decode failure is reported as such
            # instead of surfacing later as a TypeError from df.write().
            if isinstance(line, bytes):
                line = line.decode("utf-8")
            df.write(line)


def load_vocab_yaml(file_location):
    """
    Parse a yaml config file and hand back whatever it contains.
    :param file_location: Path of the yaml file to read (opened as UTF-8).
    :return: The parsed yaml document, as produced by yaml.safe_load.
    """
    with open(file_location, mode='r', encoding='utf-8') as stream:
        contents = yaml.safe_load(stream)
    return contents


def get_file_from_config(config_data, data_dir):
    """
    Get the file described by an entry of the yaml configuration.
    :param config_data: The configuration, a dict with information about the
        file. 'type' is either 'file' (a path relative to data_dir) or 'fetch'
        (a URL); 'location' holds the path or URL. For 'fetch', an optional
        'headers' dict adds request headers and an optional 'auth' dict
        ('type': 'github', 'token': ...) adds an Authorization header.
    :param data_dir: The data directory of the application
    :return: An open text file for 'file', or the object returned by
        urllib.request.urlopen for 'fetch'. The caller is responsible for
        closing it (both work as context managers).
    :raises InvalidConfigurationException: When 'type' is missing or unknown.
    :raises UnknownAuthenticationTypeException: When the auth type is not
        supported.
    """
    source_type = config_data.get('type')

    if source_type == 'file':
        # Explicit encoding keeps behaviour independent of the container
        # locale.
        return open(f"{data_dir}/{config_data['location']}", encoding='utf-8')

    if source_type == 'fetch':
        req = urllib.request.Request(config_data['location'])

        # `or {}` also covers a key that is present but left empty in the yaml.
        for header, val in (config_data.get('headers') or {}).items():
            req.add_header(header, val)

        auth_data = config_data.get('auth')
        if auth_data:
            auth_type = auth_data.get('type')
            if auth_type == 'github':
                req.add_header('Authorization', f'token {auth_data["token"]}')
            else:
                raise UnknownAuthenticationTypeException(
                    f"Unknown authentication type: {auth_type!r}"
                )

        return urllib.request.urlopen(req)

    raise InvalidConfigurationException(
        f"Type must be 'file' or 'fetch', got {source_type!r}"
    )


def get_vocab_format(source_data):
    """
    Determine the format (file extension) of a vocabulary source.
    :param source_data: The source configuration, a dict. An explicit
        'format' entry wins; otherwise the extension of 'location' is used.
    :return: The explicit format as configured, or the lower-cased extension
        of the location without the leading dot ('' when there is none).
    """
    import posixpath
    from urllib.parse import urlsplit

    if 'format' in source_data:
        return source_data['format']

    location = source_data['location']
    if '://' in location:
        # For URLs only the path carries the extension; a query string or
        # fragment (e.g. "?raw=true") must not end up in the result.
        location = urlsplit(location).path
    return posixpath.splitext(location)[1].lstrip('.').lower()


def load_vocabulary(source_data, data_dir, graph_name):
    """
    Load a vocabulary using the source data from the yaml.
    :param source_data: The source configuration, a dict describing where the
        vocabulary comes from (see get_file_from_config) and, optionally, its
        format (see get_vocab_format).
    :param data_dir: The data directory of the application.
    :param graph_name: The name of the graph the vocabulary is added to.
    :return:
    """
    # The source is closed again as soon as the upload has finished.
    with get_file_from_config(source_data, data_dir) as vocab_file:
        add_vocabulary(vocab_file, graph_name, get_vocab_format(source_data))


if __name__ == "__main__":
Expand All @@ -45,65 +116,30 @@ def append_file(source, dest):
shutil.copy('/var/www/html/config-docker-compose.ttl', '/tmp/config-docker-compose.ttl')

if os.path.isfile(f'{data}/config-ext.ttl'):
append_file(f'{data}/config-ext.ttl', '/tmp/config-docker-compose.ttl')

admin_password = os.environ.get("ADMIN_PASSWORD", '')

endpoint = os.environ.get("SPARQL_ENDPOINT", '')

# Check if db exists
resp = requests.get(f"{endpoint}/size")
if resp.status_code != 200:
# GraphDB repository not created yet -- create it
headers = {
'Content-Type': 'text/turtle',
}
response = requests.put(
f"{endpoint}",
headers=headers,
data=open(f"/var/www/skosmos-repository.ttl", "rb"),
auth=('admin', admin_password),
)
print(f"CREATED GRAPHDB[{endpoint}] DB[skosmos.tdb]")
else:
print(f"EXISTS GRAPHDB [{endpoint}]]")
with open(f'{data}/config-ext.ttl', 'r', encoding='utf-8') as f:
append_file(f, '/tmp/config-docker-compose.ttl')

setup_graphdb()

vocabs = glob.glob(f'{data}/*.ttl')
loaded_vocabs = get_loaded_vocabs()

graphs_response = requests.get(f"{endpoint}/rdf-graphs",
headers={"Accept": "application/json"})
loaded_vocabs = []
if graphs_response.status_code == 200:
body = graphs_response.json()
loaded_vocabs = []
for binding in body["results"]["bindings"]:
loaded_vocabs.append(binding["contextID"]["value"])
print("Loaded vocabs:")
print(loaded_vocabs)
vocabs = glob.glob(f'{data}/*.yaml')

for vocab in vocabs:
path = Path(vocab)
basename = path.stem
graph = get_graph(data, basename)
if graph not in loaded_vocabs:
print(f"LOAD VOCAB[{basename}] ...")
print(f"... in GRAPH[{graph}] ...")
if not os.path.isfile(f'{data}/{basename}.ttl'):
print(f"!ERROR {data}/{basename}.ttl doesn't exist!")
exit(1)

headers = {
'Content-Type': 'text/turtle',
}
response = requests.put(
f"{endpoint}/statements",
data=open(f'{data}/{basename}.ttl', "rb"),
headers=headers,
auth=('admin', admin_password),
params={'context': f"<{graph}>"},
)
print("... DONE")

configs = glob.glob(f'{data}/*.config')
for config in configs:
append_file(config, "/tmp/config-docker-compose.ttl")
vocab_config = load_vocab_yaml(path)

with get_file_from_config(vocab_config['config'], data) as config:
graph = get_graph(config)
print(f"Graph: {graph}")
with get_file_from_config(vocab_config['config'], data) as config:
# Reset file pointer
append_file(config, "/tmp/config-docker-compose.ttl")

always_load = vocab_config['config'].get('alwaysRefresh', False)

if always_load or graph not in loaded_vocabs:
print(f"Loading vocabulary {vocab}")
load_vocabulary(vocab_config['source'], data, graph)
print("... DONE")
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
requests
PyYAML==6.0.1
requests==2.31.0
rdflib~=7.0.0
10 changes: 10 additions & 0 deletions src/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
This file contains all exceptions raised by the application.
"""


class InvalidConfigurationException(Exception):
    """Signals that a configuration entry cannot be used as given."""

class UnknownAuthenticationTypeException(Exception):
    """Signals that a configured authentication type is not supported."""
83 changes: 83 additions & 0 deletions src/graphdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""
This file contains functions for interacting with GraphDB
"""
import os

import requests

admin_password = os.environ.get("ADMIN_PASSWORD", '')
endpoint = os.environ.get("SPARQL_ENDPOINT", '')


def setup_graphdb():
    """
    Setup graphdb, if it isn't set up yet.

    Probes `<endpoint>/size`; any answer other than 200 is taken to mean the
    repository does not exist yet, in which case the repository definition in
    /var/www/skosmos-repository.ttl is uploaded to the endpoint.
    :return:
    """
    # Check if db exists
    resp = requests.get(f"{endpoint}/size")
    if resp.status_code == 200:
        print(f"EXISTS GRAPHDB [{endpoint}]]")
        return

    # GraphDB repository not created yet -- create it
    headers = {
        'Content-Type': 'text/turtle',
    }
    # Stream the definition from disk and make sure the handle is closed
    # again once the request has been sent.
    with open("/var/www/skosmos-repository.ttl", "rb") as repo_config:
        response = requests.put(
            endpoint,
            headers=headers,
            data=repo_config,
            auth=('admin', admin_password),
        )

    # Only claim success when the server accepted the request; otherwise show
    # what it answered, without aborting start-up.
    if response.ok:
        print(f"CREATED GRAPHDB[{endpoint}] DB[skosmos.tdb]")
    else:
        print(f"RESPONSE: {response.status_code}")
        print(response.content)


def get_loaded_vocabs():
    """
    Ask GraphDB which graphs it currently holds.
    :return: A list with the contextID value of every binding in the
        response, or an empty list when the request is not answered with 200.
    """
    graphs_response = requests.get(
        f"{endpoint}/rdf-graphs",
        headers={"Accept": "application/json"},
    )
    if graphs_response.status_code != 200:
        return []

    bindings = graphs_response.json()["results"]["bindings"]
    contexts = [entry["contextID"]["value"] for entry in bindings]
    print("Loaded vocabs:")
    print(contexts)
    return contexts


def get_type(extension):
    """
    Map a file extension / format name to the content type used for upload.
    :param extension: String such as "ttl", "turtle" or "trig". Matching is
        case-insensitive and tolerates a leading dot (".trig").
    :return: The matching content type; "text/turtle" for anything unknown.
    """
    default = "text/turtle"
    content_types = {
        "ttl": "text/turtle",
        "turtle": "text/turtle",
        "trig": "application/x-trig",
    }
    # Anything that is not a string cannot name a known format.
    if not isinstance(extension, str):
        return default
    return content_types.get(extension.strip().lstrip('.').lower(), default)


def add_vocabulary(graph, graph_name, extension):
    """
    Add a vocabulary to GraphDB
    :param graph: File-like object holding the vocabulary; read() may return
        str or bytes.
    :param graph_name: String representing the name of the graph
    :param extension: String representing the extension
    :return:
    """
    print(f"Adding vocabulary {graph_name}")
    headers = {
        'Content-Type': get_type(extension),
    }

    payload = graph.read()
    # Sources opened in text mode give str; encode it ourselves so the bytes
    # on the wire are UTF-8 regardless of what the HTTP stack would pick.
    if isinstance(payload, str):
        payload = payload.encode('utf-8')

    response = requests.put(
        f"{endpoint}/statements",
        data=payload,
        headers=headers,
        auth=('admin', admin_password),
        params={'context': f"<{graph_name}>"},
    )
    print(f"RESPONSE: {response.status_code}")
    # Any 2xx counts as success (NOTE(review): the statements endpoint is
    # expected to answer 204 No Content on success -- confirm against the
    # RDF4J REST API docs); only show the body when something went wrong.
    if not response.ok:
        print(response.content)

0 comments on commit 5e13889

Please sign in to comment.