Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jarno-knaw committed Jul 25, 2024
1 parent 5e13889 commit d11eb28
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 98 deletions.
95 changes: 10 additions & 85 deletions entrypoint.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,17 @@
#!/usr/bin/env python3
import time

"""
The python entrypoint initializes the Skosmos dataset before starting the application.
"""

import glob
import os
import shutil
import glob
import re
import urllib.request
import time
from pathlib import Path
from rdflib import ConjunctiveGraph

import yaml

from src.exceptions import InvalidConfigurationException, UnknownAuthenticationTypeException
from src.graphdb import add_vocabulary, get_loaded_vocabs, setup_graphdb


def get_graph(fp):
"""
Get the sparql graph from the given vocab
:param fp: The vocabulary config, a file pointer
:return:
"""
for line in fp:
# If line is a bytes-like object, we need to decode it
try:
line = line.decode()
except (UnicodeDecodeError, AttributeError):
# Already decoded
pass
if re.search("sparqlGraph", line):
return line.strip().split(" ")[1].strip("<>")
from src.graphdb import get_loaded_vocabs, setup_graphdb
from src.vocabularies import get_file_from_config, get_graph, load_vocab_yaml, load_vocabulary


def append_file(source, dest):
Expand All @@ -38,7 +21,7 @@ def append_file(source, dest):
:param dest: The path of the destination file.
:return:
"""
with open(dest, "a+") as df:
with open(dest, "a+", encoding='utf-8') as df:
for line in source:
try:
line = line.decode()
Expand All @@ -47,64 +30,6 @@ def append_file(source, dest):
df.write(line)


def load_vocab_yaml(file_location):
"""
Open a yaml config file and return a dict with its contents
:param file_location:
:return:
"""
with open(file_location, 'r', encoding='utf-8') as fp:
return yaml.safe_load(fp)


def get_file_from_config(config_data, data_dir):
"""
Get the config file from yaml data.
:param config_data: The configuration, a dict with information about the file.
:param data_dir: The data directory of the application
:return:
"""
if config_data['type'] == 'file':
return open(f"{data_dir}/{config_data['location']}")
elif config_data['type'] == 'fetch':
req = urllib.request.Request(config_data['location'])
if 'headers' in config_data:
for header, val in config_data['headers'].items():
req.add_header(header, val)

if 'auth' in config_data:
auth_data = config_data['auth']
if auth_data['type'] == 'github':
req.add_header('Authorization', f'token {auth_data["token"]}')
else:
raise UnknownAuthenticationTypeException()

return urllib.request.urlopen(req)
else:
raise InvalidConfigurationException("Type must be file")


def get_vocab_format(source_data):
if 'format' in source_data:
return source_data['format']
return source_data['location'].split('.')[-1]


def load_vocabulary(source_data, data_dir, graph_name):
"""
Load a vocabulary using the source data from the yaml.
:param source_data:
:param data_dir:
:param graph_name:
:return:
"""
with get_file_from_config(source_data, data_dir) as vocab_file:
# g = ConjunctiveGraph()
# g.parse(vocab_file, format=get_vocab_format(source_data))
# c = list(g.contexts())[0]
add_vocabulary(vocab_file, graph_name, get_vocab_format(source_data))


if __name__ == "__main__":
time.sleep(10)

Expand Down
11 changes: 8 additions & 3 deletions src/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@


class InvalidConfigurationException(Exception):
pass
"""
Exception raised when there is a misconfiguration.
"""

class UnknownAuthenticationTypeException(Exception):
pass

class UnknownAuthenticationTypeException(InvalidConfigurationException):
"""
Exception raised when the authentication type specified is not known.
"""
30 changes: 20 additions & 10 deletions src/graphdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
This file contains functions for interacting with GraphDB
"""
import os

import requests

admin_password = os.environ.get("ADMIN_PASSWORD", '')
Expand All @@ -15,18 +14,20 @@ def setup_graphdb():
:return:
"""
# Check if db exists
resp = requests.get(f"{endpoint}/size")
resp = requests.get(f"{endpoint}/size", timeout=60)
if resp.status_code != 200:
# GraphDB repository not created yet -- create it
headers = {
'Content-Type': 'text/turtle',
}
requests.put(
f"{endpoint}",
headers=headers,
data=open(f"/var/www/skosmos-repository.ttl", "rb"),
auth=('admin', admin_password),
)
with open("/var/www/skosmos-repository.ttl", "rb") as fp:
requests.put(
f"{endpoint}",
headers=headers,
data=fp,
auth=('admin', admin_password),
timeout=60
)
print(f"CREATED GRAPHDB[{endpoint}] DB[skosmos.tdb]")
else:
print(f"EXISTS GRAPHDB [{endpoint}]]")
Expand All @@ -37,8 +38,11 @@ def get_loaded_vocabs():
Get all loaded vocabularies from GraphDB
:return:
"""
graphs_response = requests.get(f"{endpoint}/rdf-graphs",
headers={"Accept": "application/json"})
graphs_response = requests.get(
f"{endpoint}/rdf-graphs",
headers={"Accept": "application/json"},
timeout=60
)
tmp = []
if graphs_response.status_code == 200:
body = graphs_response.json()
Expand All @@ -51,6 +55,11 @@ def get_loaded_vocabs():


def get_type(extension):
"""
Get the http mimetype based on the extension of a file.
:param extension:
:return:
"""
if extension in ["ttl", "turtle"]:
return "text/turtle"
if extension in ["trig"]:
Expand All @@ -77,6 +86,7 @@ def add_vocabulary(graph, graph_name, extension):
headers=headers,
auth=('admin', admin_password),
params={'context': f"<{graph_name}>"},
timeout=60,
)
print(f"RESPONSE: {response.status_code}")
if response.status_code != 200:
Expand Down
91 changes: 91 additions & 0 deletions src/vocabularies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
This file contains functions for dealing with vocabularies and their configuration.
"""

import re
import urllib.request
import yaml

from src.exceptions import InvalidConfigurationException, UnknownAuthenticationTypeException
from src.graphdb import add_vocabulary


def get_file_from_config(config_data, data_dir):
    """
    Open the vocabulary source described by a source configuration.

    :param config_data: The configuration, a dict with information about the file.
        ``type`` selects the source kind (``file`` or ``fetch``) and ``location`` is
        a path relative to ``data_dir`` (``file``) or a URL (``fetch``). For ``fetch``
        sources, the optional ``headers`` dict and ``auth`` dict (``type`` and
        ``token``) are added to the request.
    :param data_dir: The data directory of the application
    :return: An open file-like object; the caller is responsible for closing it
        (both return values can be used as context managers).
    :raises UnknownAuthenticationTypeException: If ``auth`` has a type other than ``github``.
    :raises InvalidConfigurationException: If ``type`` is neither ``file`` nor ``fetch``.
    """
    source_type = config_data['type']

    if source_type == 'file':
        return open(f"{data_dir}/{config_data['location']}", encoding='utf-8')

    if source_type == 'fetch':
        req = urllib.request.Request(config_data['location'])

        # An empty ``headers:`` key in the yaml is loaded as None; treat it as "no headers".
        for header, val in (config_data.get('headers') or {}).items():
            req.add_header(header, val)

        if 'auth' in config_data:
            auth_data = config_data['auth']
            if auth_data['type'] == 'github':
                req.add_header('Authorization', f'token {auth_data["token"]}')
            else:
                raise UnknownAuthenticationTypeException(
                    f"Unknown authentication type {auth_data['type']!r}"
                )

        # Use the same 60 second timeout as the GraphDB requests, so an unresponsive
        # server cannot block forever.
        return urllib.request.urlopen(req, timeout=60)

    raise InvalidConfigurationException(
        f"Unknown source type {source_type!r}: type must be 'file' or 'fetch'"
    )


def load_vocabulary(source_data, data_dir, graph_name):
    """
    Open the vocabulary source described in the yaml and hand it to GraphDB.

    :param source_data: The source configuration of the vocabulary, a dict.
    :param data_dir: The data directory of the application.
    :param graph_name: The name of the graph the vocabulary is added under.
    :return:
    """
    source = get_file_from_config(source_data, data_dir)
    # The source is closed again as soon as the upload has finished (or failed).
    with source as handle:
        vocab_format = get_vocab_format(source_data)
        add_vocabulary(handle, graph_name, vocab_format)


def get_graph(fp):
    """
    Get the sparql graph from the given vocab

    Scans the config line by line and uses the first line mentioning ``sparqlGraph``.
    An IRI written as ``<...>`` after ``sparqlGraph`` is preferred; otherwise the
    second whitespace-separated token of the line is used, stripped of ``<``/``>``.

    :param fp: The vocabulary config, a file pointer (an iterable of str or bytes lines)
    :return: The graph name, or an empty string when no usable line is found.
    """
    for line in fp:
        # If line is a bytes-like object, we need to decode it. Undecodable bytes are
        # replaced rather than left as bytes, which would break the text matching below.
        if isinstance(line, (bytes, bytearray)):
            line = line.decode('utf-8', errors='replace')

        if "sparqlGraph" not in line:
            continue

        # Handles any whitespace (tabs, repeated spaces) and a directly attached ";" or ".".
        iri = re.search(r"sparqlGraph\s+<([^<>\s]*)>", line)
        if iri:
            return iri.group(1)

        # Fallback for values not written as <iri>, e.g. a prefixed name.
        tokens = line.split()
        if len(tokens) > 1:
            return tokens[1].strip("<>")
    return ""


def load_vocab_yaml(file_location):
    """
    Read a yaml config file and return its parsed contents.

    :param file_location: The path of the yaml file, read as UTF-8.
    :return: Whatever ``yaml.safe_load`` produces for the file.
    """
    with open(file_location, mode='r', encoding='utf-8') as config_file:
        parsed = yaml.safe_load(config_file)
    return parsed


def get_vocab_format(source_data):
    """
    Return the vocab format of the given data source. It is either based on the file extension,
    or on an override in the yaml file.

    For ``fetch`` sources only the path of the URL is inspected, so a query string or
    fragment (``.../vocab.ttl?raw=true``) does not end up in the returned format.

    :param source_data: The source configuration, a dict with a ``location`` and
        optionally a ``format`` and ``type``.
    :return: The ``format`` override if present, otherwise the text after the last dot
        of the location.
    """
    # Local import keeps this function self-contained within the module.
    from urllib.parse import urlsplit

    if 'format' in source_data:
        return source_data['format']

    location = source_data['location']
    if source_data.get('type') == 'fetch':
        # Drop scheme, host, query and fragment; only the path carries the extension.
        location = urlsplit(location).path
    return location.split('.')[-1]

0 comments on commit d11eb28

Please sign in to comment.