From 4983618588c3c58afb39e90d35cc468c490e30ea Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Mon, 19 Aug 2019 16:39:19 +0200 Subject: [PATCH 1/6] Add recursion to descend commit tree Support subdirectories in repositories. --- quit/conf.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index 2a7155d1..f0016cae 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -414,25 +414,34 @@ def get_blobs_from_repository(self, rev): dict: containing names rdf files plus format and oid. """ + def find_blobs(tree, prefix=''): + # Collect graph files, rdf files and config files + for entry in tree: + if entry.type == 'blob': + format = guess_format(entry.name) + if format is None and entry.name.endswith('.graph'): + graph_file_blobs[join(prefix, entry.name)] = entry.id + elif format is not None and format == 'nt': + print('Yeah', join(prefix, entry.name)) + rdf_file_blobs[join(prefix, entry.name)] = (entry.id, format) + elif format is not None and entry.name == 'config.ttl': + config_files.append(str(entry.id)) + elif entry.type == 'tree': + prefix += entry.name + '/' + tree_obj = self.repository[entry.id] + find_blobs(tree_obj, prefix) + config_files = [] graph_files = {} graph_file_blobs = {} rdf_file_blobs = {} + try: commit = self.repository.revparse_single(rev) except Exception: return graph_files, config_files, rdf_file_blobs - # Collect graph files, rdf files and config files - for entry in commit.tree: - if entry.type == 'blob': - format = guess_format(entry.name) - if format is None and entry.name.endswith('.graph'): - graph_file_blobs[entry.name] = entry.id - elif format is not None and format == 'nt': - rdf_file_blobs[entry.name] = (entry.id, format) - elif format is not None and entry.name == 'config.ttl': - config_files.append(str(entry.id)) + find_blobs(commit.tree) # collect pairs of rdf files and graph files for filename in rdf_file_blobs.keys(): From 
3e76326d8172e827d8e1b6551b85720cb0513b0c Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 20 Aug 2019 11:13:57 +0200 Subject: [PATCH 2/6] Fix logical error in recursion Remove comment --- quit/conf.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/quit/conf.py b/quit/conf.py index f0016cae..2aa95211 100644 --- a/quit/conf.py +++ b/quit/conf.py @@ -422,14 +422,12 @@ def find_blobs(tree, prefix=''): if format is None and entry.name.endswith('.graph'): graph_file_blobs[join(prefix, entry.name)] = entry.id elif format is not None and format == 'nt': - print('Yeah', join(prefix, entry.name)) rdf_file_blobs[join(prefix, entry.name)] = (entry.id, format) elif format is not None and entry.name == 'config.ttl': config_files.append(str(entry.id)) elif entry.type == 'tree': - prefix += entry.name + '/' tree_obj = self.repository[entry.id] - find_blobs(tree_obj, prefix) + find_blobs(tree_obj, join(prefix, entry.name)) config_files = [] graph_files = {} From 9b5c2e48f0605a948be9f9944f7d82dcab278efe Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 20 Aug 2019 11:15:03 +0200 Subject: [PATCH 3/6] Update factory to deal with subdirectories --- tests/helpers.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 6b01d036..39f55179 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -1,6 +1,6 @@ from tempfile import TemporaryDirectory from pygit2 import init_repository, clone_repository, Signature -from os import path, walk +from os import path, walk, makedirs from os.path import join from rdflib import Graph from urllib.parse import quote_plus @@ -156,7 +156,7 @@ def withNoConfigInformation(self): return tmpRepo - def withGraphs(self, graphUriContentDict, mode='graphfiles'): + def withGraphs(self, graphUriContentDict, mode='graphfiles', subDirectory=False): """Give a TemporaryRepository() initialized with a dictionary of graphUris and content (nt).""" uristring 
= '' configFileContent = """@base . @@ -176,29 +176,41 @@ def withGraphs(self, graphUriContentDict, mode='graphfiles'): index.read() i = 0 + for graphUri, graphContent in sorted(graphUriContentDict.items()): + subdir = '' + + if subDirectory: + subdir = 'sub{}'.format(i) + abs_subdir = path.join(tmpRepo.repo.workdir, subdir) + makedirs(abs_subdir, exist_ok=True) + filename = 'graph_{}.nt'.format(i) - with open(path.join(tmpRepo.repo.workdir, filename), "w") as graphFile: + + with open(path.join(tmpRepo.repo.workdir, subdir, filename), "w") as graphFile: if graphContent: graphFile.write(graphContent) if mode == 'graphfiles': # Set Graph URI to http://example.org/ - with open(path.join(tmpRepo.repo.workdir, filename + ".graph"), "w") as graphFile: + with open(path.join(tmpRepo.repo.workdir, subdir, filename + ".graph"), "w") as graphFile: graphFile.write(graphUri) - index.add(filename + '.graph') + + index.add(path.join(subdir, filename + '.graph')) elif mode == 'configfile': - uristring += graphResource.format(i, graphUri, filename) + uristring += graphResource.format(i, graphUri, join(subdir, filename)) # Add and Commit the empty graph - index.add(filename) + index.add(path.join(subdir, filename)) i += 1 if mode == 'configfile': graph = Graph() + with open(path.join(tmpRepo.repo.workdir, "config.ttl"), "w") as configFile: rdf_content = configFileContent.format(tmpRepo.repo.workdir, uristring) graph.parse(format='turtle', data=rdf_content) configFile.write(graph.serialize(format='turtle').decode()) + index.add('config.ttl') index.write() From e34056147c890f9c6a82c33706a74490e8fd965f Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 20 Aug 2019 11:17:25 +0200 Subject: [PATCH 4/6] Add first subdirectory tests TODO: test update and configfile tests --- tests/test_app.py | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_app.py b/tests/test_app.py index 1397634c..a7c5ea0c 100644 --- a/tests/test_app.py +++ 
b/tests/test_app.py @@ -2797,6 +2797,51 @@ def testStartApp(self): response = app.post('/provenance', data=dict(query=query)) self.assertEqual(response.status, '404 NOT FOUND') + def testSubdirectoriesGraphfile(self): + """Test if subdirectories are recognized and commits are working.""" + # Prepare a Repository with subdirectories + repo_content = {'urn:graph0': '<urn:0> <urn:0> <urn:0> .\n', + 'urn:graph1': '<urn:1> <urn:1> <urn:1> .\n'} + + with TemporaryRepositoryFactory().withGraphs(repo_content, 'graphfiles', True) as repo: + select = 'SELECT ?s ?p ?o WHERE {{ GRAPH <urn:graph{}> {{ ?s ?p ?o }} }}' + update = """ + DELETE DATA {{ + GRAPH <urn:graph{i}> {{ + <urn:{i}> <urn:{i}> <urn:{i}> . }} }} ; + INSERT DATA {{ + GRAPH <urn:graph{i}> {{ + <urn:{i}{i}> <urn:{i}{i}> <urn:{i}{i}> . }} }}""" + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir]) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + for i in [0, 1]: + # check file existence + with open(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt'.format(i)), 'r') as f: + self.assertEqual( + '<urn:{i}> <urn:{i}> <urn:{i}> .\n'.format(i=i), + f.read()) + + # check store content + res = app.post('/sparql', + data=dict(query=select.format(i)), + headers=dict(accept='application/sparql-results+json')) + obj = json.loads(res.data.decode("utf-8")) + print(json.loads(res.data.decode("utf-8"))) + self.assertEqual(len(obj["results"]["bindings"]), 1) + self.assertDictEqual(obj["results"]["bindings"][0], { + "s": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "p": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "o": {'type': 'uri', 'value': 'urn:{}'.format(i)}}) + + # TODO update and test + def testWithOnDeleteAndInsert(self): """Test WITH on DELETE and INSERT plus USING. 
From 1af5895d7b8c19f9ae7857ca7937652944297a15 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 20 Aug 2019 16:07:58 +0200 Subject: [PATCH 5/6] Add post update tests for subdirectory support Applies to mode "graphfiles" --- tests/test_app.py | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tests/test_app.py b/tests/test_app.py index a7c5ea0c..7a61239c 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2798,7 +2798,7 @@ def testStartApp(self): self.assertEqual(response.status, '404 NOT FOUND') def testSubdirectoriesGraphfile(self): - """Test if subdirectories are recognized and commits are working.""" + """Test if subdirectories are recognized and commits are working using graphfiles.""" # Prepare a Repository with subdirectories repo_content = {'urn:graph0': '<urn:0> <urn:0> <urn:0> .\n', 'urn:graph1': '<urn:1> <urn:1> <urn:1> .\n'} @@ -2819,7 +2819,23 @@ def testSubdirectoriesGraphfile(self): config = objects['config'] app = create_app(config).test_client() + # check states after init for i in [0, 1]: + self.assertTrue( + path.isfile(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt.graph'.format(i)))) + # check store content + res = app.post('/sparql', + data=dict(query=select.format(i)), + headers=dict(accept='application/sparql-results+json')) + obj = json.loads(res.data.decode("utf-8")) + self.assertEqual(len(obj["results"]["bindings"]), 1) + self.assertDictEqual(obj["results"]["bindings"][0], { + "s": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "p": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "o": {'type': 'uri', 'value': 'urn:{}'.format(i)}}) + # check file existence with open(path.join(repo.workdir, 'sub{}'.format(i), @@ -2828,19 +2844,30 @@ def testSubdirectoriesGraphfile(self): '<urn:{i}> <urn:{i}> <urn:{i}> .\n'.format(i=i), f.read()) + # check states after update + for i in [0, 1]: + # perform update + app.post('/sparql', data=dict(update=update.format(i=i))) + # check store content res = app.post('/sparql', 
data=dict(query=select.format(i)), headers=dict(accept='application/sparql-results+json')) obj = json.loads(res.data.decode("utf-8")) - print(json.loads(res.data.decode("utf-8"))) + + # check file existence + with open(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt'.format(i)), 'r') as f: + self.assertEqual( + '<urn:{i}{i}> <urn:{i}{i}> <urn:{i}{i}> .\n'.format(i=i), + f.read()) + self.assertEqual(len(obj["results"]["bindings"]), 1) self.assertDictEqual(obj["results"]["bindings"][0], { - "s": {'type': 'uri', 'value': 'urn:{}'.format(i)}, - "p": {'type': 'uri', 'value': 'urn:{}'.format(i)}, - "o": {'type': 'uri', 'value': 'urn:{}'.format(i)}}) - - # TODO update and test + "s": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}, + "p": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}, + "o": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}}) def testWithOnDeleteAndInsert(self): """Test WITH on DELETE and INSERT plus USING. From 5cf306b4d45806d619827f5d1f165ed740ba6817 Mon Sep 17 00:00:00 2001 From: Norman Radtke Date: Tue, 20 Aug 2019 16:09:34 +0200 Subject: [PATCH 6/6] Add subdirectory test for mode "configfile" --- tests/test_app.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tests/test_app.py b/tests/test_app.py index 7a61239c..2827911c 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -2869,6 +2869,77 @@ def testSubdirectoriesGraphfile(self): "p": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}, "o": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}}) + def testSubdirectoriesConfigfile(self): + """Test if subdirectories are recognized and commits are working using configfile.""" + # Prepare a Repository with subdirectories + repo_content = {'urn:graph0': '<urn:0> <urn:0> <urn:0> .\n', + 'urn:graph1': '<urn:1> <urn:1> <urn:1> .\n'} + + with TemporaryRepositoryFactory().withGraphs(repo_content, 'configfile', True) as repo: + select = 'SELECT ?s ?p ?o WHERE {{ GRAPH <urn:graph{}> {{ ?s ?p ?o }} }}' + update = """ + DELETE DATA {{ + GRAPH <urn:graph{i}> {{ + <urn:{i}> <urn:{i}> <urn:{i}> . 
}} }} ; + INSERT DATA {{ + GRAPH <urn:graph{i}> {{ + <urn:{i}{i}> <urn:{i}{i}> <urn:{i}{i}> . }} }}""" + + # Start Quit + args = quitApp.parseArgs(['-t', repo.workdir]) + objects = quitApp.initialize(args) + config = objects['config'] + app = create_app(config).test_client() + + # check states after init + for i in [0, 1]: + self.assertFalse(path.isfile(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt.graph'.format(i)))) + # check store content + res = app.post('/sparql', + data=dict(query=select.format(i)), + headers=dict(accept='application/sparql-results+json')) + obj = json.loads(res.data.decode("utf-8")) + self.assertEqual(len(obj["results"]["bindings"]), 1) + self.assertDictEqual(obj["results"]["bindings"][0], { + "s": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "p": {'type': 'uri', 'value': 'urn:{}'.format(i)}, + "o": {'type': 'uri', 'value': 'urn:{}'.format(i)}}) + + # check file existence + with open(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt'.format(i)), 'r') as f: + self.assertEqual( + '<urn:{i}> <urn:{i}> <urn:{i}> .\n'.format(i=i), + f.read()) + + # check states after update + for i in [0, 1]: + # perform update + app.post('/sparql', data=dict(update=update.format(i=i))) + + # check store content + res = app.post('/sparql', + data=dict(query=select.format(i)), + headers=dict(accept='application/sparql-results+json')) + obj = json.loads(res.data.decode("utf-8")) + + # check file existence + with open(path.join(repo.workdir, + 'sub{}'.format(i), + 'graph_{}.nt'.format(i)), 'r') as f: + self.assertEqual( + '<urn:{i}{i}> <urn:{i}{i}> <urn:{i}{i}> .\n'.format(i=i), + f.read()) + + self.assertEqual(len(obj["results"]["bindings"]), 1) + self.assertDictEqual(obj["results"]["bindings"][0], { + "s": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}, + "p": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}, + "o": {'type': 'uri', 'value': 'urn:{i}{i}'.format(i=i)}}) + def testWithOnDeleteAndInsert(self): """Test WITH on DELETE and INSERT plus USING.