Add markdown (-m) output next to the existing rq output and make the prefix
detection in SparqlInRdfToRq match complete prefixes only.

Review notes (text before the first "diff --git" line is ignored by patch tools):
- This patch was restored from a copy in which all line breaks were collapsed
  and everything between angle brackets was stripped. Generic type arguments
  and the FROM <...> IRI in the test are reinstated from usage; indentation,
  blank context lines and the wrapping of the long @Option context lines are
  inferred from the hunk line counts. Confirm with
  "git apply --check --ignore-whitespace" before relying on it.
- queryContainsPrefix now inspects every occurrence of the prefix and accepts
  more delimiters; hunk line counts were recomputed for the added lines. The
  blob ids on the "index" lines are those of the original patch.

diff --git a/.gitignore b/.gitignore
index f6faa365a..315afa2ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 /target/
 /examples_*.ttl
 /examples/**/*.rq
+/examples/**/*.md
diff --git a/src/main/java/swiss/sib/rdf/sparql/examples/Converter.java b/src/main/java/swiss/sib/rdf/sparql/examples/Converter.java
index aad09acae..4fdc0af09 100644
--- a/src/main/java/swiss/sib/rdf/sparql/examples/Converter.java
+++ b/src/main/java/swiss/sib/rdf/sparql/examples/Converter.java
@@ -3,29 +3,17 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
-import java.nio.file.OpenOption;
 import java.nio.file.Path;
 import java.nio.file.StandardOpenOption;
-import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Optional;
 import java.util.Set;
+import java.util.function.Function;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import java.util.stream.StreamSupport;
 
-import org.eclipse.rdf4j.model.IRI;
 import org.eclipse.rdf4j.model.Model;
-import org.eclipse.rdf4j.model.Resource;
-import org.eclipse.rdf4j.model.Statement;
-import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.impl.LinkedHashModel;
-import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
-import org.eclipse.rdf4j.model.vocabulary.RDF;
-import org.eclipse.rdf4j.model.vocabulary.RDFS;
-import org.eclipse.rdf4j.model.vocabulary.SHACL;
 import org.eclipse.rdf4j.rio.RDFFormat;
 import org.eclipse.rdf4j.rio.RDFHandlerException;
 import org.eclipse.rdf4j.rio.RDFParseException;
@@ -65,6 +53,11 @@ void exit(Exception e) {
 
 	@Option(names = { "-r", "--rq" }, paramLabel = "output example files as rq files next to the exiting turtle", description = "output example files as rq files next to the exiting turtle", defaultValue = "false")
 	private boolean outputRq;
+
+	@Option(names = { "-m",
+			"--markdown" }, paramLabel = "output example files as markdown files next to the exiting turtle", description = "output example files as markdown files next to the exiting turtle", defaultValue = "false")
+	private boolean outputMd;
+
 	@Option(names = { "-i",
 			"--input-directory" }, paramLabel = "directory containing example files to convert",
 			description = "The root directory where the examples and their prefixes can be found.", required = true)
@@ -87,8 +80,10 @@ public static void main(String[] args) {
 				commandLine.printVersionHelp(System.out);
 				return;
 			} else {
-				if (converter.outputRq) {
-					converter.convertToRQs();
+				if (converter.outputMd) {
+					converter.convertPerSingle("md", SparqlInRdfToMd::asRq);
+				} else if (converter.outputRq) {
+					converter.convertPerSingle("rq", SparqlInRdfToRq::asRq);
 				} else {
 					converter.convertToRdf();
 				}
@@ -113,22 +108,28 @@ private void convertToRdf() {
 		print(model);
 	}
 
-	private void convertToRQs() {
-
+	/**
+	 * Write one converted file next to every example file of the selected
+	 * projects.
+	 *
+	 * @param extension extension of the files to write, without the leading dot
+	 * @param converter turns the model of a single example into the lines to write
+	 */
+	private void convertPerSingle(String extension, Function<Model, List<String>> converter){
 		if ("all".equals(projects)) {
 			try (Stream<Path> list = Files.list(inputDirectory)) {
-				convertProjectsToRq(list);
+				convertProjectsPerSingle(list, extension, converter);
 			} catch (IOException e) {
 				Failure.CANT_READ_INPUT_DIRECTORY.exit(e);
 			}
 		} else {
 			try (Stream<Path> list = COMMA.splitAsStream(projects).map(inputDirectory::resolve)) {
-				convertProjectsToRq(list);
+				convertProjectsPerSingle(list, extension, converter);
 			}
 		}
 	}
 
-	private void convertProjectsToRq(Stream<Path> list) {
+	private void convertProjectsPerSingle(Stream<Path> list, String extension, Function<Model, List<String>> converter) {
 		Optional<Path> findCommonPrefixes = FindFiles.prefixFile(inputDirectory).findFirst();
 		Model commonPrefixes = prefixModel(findCommonPrefixes);
 		list.forEach(pro -> {
@@ -140,10 +141,10 @@ private void convertProjectsToRq(Stream<Path> list) {
 					ex.addAll(commonPrefixes);
 					ex.addAll(projectPrefixes);
 					String pfn = p.getFileName().toString();
-					String prqfn = pfn.substring(0, pfn.indexOf('.')) + ".rq";
+					String prqfn = pfn.substring(0, pfn.indexOf('.')) + "."+extension;
 					Path prq = p.getParent().resolve(prqfn);
 					try {
-						List<String> rq = SparqlInRdfToRq.asRq(ex);
+						List<String> rq = converter.apply(ex);
 						Files.write(prq, rq, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
 					} catch (IOException e) {
 						Failure.CANT_WRITE_EXAMPLE_RQ.exit(e);
diff --git a/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToMd.java b/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToMd.java
new file mode 100644
index 000000000..1e2f761c2
--- /dev/null
+++ b/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToMd.java
@@ -0,0 +1,62 @@
+package swiss.sib.rdf.sparql.examples;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.RDFS;
+import org.eclipse.rdf4j.model.vocabulary.SHACL;
+
+/**
+ * Renders the SPARQL examples found in an RDF model as markdown.
+ */
+public class SparqlInRdfToMd {
+	private static final IRI KEYWORD = SimpleValueFactory.getInstance().createIRI("https://schema.org/keyword");
+
+	/**
+	 * Render every subject typed SHACL.SPARQL_EXECUTABLE as a markdown section:
+	 * a heading with the subject, its keywords as bullets, its rdfs:comment
+	 * values and the query, preceded by the PREFIX declarations it needs, in a
+	 * fenced sparql code block.
+	 *
+	 * Despite the name, which is shared with SparqlInRdfToRq.asRq, the result
+	 * is markdown.
+	 *
+	 * @param ex model holding the examples and the prefix declarations
+	 * @return the markdown fragments in output order
+	 */
+	public static List<String> asRq(Model ex) {
+		List<String> rq = new ArrayList<>();
+
+		streamOf(ex, null, RDF.TYPE, SHACL.SPARQL_EXECUTABLE).map(Statement::getSubject).distinct()
+				.forEach(s -> {
+					rq.add("# "+s.stringValue()+"\n");
+					streamOf(ex, s, KEYWORD, null).map(Statement::getObject).map(Value::stringValue)
+							.map(k -> " * " + k)
+							.forEach(rq::add);
+					streamOf(ex, s, RDFS.COMMENT, null).map(Statement::getObject).map(Value::stringValue).forEach(rq::add);
+					rq.add("\n");
+					rq.add("```sparql");
+					Stream.of(SHACL.ASK, SHACL.SELECT, SHACL.CONSTRUCT, SIB.DESCRIBE)
+							.flatMap(qt -> streamOf(ex, s, qt, null)).map(Statement::getObject)
+							.map(o -> o.stringValue()).forEach(q ->SparqlInRdfToRq.addPrefixes(q, ex, rq));
+					rq.add("```");
+				});
+		return rq;
+	}
+
+	/** Stream the statements of the model matching the pattern; null acts as a wildcard. */
+	private static Stream<Statement> streamOf(Model ex, Resource s, IRI p, Value o) {
+		return StreamSupport.stream(ex.getStatements(s, p, o).spliterator(), false);
+	}
+}
diff --git a/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRq.java b/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRq.java
index 7cb65b8a8..0e0449ae4 100644
--- a/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRq.java
+++ b/src/main/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRq.java
@@ -38,15 +38,19 @@ public static List<String> asRq(Model ex) {
 			}
 			Stream.of(SHACL.ASK, SHACL.SELECT, SHACL.CONSTRUCT, SIB.DESCRIBE)
 					.flatMap(qt -> streamOf(ex, s, qt, null)).map(Statement::getObject)
-					.map(o -> o.stringValue()).map(q -> addPrefixes(q, ex)).forEach(rq::add);
+					.map(o -> o.stringValue()).forEach(q -> addPrefixes(q, ex, rq));
 		});
 		return rq;
 	}
 
 	/**
 	 * Add prefixes to the raw SPARQL query string
+	 *
+	 * @param query the raw SPARQL query
+	 * @param ex    model in which the prefix declarations are looked up
+	 * @param rq    list that receives the sorted PREFIX declarations followed by the query
 	 **/
-	public static String addPrefixes(String query, Model ex) {
+	public static void addPrefixes(String query, Model ex, List<String> rq) {
 		Iterator<Statement> iterator = streamOf(ex, null, SHACL.PREFIX_PROP, null).iterator();
 		List<String> prefixes = new ArrayList<>();
 
@@ -55,13 +59,44 @@ public static String addPrefixes(String query, Model ex) {
 			Resource ns = n.getSubject();
 			String nos = n.getObject().stringValue() + ':';
 
-			if (query.contains(nos)) {
+			if (queryContainsPrefix(query, nos)) {
 				prefixes.add(streamOf(ex, ns, SHACL.NAMESPACE_PROP, null).map(Statement::getObject)
 						.map(Value::stringValue).map(s -> "PREFIX "+nos+'<'+s+'>').collect(Collectors.joining()));
 			}
 		}
 		prefixes.sort(String::compareTo);
-		return prefixes.stream().collect(Collectors.joining("\n")) + '\n' + query;
+		rq.addAll(prefixes);
+		rq.add(query);
+	}
+
+	/**
+	 * Test whether the query uses the given prefix (for example "up:") as a
+	 * complete prefix and not merely as the tail of a longer one ("sup:").
+	 * All occurrences are inspected, so an early partial match does not hide
+	 * a later complete one.
+	 *
+	 * @param query  the raw SPARQL query
+	 * @param prefix the prefix label including its trailing colon
+	 * @return true if an occurrence starts the query or follows a delimiter
+	 */
+	static boolean queryContainsPrefix(String query, String prefix) {
+		for (int at = query.indexOf(prefix); at >= 0; at = query.indexOf(prefix, at + 1)) {
+			if (at == 0 || isPrefixDelimiter(query.charAt(at - 1))) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Characters that can directly precede a prefixed name: any whitespace
+	 * (including line breaks), the property path operators, brackets, list
+	 * separators, '=' and the '^^' of a typed literal. '<' is left out on
+	 * purpose so that the scheme of an IRI such as <http://...> is not taken
+	 * for a prefix.
+	 */
+	private static boolean isPrefixDelimiter(char c) {
+		return Character.isWhitespace(c) || "/|^!()[]{},;=".indexOf(c) >= 0;
 	}
 
 	private static Stream<Statement> streamOf(Model ex, Resource s, IRI p, Value o) {
diff --git a/src/test/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRqTest.java b/src/test/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRqTest.java
new file mode 100644
index 000000000..783a89c28
--- /dev/null
+++ b/src/test/java/swiss/sib/rdf/sparql/examples/SparqlInRdfToRqTest.java
@@ -0,0 +1,34 @@
+package swiss.sib.rdf.sparql.examples;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.Test;
+
+public class SparqlInRdfToRqTest {
+
+	@Test
+	public void prefixInQuery() {
+		String q ="""
+				SELECT ?taxon
+				FROM <http://sparql.uniprot.org/taxonomy>
+				WHERE
+				{
+					?taxon a up:Taxon .
+				}""";
+		assertTrue(SparqlInRdfToRq.queryContainsPrefix(q, "up:"), "up: (uniprot) is in the query");
+		assertFalse(SparqlInRdfToRq.queryContainsPrefix(q, "p:"), "p: (wikidata) not in the query");
+	}
+
+	@Test
+	public void prefixAfterPartialMatch() {
+		String q = "SELECT ?x WHERE { ?x a up:Taxon ; p:P31 ?y }";
+		assertTrue(SparqlInRdfToRq.queryContainsPrefix(q, "p:"), "p: is used after up:");
+	}
+
+	@Test
+	public void prefixAfterLineBreakOrDatatypeMarker() {
+		assertTrue(SparqlInRdfToRq.queryContainsPrefix("?x\nup:mnemonic ?y", "up:"), "up: at the start of a line");
+		assertTrue(SparqlInRdfToRq.queryContainsPrefix("\"1\"^^xsd:integer", "xsd:"), "xsd: in a typed literal");
+	}
+}