From 7412bbb9a6d1273de2b4972e109964c15b5a4918 Mon Sep 17 00:00:00 2001 From: anbo-de Date: Fri, 5 Jan 2024 02:03:06 +0100 Subject: [PATCH] QB-BirthDataWikidata v3.4.0: ext. functionality * separated functionality: 1. use named entities that are linked, 2. use firstname and lastname annotations * improved SPARQL queries * added additional documentation --- qanary-component-QB-BirthDataWikidata/pom.xml | 4 +- .../qb/birthdata/wikidata/Application.java | 17 +- .../wikidata/BirthDataQueryBuilder.java | 758 +++++++++++------- .../resources/config/application.properties | 2 +- ...medEntityLinkedToSpecificKnowledgeGraph.rq | 8 +- .../birthdatawikidata/qb/QueryTest.java | 2 +- .../qb/TestConfiguration.java | 10 +- .../queries/getAnnotationFilteredTest.rq | 4 +- 8 files changed, 483 insertions(+), 322 deletions(-) diff --git a/qanary-component-QB-BirthDataWikidata/pom.xml b/qanary-component-QB-BirthDataWikidata/pom.xml index 19ae378db..fe868f252 100644 --- a/qanary-component-QB-BirthDataWikidata/pom.xml +++ b/qanary-component-QB-BirthDataWikidata/pom.xml @@ -5,7 +5,7 @@ 4.0.0 eu.wdaqua.qanary.component qanary-component-QB-BirthDataWikidata - 3.3.7 + 3.4.0 org.springframework.boot @@ -15,7 +15,7 @@ 17 - [3.7.6,4.0.0) + [3.9.2,4.0.0) qanary qanary-component-qb-birthdata-wikidata 1.4.13 diff --git a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java index 65aa10bd5..e38526814 100644 --- a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java +++ b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/Application.java @@ -14,10 +14,10 @@ import org.springframework.context.annotation.ComponentScan; @SpringBootApplication -@ComponentScan(basePackages = {"eu.wdaqua.qanary"}) +@ComponentScan(basePackages = { 
"eu.wdaqua.qanary" }) /** - * basic class for wrapping functionality to a Qanary component - * note: there is no need to change something here + * basic class for wrapping functionality to a Qanary component note: there is + * no need to change something here */ public class Application { @@ -29,8 +29,8 @@ public static void main(String[] args) { } /** - * this method is needed to make the QanaryComponent in this project known - * to the QanaryServiceController in the qanary_component-template + * this method is needed to make the QanaryComponent in this project known to + * the QanaryServiceController in the qanary_component-template * * @return */ @@ -38,9 +38,10 @@ public static void main(String[] args) { public QanaryComponent qanaryComponent(@Value("${spring.application.name}") final String applicationName) { return new BirthDataQueryBuilder(applicationName); } - - @Bean - public BirthDataQueryBuilderController getBirthDataQueryBuilderController(BirthDataQueryBuilder myBirthDataQueryBuilder) { + + @Bean + public BirthDataQueryBuilderController getBirthDataQueryBuilderController( + BirthDataQueryBuilder myBirthDataQueryBuilder) { return new BirthDataQueryBuilderController(myBirthDataQueryBuilder); } diff --git a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java index 6a7aaf06f..5cbcd75fe 100644 --- a/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java +++ b/qanary-component-QB-BirthDataWikidata/src/main/java/eu/wdaqua/component/qb/birthdata/wikidata/BirthDataQueryBuilder.java @@ -1,13 +1,13 @@ package eu.wdaqua.component.qb.birthdata.wikidata; -import eu.wdaqua.qanary.commons.QanaryExceptionNoOrMultipleQuestions; -import eu.wdaqua.qanary.commons.QanaryMessage; -import 
eu.wdaqua.qanary.commons.QanaryQuestion; -import eu.wdaqua.qanary.commons.QanaryUtils; -import eu.wdaqua.qanary.commons.triplestoreconnectors.QanaryTripleStoreConnector; -import eu.wdaqua.qanary.component.QanaryComponent; -import eu.wdaqua.qanary.exceptions.SparqlQueryFailed; -import io.swagger.v3.oas.annotations.Operation; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.apache.jena.datatypes.xsd.XSDDatatype; import org.apache.jena.query.QuerySolution; import org.apache.jena.query.QuerySolutionMap; @@ -19,307 +19,459 @@ import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import eu.wdaqua.qanary.commons.QanaryExceptionNoOrMultipleQuestions; +import eu.wdaqua.qanary.commons.QanaryMessage; +import eu.wdaqua.qanary.commons.QanaryQuestion; +import eu.wdaqua.qanary.commons.QanaryUtils; +import eu.wdaqua.qanary.commons.triplestoreconnectors.QanaryTripleStoreConnector; +import eu.wdaqua.qanary.component.QanaryComponent; +import eu.wdaqua.qanary.exceptions.SparqlQueryFailed; +import io.swagger.v3.oas.annotations.Operation; /** - * represents a query builder to answer questions regarding birthplace and date using Wikidata + * represents a query builder to answer questions regarding birthplace and date + * using Wikidata *

- * requirements: expects a textual question to be stored in the Qanary triplestore, - * written in English language, as well as previously annotated named entities + * requirements: expects a textual question to be stored in the Qanary + * triplestore, written in English language, as well as previously annotated + * named entities *

- * outcome: if the question structure is supported and a previous component (NED/NER) has found - * named entities then this component constructs a Wikidata query that might be used to compute - * the answer to the question + * outcome: if the question structure is supported and a previous component + * (NED/NER) has found named entities then this component constructs a Wikidata + * query that might be used to compute the answer to the question */ @Component public class BirthDataQueryBuilder extends QanaryComponent { - private static final Logger logger = LoggerFactory.getLogger(BirthDataQueryBuilder.class); - - private static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq"; - private static final String FILENAME_ANNOTATIONS_FILTERED = "/queries/getAnnotationFilteredOnlyWikidata.rq"; - - private static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq"; - private static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq"; - - private static final String FIRSTNAME_ANNOTATION = "FIRST_NAME"; - private static final String LASTNAME_ANNOTATION = "LAST_NAME"; - - private static final String GRAPH = "graph"; - private static final String VALUE = "value"; - - private final String applicationName; - - private QanaryUtils myQanaryUtils; - private QanaryQuestion myQanaryQuestion; - private String myQuestion; - - private final String[] supportedQuestionPatterns = { - "([Ww]here and when was )(.*)( born)", - "([Ww]here was )(.*)( born)", - "([Ww]hen was )(.*)( born)" - }; - - private int patternIndex; - - public BirthDataQueryBuilder(@Value("$P{spring.application.name}") final String applicationName) { - this.applicationName = applicationName; - // check if files exists and are not empty - QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS); - 
QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS_FILTERED); - QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON); - QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME); - } - - /** - * compare the question against regular expression(s) representing the supported format - * and if a match is found, store the matched pattern index - * - * @param questionString the textual question - */ - @Operation( - summary = "Check if the question is supported and store the matched pattern index", - operationId = "isQuestionSupported", - description = "Compare the question against regular expression(s) representing the supported format and if a match is found, store the matched pattern index" - ) - private boolean isQuestionSupported(String questionString) { - for (int i = 0; i < this.supportedQuestionPatterns.length; i++) { - String pattern = this.supportedQuestionPatterns[i]; - - Pattern p = Pattern.compile(pattern); - Matcher m = p.matcher(questionString); - logger.info("checking pattern \"{}\"", pattern); - if (m.find()) { - this.patternIndex = i; - return true; - } - } - - return false; - } - - /** - * Find the position of a name in the textual question. - * - * @param questionString the textual question - * @param pattern a regular expression (from supportedQuestionPatterns) - */ - @Operation( - summary = "Find the index of the entity in the question", - operationId = "getNamePosition", - description = "Find the position of a name in the textual question." // - + "The name is represented as a matched group within supportedQuestionPatterns." 
- ) - private int getNamePosition(String questionString, String pattern) { - Matcher m = Pattern.compile(pattern).matcher(questionString); - m.find(); - int index = m.start(2); - return index; - } - - private String loadQueryFromFile(String filenameWithRelativePath, QuerySolutionMap bindings) throws IOException { - return QanaryTripleStoreConnector.readFileFromResourcesWithMap(filenameWithRelativePath, bindings); - } - - /** - * standard method for processing a message from the central Qanary component - * - * @param myQanaryMessage - * @throws Exception - */ - @Operation( - summary = "Process a Qanary question with BirthDataQueryBuilder", // - operationId = "process", // - description = "Encapsulates the main functionality of this component. " // - + "Construct a Wikidata query to find birth date and place for named entities." - ) - @Override - public QanaryMessage process(QanaryMessage myQanaryMessage) throws Exception { - logger.info("process: {}", myQanaryMessage); - - // STEP 1: Get the required Data - // - // This example component requires the textual representation of the Question - // as well as annotations of Wikidata entities made by the OpenTapioca NED. - - // get the question as String - this.myQanaryUtils = this.getUtils(myQanaryMessage); - this.myQanaryQuestion = new QanaryQuestion<>(myQanaryMessage, myQanaryUtils.getQanaryTripleStoreConnector()); - this.myQuestion = myQanaryQuestion.getTextualRepresentation(); - - // This component is only supposed to answer a specific type of question. - // Therefore, we only need to continue if the question asks for birthplace and date or if there is an - // annotation of the first and lastname. 
- - - // Get the firstname annotation if it's annotated - QuerySolutionMap bindingsForFirstname = new QuerySolutionMap(); - bindingsForFirstname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); - bindingsForFirstname.add(VALUE, ResourceFactory.createStringLiteral(FIRSTNAME_ANNOTATION)); - - String sparqlCheckFirstname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForFirstname); - ResultSet resultsetFirstname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckFirstname); - - // Get the lastname annotation, if it's annotated - QuerySolutionMap bindingsForLastname = new QuerySolutionMap(); - // the currently used graph - bindingsForLastname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); - // annotated for the current question - bindingsForLastname.add(VALUE, ResourceFactory.createStringLiteral(LASTNAME_ANNOTATION)); - - String sparqlCheckLastname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForLastname); - ResultSet resultsetLastname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckLastname); - - - // STEP 2: Create queries for Wikidata if the question is supported or annotations are available - ArrayList queriesForAnnotation = new ArrayList<>(); - - if (resultsetFirstname.hasNext() && resultsetLastname.hasNext()) { - // In this example, we are only interested in Entities that were found from another component and - // annotated with the annotation "FIRST_NAME" and "LAST_NAME". 
- queriesForAnnotation = createQueriesForAnnotation(resultsetFirstname, resultsetLastname); - } else { - logger.info("no annotation for {} and {} found", FIRSTNAME_ANNOTATION, LASTNAME_ANNOTATION); - } - - if ((queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank()) && this.isQuestionSupported(myQuestion)) { - // In this example we are only interested in Entities that were found at a specific point - // in the question: e.g., 'when and where was born?'. - // Because we do not require entities that might have been found anywhere else in the - // question we can filter our results: - - int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]); - // formulate a query to find existing information - queriesForAnnotation = createQueriesForAnnotation(filterStart); - - } - - // If no query was created, we can stop here. - if (queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank() ) { - logger.warn("nothing to do here as question \"{}\" does not have the supported format", myQuestion); - return myQanaryMessage; - } - - - for (int i = 0; i < queriesForAnnotation.size(); i++) { - // store the created select query as an annotation for the current question - // define here the parameters for the SPARQL INSERT query - QuerySolutionMap bindings = new QuerySolutionMap(); - // use here the variable names defined in method insertAnnotationOfAnswerSPARQL - bindings.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); - bindings.add("targetQuestion", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString())); - bindings.add("selectQueryThatShouldComputeTheAnswer", ResourceFactory.createStringLiteral(queriesForAnnotation.get(i))); - bindings.add("confidence", ResourceFactory.createTypedLiteral("1.0", XSDDatatype.XSDfloat)); // as it is rule based, a high confidence is expressed - bindings.add("application", ResourceFactory.createResource("urn:qanary:" + 
this.applicationName)); - - // get the template of the INSERT query - String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector.insertAnnotationOfAnswerSPARQL(bindings); - logger.info("SPARQL insert for adding data to Qanary triplestore: {}", insertDataIntoQanaryTriplestoreQuery); - - //STEP 4: Push the computed result to the Qanary triplestore - logger.info("store data in graph {} of Qanary triplestore endpoint {}", // - myQanaryMessage.getValues().get(myQanaryMessage.getOutGraph()), // - myQanaryMessage.getValues().get(myQanaryMessage.getEndpoint())); - myQanaryUtils.getQanaryTripleStoreConnector().update(insertDataIntoQanaryTriplestoreQuery); - } - - return myQanaryMessage; - } - - private ArrayList createQueriesForAnnotation(int filterStart) throws IOException, QanaryExceptionNoOrMultipleQuestions, URISyntaxException, SparqlQueryFailed { - QuerySolutionMap bindingsForAnnotation = new QuerySolutionMap(); - // the currently used graph - bindingsForAnnotation.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); - // annotated for the current question - bindingsForAnnotation.add("source", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString())); - // only for relevant annotations - bindingsForAnnotation.add("filterStart", ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint)); - - String sparqlGetAnnotation = this.loadQueryFromFile(FILENAME_ANNOTATIONS_FILTERED, bindingsForAnnotation); - - // STEP 3: Compute SPARQL select queries that should produce the result for every identified entity - // - // Rather than computing a (textual) result this component provides a - // SPARQL query that might be used to answer the question. - // This query can the used by other components. 
- - // there might be multiple entities identified for one name - ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlGetAnnotation); - ArrayList queries = new ArrayList<>(); - while (resultset.hasNext()) { - QuerySolution tupel = resultset.next(); - RDFNode wikidataResource = tupel.get("wikidataResource"); - logger.info("creating query for resource: {}", wikidataResource); - String createdWikiDataQuery = createWikidataSparqlQuery(wikidataResource); - queries.add(createdWikiDataQuery); - } - - return queries; - } - - private ArrayList createQueriesForAnnotation(ResultSet resultsetFirstname, ResultSet resultsetLastname) throws IOException { - ArrayList firstnameStartsEnds = new ArrayList<>(); - ArrayList lastnameStartsEnds = new ArrayList<>(); - - while (resultsetFirstname.hasNext()) { - Integer[] startEnd = new Integer[2]; - QuerySolution tupel = resultsetFirstname.next(); - startEnd[0] = tupel.getLiteral("start").getInt(); - startEnd[1] = tupel.getLiteral("end").getInt(); - - firstnameStartsEnds.add(startEnd); - } - - while (resultsetLastname.hasNext()) { - Integer[] startEnd = new Integer[2]; - QuerySolution tupel = resultsetLastname.next(); - startEnd[0] = tupel.getLiteral("start").getInt(); - startEnd[1] = tupel.getLiteral("end").getInt(); - - lastnameStartsEnds.add(startEnd); - } - - ArrayList queries = new ArrayList<>(); - for (int i = 0; i < firstnameStartsEnds.size(); i++) { - String firstanme = ""; - String lastname = ""; - - - try { - firstanme = myQuestion.substring(firstnameStartsEnds.get(i)[0], firstnameStartsEnds.get(i)[1]); - lastname = myQuestion.substring(lastnameStartsEnds.get(i)[0], lastnameStartsEnds.get(i)[1]); - } catch (Exception e) { - logger.error("error while get first or lastname: {}", e.getMessage()); - break; - } - - logger.info("creating query for {} {}", firstanme, lastname); - - String createdWikiDataQuery = createWikidataSparqlQuery(firstanme, lastname); - queries.add(createdWikiDataQuery); - } - - 
return queries; - } - - public String createWikidataSparqlQuery(RDFNode wikidataResource) throws IOException { - // populate a generalized answer query with the specific entity (Wikidata ID) - QuerySolutionMap bindingsForWikidataResultQuery = new QuerySolutionMap(); - // set expected person as parameter for Wikidata query - bindingsForWikidataResultQuery.add("person", wikidataResource); - return this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON, bindingsForWikidataResultQuery); - } - - public String createWikidataSparqlQuery(String firstname, String lastname) throws IOException { - // populate a generalized answer query with the specific entity (Wikidata ID) - QuerySolutionMap bindingsForWikidataResultQuery = new QuerySolutionMap(); - // set expected last and firstname as parameter for Wikidata query - bindingsForWikidataResultQuery.add("firstnameValue", ResourceFactory.createLangLiteral(firstname, "en")); - bindingsForWikidataResultQuery.add("lastnameValue", ResourceFactory.createLangLiteral(lastname, "en")); - return this.loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME, bindingsForWikidataResultQuery); - } + private static final Logger logger = LoggerFactory.getLogger(BirthDataQueryBuilder.class); + + public static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq"; + public static final String FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA = "/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq"; + + public static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq"; + public static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq"; + + private static final String FIRSTNAME_ANNOTATION = "FIRST_NAME"; + private static final String LASTNAME_ANNOTATION = "LAST_NAME"; + + private static final String GRAPH = "graph"; + private static final String VALUE = 
"value"; + + private final String applicationName; + + private QanaryUtils myQanaryUtils; + private QanaryQuestion myQanaryQuestion; + private String myQuestion; + + private final String[] supportedQuestionPatterns = { "([Ww]here and when was )(.*)( born)", + "([Ww]here was )(.*)( born)", "([Ww]hen was )(.*)( born)" }; + + private int patternIndex; + + public BirthDataQueryBuilder(@Value("$P{spring.application.name}") final String applicationName) { + this.applicationName = applicationName; + // check if files exists and are not empty + QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS); + QanaryTripleStoreConnector + .guardNonEmptyFileFromResources(FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA); + QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON); + QanaryTripleStoreConnector.guardNonEmptyFileFromResources(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME); + } + + /** + * compare the question against regular expression(s) representing the supported + * format and if a match is found, store the matched pattern index + * + * @param questionString the textual question + */ + @Operation(summary = "Check if the question is supported and store the matched pattern index", operationId = "isQuestionSupported", description = "Compare the question against regular expression(s) representing the supported format and if a match is found, store the matched pattern index") + private boolean isQuestionSupported(String questionString) { + for (int i = 0; i < this.supportedQuestionPatterns.length; i++) { + String pattern = this.supportedQuestionPatterns[i]; + + Pattern p = Pattern.compile(pattern); + Matcher m = p.matcher(questionString); + logger.info("checking pattern \"{}\"", pattern); + if (m.find()) { + this.patternIndex = i; + return true; + } + } + + return false; + } + + /** + * Find the position of a name in the textual question. 
+ * + * @param questionString the textual question + * @param pattern a regular expression (from supportedQuestionPatterns) + */ + @Operation(summary = "Find the index of the entity in the question", operationId = "getNamePosition", description = "Find the position of a name in the textual question." // + + "The name is represented as a matched group within supportedQuestionPatterns.") + private int getNamePosition(String questionString, String pattern) { + Matcher m = Pattern.compile(pattern).matcher(questionString); + m.find(); + int index = m.start(2); + return index; + } + + private String loadQueryFromFile(String filenameWithRelativePath, QuerySolutionMap bindings) throws IOException { + return QanaryTripleStoreConnector.readFileFromResourcesWithMap(filenameWithRelativePath, bindings); + } + + /** + * standard method for processing a message from the central Qanary component + * + * @param myQanaryMessage + * @throws Exception + */ + @Operation(summary = "Process a Qanary question with BirthDataQueryBuilder", // + operationId = "process", // + description = "Encapsulates the main functionality of this component. " // + + "Construct a Wikidata query to find birth date and place for named entities." // + + "The process can use the provided firstname and lastname or a named entity annotation.") + @Override + public QanaryMessage process(QanaryMessage myQanaryMessage) throws Exception { + logger.info("process: {}", myQanaryMessage); + + // This example component requires the textual representation of the Question + // as well as annotations of Wikidata entities made by the OpenTapioca NED. 
+ + this.myQanaryUtils = this.getUtils(myQanaryMessage); + this.myQanaryQuestion = new QanaryQuestion<>(myQanaryMessage, myQanaryUtils.getQanaryTripleStoreConnector()); + this.myQuestion = myQanaryQuestion.getTextualRepresentation(); // get the question as String + + // STEP 1-3 have two options + + // first, try to use a named entity annotation because it is more precise if it + // works, then stop + myQanaryMessage = this.processForExistingNamedEntity(myQanaryMessage); + if (myQanaryMessage != null) { + logger.info("Found a named entity annotation. Processing finished."); + return myQanaryMessage; + } + +// // second, let's try to find a firstname and lastname, if that works we stop +// myQanaryMessage = this.processForFirstNameAndLastName(myQanaryMessage); +// if( myQanaryMessage != null ) { +// logger.info("Found firstname and lastname. Processing finished."); +// return myQanaryMessage; +// } + + logger.warn("Nothing could be done here."); + return myQanaryMessage; + } + + /** + * This process is only supposed to answer a specific type of question. + * Therefore, we only need to continue if the question asks for birthplace and + * date or if there is an annotation of the first and lastname. 
+ * + * @param myQanaryMessage + * @return + * @throws Exception + */ + private QanaryMessage processForFirstNameAndLastName(QanaryMessage myQanaryMessage) throws Exception { + + // STEP 1: Get the required Data + // Get the firstname annotation if it's annotated + QuerySolutionMap bindingsForFirstname = new QuerySolutionMap(); + bindingsForFirstname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); + bindingsForFirstname.add(VALUE, ResourceFactory.createStringLiteral(FIRSTNAME_ANNOTATION)); + + String sparqlCheckFirstname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForFirstname); + ResultSet resultsetFirstname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckFirstname); + + // Get the lastname annotation if it's annotated + QuerySolutionMap bindingsForLastname = new QuerySolutionMap(); + // the currently used graph + bindingsForLastname.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); + // annotated for the current question + bindingsForLastname.add(VALUE, ResourceFactory.createStringLiteral(LASTNAME_ANNOTATION)); + + String sparqlCheckLastname = this.loadQueryFromFile(FILENAME_ANNOTATIONS, bindingsForLastname); + ResultSet resultsetLastname = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlCheckLastname); + + // STEP 2: Create queries for Wikidata if the question is supported or + // annotations are available + ArrayList queriesForAnnotation = new ArrayList<>(); + + if (resultsetFirstname.hasNext() && resultsetLastname.hasNext()) { + // In this example, we are only interested in Entities that were found from + // another component and + // annotated with the annotation "FIRST_NAME" and "LAST_NAME". 
+ queriesForAnnotation = createQueriesForAnnotation(resultsetFirstname, resultsetLastname); + } else { + logger.info("no annotation for {} and {} found", FIRSTNAME_ANNOTATION, LASTNAME_ANNOTATION); + } + + if ((queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank()) + && this.isQuestionSupported(myQuestion)) { + // In this example we are only interested in Entities that were found at a + // specific point + // in the question: e.g., 'when and where was born?'. + // Because we do not require entities that might have been found anywhere else + // in the + // question we can filter our results: + + int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]); + // formulate a query to find existing information + queriesForAnnotation = createQueriesForAnnotation(filterStart); + } + + // If no query was created, we can stop here. + if (queriesForAnnotation.isEmpty() || queriesForAnnotation.get(0).isBlank()) { + logger.warn("nothing to do here as question \"{}\" does not have the supported format; ", myQuestion, + resultsetFirstname); + return null; + } else { + for (int i = 0; i < queriesForAnnotation.size(); i++) { + // store the created select query as an annotation for the current question + // define here the parameters for the SPARQL INSERT query + QuerySolutionMap bindings = new QuerySolutionMap(); + // use here the variable names defined in method insertAnnotationOfAnswerSPARQL + bindings.add(GRAPH, ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString())); + bindings.add("targetQuestion", + ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString())); + bindings.add("selectQueryThatShouldComputeTheAnswer", + ResourceFactory.createStringLiteral(queriesForAnnotation.get(i))); + bindings.add("confidence", ResourceFactory.createTypedLiteral("1.0", XSDDatatype.XSDfloat)); // as it is + // rule + // based, + // a + // high + // confidence + // is + // expressed + 
bindings.add("application", ResourceFactory.createResource("urn:qanary:" + this.applicationName)); + + // get the template of the INSERT query + String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector + .insertAnnotationOfAnswerSPARQL(bindings); + logger.info("SPARQL insert for adding data to Qanary triplestore: {}", + insertDataIntoQanaryTriplestoreQuery); + + // STEP 3: Push the computed result to the Qanary triplestore + logger.info("store data in graph {} of Qanary triplestore endpoint {}", // + myQanaryMessage.getValues().get(myQanaryMessage.getOutGraph()), // + myQanaryMessage.getValues().get(myQanaryMessage.getEndpoint())); + myQanaryUtils.getQanaryTripleStoreConnector().update(insertDataIntoQanaryTriplestoreQuery); + } + + return myQanaryMessage; + } + }
+
+    /**
+     * Creates one answer query per Wikidata resource that is annotated at the
+     * expected name position of the question and stores each of them as an
+     * annotation in the Qanary triplestore.
+     *
+     * @param myQanaryMessage message of the current process; returned on success,
+     *                        its endpoint is only used for logging here
+     * @return the given message if at least one annotation was stored, or
+     *         {@code null} if the question matches no supported pattern or no
+     *         Wikidata resource was found in the input graph
+     * @throws IOException                          if a query template cannot be loaded
+     * @throws SparqlQueryFailed                    if accessing the triplestore fails
+     * @throws QanaryExceptionNoOrMultipleQuestions declared by the used Qanary API
+     * @throws URISyntaxException                   declared by the used Qanary API
+     */
+    private QanaryMessage processForExistingNamedEntity(QanaryMessage myQanaryMessage)
+            throws IOException, SparqlQueryFailed, QanaryExceptionNoOrMultipleQuestions, URISyntaxException {
+
+        logger.info("Executing processForExistingNamedEntity.");
+
+        String inGraph = myQanaryQuestion.getInGraph().toASCIIString();
+        String outGraph = myQanaryQuestion.getOutGraph().toASCIIString();
+        String myQuestionURI = myQanaryQuestion.getUri().toASCIIString();
+        String endpoint = myQanaryMessage.getEndpoint().toASCIIString();
+
+        // STEP 1: Get Named Entity from the Qanary triplestore
+        if (!this.isQuestionSupported(myQuestion)) {
+            // stop the processing
+            logger.warn("processForExistingNamedEntity: Stop here as the question pattern was not found in '{}'.",
+                    this.myQuestion);
+            return null;
+        }
+        // Only entities that start at the position computed for the matched question
+        // pattern are of interest.
+        // NOTE(review): presumably isQuestionSupported also sets this.patternIndex --
+        // confirm against its implementation.
+        int filterStart = this.getNamePosition(myQuestion, this.supportedQuestionPatterns[this.patternIndex]);
+
+        QuerySolutionMap bindingsForAnnotationWithWikidataResource = new QuerySolutionMap();
+        bindingsForAnnotationWithWikidataResource.add(GRAPH, ResourceFactory.createResource(inGraph));
+        bindingsForAnnotationWithWikidataResource.add("regexForResourceFilter",
+                ResourceFactory.createPlainLiteral("^http://www.wikidata.org/entity/"));
+        bindingsForAnnotationWithWikidataResource.add("filterStart",
+                ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint));
+        String sparqlNamedEntityAnnotation = this.loadQueryFromFile(
+                FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA, bindingsForAnnotationWithWikidataResource);
+
+        // Find the resources that are annotated in the given question. The same
+        // resource may be annotated several times, hence only the highest score per
+        // resource is kept.
+        ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlNamedEntityAnnotation);
+        Map<String, Float> wikidataResources = new HashMap<>();
+        while (resultset.hasNext()) {
+            QuerySolution tupel = resultset.next();
+            String wikidataResource = tupel.get("wikidataResource").asResource().getURI();
+            float score = tupel.get("annotationScore").asLiteral().getFloat();
+            // insert new resources, replace a known one only by a strictly higher score
+            wikidataResources.merge(wikidataResource, score,
+                    (knownScore, newScore) -> knownScore < newScore ? newScore : knownScore);
+        }
+        logger.info("found entities: {}", wikidataResources);
+        if (wikidataResources.isEmpty()) {
+            // stop the processing
+            logger.warn("processForExistingNamedEntity: Stop here as no Wikidata resources were found in the graph {}.",
+                    inGraph);
+            return null;
+        }
+
+        // STEP 2: compute SPARQL queries that can be used to retrieve the actual answer
+        // (INSERT queries adding an annotation of type AnswerSPARQL to the Qanary
+        // triplestore); the map is not empty here, so at least one query is created
+        ArrayList<String> queriesCapableOfRetrievingTheAnswer = new ArrayList<>();
+        for (Map.Entry<String, Float> namedEntity : wikidataResources.entrySet()) {
+            float score = namedEntity.getValue();
+
+            String answerRepresentedAsSparqlQuery = createWikidataSparqlQuery(namedEntity.getKey());
+
+            QuerySolutionMap bindingsForInserting = new QuerySolutionMap();
+            bindingsForInserting.add(GRAPH, ResourceFactory.createResource(outGraph));
+            bindingsForInserting.add("targetQuestion", ResourceFactory.createResource(myQuestionURI));
+            bindingsForInserting.add("selectQueryThatShouldComputeTheAnswer",
+                    ResourceFactory.createStringLiteral(answerRepresentedAsSparqlQuery));
+            // we take over the score of the named entity recognizer (NER+NED)
+            bindingsForInserting.add("confidence",
+                    ResourceFactory.createTypedLiteral(String.valueOf(score), XSDDatatype.XSDfloat));
+            bindingsForInserting.add("application",
+                    ResourceFactory.createResource("urn:qanary:" + this.applicationName));
+
+            // get the template of the INSERT query to insert the new annotation into the
+            // Qanary triplestore
+            String insertDataIntoQanaryTriplestoreQuery = QanaryTripleStoreConnector
+                    .insertAnnotationOfAnswerSPARQL(bindingsForInserting);
+            logger.info("created SPARQL INSERT query for adding data to Qanary triplestore: {}",
+                    insertDataIntoQanaryTriplestoreQuery);
+            queriesCapableOfRetrievingTheAnswer.add(insertDataIntoQanaryTriplestoreQuery);
+        }
+        logger.info(
+                "Created {} SPARQL queries that should be capable of retrieving the correct answer over Wikidata.",
+                queriesCapableOfRetrievingTheAnswer.size());
+
+        // STEP 3: store the created information in the Qanary triplestore as
+        // AnnotationOfAnswerSPARQL
+        for (String query : queriesCapableOfRetrievingTheAnswer) {
+            logger.info("store data in graph {} of Qanary triplestore endpoint {}", outGraph, endpoint);
+            myQanaryUtils.getQanaryTripleStoreConnector().update(query);
+        }
+
+        return myQanaryMessage; // everything done
+    }
+
+    /**
+     * Creates one Wikidata query for every Wikidata resource that is annotated at
+     * the given start position of the current question.
+     *
+     * @param filterStart start index an annotation must have to be considered
+     * @return the created Wikidata SPARQL queries (empty if no annotation matched)
+     * @throws IOException                          if a query template cannot be loaded
+     * @throws QanaryExceptionNoOrMultipleQuestions declared by the used Qanary API
+     * @throws URISyntaxException                   declared by the used Qanary API
+     * @throws SparqlQueryFailed                    if accessing the triplestore fails
+     */
+    private ArrayList<String> createQueriesForAnnotation(int filterStart)
+            throws IOException, QanaryExceptionNoOrMultipleQuestions, URISyntaxException, SparqlQueryFailed {
+        QuerySolutionMap bindingsForAnnotation = new QuerySolutionMap();
+        // the currently used graph (note: the out graph of the question is queried)
+        bindingsForAnnotation.add(GRAPH,
+                ResourceFactory.createResource(myQanaryQuestion.getOutGraph().toASCIIString()));
+        // annotated for the current question
+        bindingsForAnnotation.add("source", ResourceFactory.createResource(myQanaryQuestion.getUri().toASCIIString()));
+        // only for relevant annotations filter by starting point
+        bindingsForAnnotation.add("filterStart",
+                ResourceFactory.createTypedLiteral(String.valueOf(filterStart), XSDDatatype.XSDint));
+        // filter resources to get only the ones that are pointing to the Wikidata
+        // knowledge graph
+        bindingsForAnnotation.add("regexForResourceFilter",
+                ResourceFactory.createPlainLiteral("^http://www.wikidata.org/entity/"));
+
+        String sparqlGetAnnotation = this.loadQueryFromFile(FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA,
+                bindingsForAnnotation);
+
+        // Compute SPARQL select queries that should produce the result for every
+        // identified entity.
+        // Rather than computing a (textual) result this component provides a SPARQL
+        // query that might be used to answer the question. This query can then be
+        // used by other components.
+        ResultSet resultset = myQanaryUtils.getQanaryTripleStoreConnector().select(sparqlGetAnnotation);
+        ArrayList<String> queries = new ArrayList<>();
+        while (resultset.hasNext()) {
+            QuerySolution tupel = resultset.next();
+            RDFNode wikidataResource = tupel.get("wikidataResource");
+            logger.info("creating query for resource: {}", wikidataResource);
+            String createdWikiDataQuery = createWikidataSparqlQuery(wikidataResource);
+            queries.add(createdWikiDataQuery);
+        }
+
+        return queries;
+    }
+
+    /**
+     * Creates one Wikidata query per first name / last name pair. The i-th first
+     * name annotation is combined with the i-th last name annotation; the names
+     * are cut out of the question text using the annotated start and end values
+     * as arguments of {@link String#substring(int, int)}.
+     *
+     * @param resultsetFirstname solutions providing the literals "start" and "end"
+     *                           of the first name annotations
+     * @param resultsetLastname  solutions providing the literals "start" and "end"
+     *                           of the last name annotations
+     * @return the created queries; creation stops at the first pair whose positions
+     *         cannot be applied to the question text
+     * @throws IOException if the query template cannot be loaded
+     */
+    private ArrayList<String> createQueriesForAnnotation(ResultSet resultsetFirstname, ResultSet resultsetLastname)
+            throws IOException {
+        ArrayList<int[]> firstnameStartsEnds = collectStartEndOffsets(resultsetFirstname);
+        ArrayList<int[]> lastnameStartsEnds = collectStartEndOffsets(resultsetLastname);
+
+        // only complete pairs can be used; surplus annotations of one kind are ignored
+        int numberOfPairs = Math.min(firstnameStartsEnds.size(), lastnameStartsEnds.size());
+        if (firstnameStartsEnds.size() != lastnameStartsEnds.size()) {
+            logger.warn("found {} firstname and {} lastname annotations, only {} pairs are processed",
+                    firstnameStartsEnds.size(), lastnameStartsEnds.size(), numberOfPairs);
+        }
+
+        ArrayList<String> queries = new ArrayList<>();
+        for (int i = 0; i < numberOfPairs; i++) {
+            int[] firstnameStartEnd = firstnameStartsEnds.get(i);
+            int[] lastnameStartEnd = lastnameStartsEnds.get(i);
+            String firstname;
+            String lastname;
+
+            try {
+                firstname = myQuestion.substring(firstnameStartEnd[0], firstnameStartEnd[1]);
+                lastname = myQuestion.substring(lastnameStartEnd[0], lastnameStartEnd[1]);
+            } catch (RuntimeException e) {
+                // positions do not fit the question text: stop, keep what was created so far
+                logger.error("error while get first or lastname: {}", e.getMessage(), e);
+                break;
+            }
+
+            logger.info("creating query for {} {}", firstname, lastname);
+
+            queries.add(createWikidataSparqlQuery(firstname, lastname));
+        }
+
+        return queries;
+    }
+
+    /**
+     * Reads the integer literals "start" and "end" of every remaining solution of
+     * the given result set.
+     *
+     * @param resultset the result set to consume
+     * @return one {@code {start, end}} array per solution, in result order
+     */
+    private ArrayList<int[]> collectStartEndOffsets(ResultSet resultset) {
+        ArrayList<int[]> startsEnds = new ArrayList<>();
+        while (resultset.hasNext()) {
+            QuerySolution tupel = resultset.next();
+            startsEnds.add(new int[] { tupel.getLiteral("start").getInt(), tupel.getLiteral("end").getInt() });
+        }
+        return startsEnds;
+    }
+
+    public String
createWikidataSparqlQuery(String wikidataResource) throws IOException {
+        // convenience overload: wrap the given URI into an RDF resource and delegate
+        // to the RDFNode variant
+        RDFNode personResource = ResourceFactory.createResource(wikidataResource);
+        return createWikidataSparqlQuery(personResource);
+    }
+
+    /**
+     * Creates the birth data query for one specific person by binding the given
+     * node to the parameter "person" of the person query template.
+     *
+     * @param wikidataResource node that is bound to "person"
+     * @return the query text returned by {@code loadQueryFromFile}
+     * @throws IOException if the query template cannot be loaded
+     */
+    public String createWikidataSparqlQuery(RDFNode wikidataResource) throws IOException {
+        QuerySolutionMap personBinding = new QuerySolutionMap();
+        personBinding.add("person", wikidataResource);
+        return loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON, personBinding);
+    }
+
+    /**
+     * Creates the birth data query for a person given by name. Both names are
+     * bound as literals with the language tag "en" to the parameters
+     * "firstnameValue" and "lastnameValue" of the first/last name query template.
+     *
+     * @param firstname first name of the person
+     * @param lastname  last name of the person
+     * @return the query text returned by {@code loadQueryFromFile}
+     * @throws IOException if the query template cannot be loaded
+     */
+    public String createWikidataSparqlQuery(String firstname, String lastname) throws IOException {
+        QuerySolutionMap nameBindings = new QuerySolutionMap();
+        nameBindings.add("firstnameValue", ResourceFactory.createLangLiteral(firstname, "en"));
+        nameBindings.add("lastnameValue", ResourceFactory.createLangLiteral(lastname, "en"));
+        return loadQueryFromFile(FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME, nameBindings);
+    }
 }
diff --git a/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties b/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties index 2213cf8f4..71b22b508 100644 --- a/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties +++ b/qanary-component-QB-BirthDataWikidata/src/main/resources/config/application.properties @@ -1,6 +1,6 @@ # Update the port number server.port=5555 -spring.application.name=BirthDataQueryBuilderWikidata +spring.application.name=QB-BirthDataWikidata spring.application.description=${spring.application.name} is a Qanary component for creating a Wikidata SPARQL query
intended to find the birth place and date of people by firstname/lastname or Wikidata resource # Update the URL of the Qanary pipeline spring.boot.admin.url=http://localhost:8080 diff --git a/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq b/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq index 25a95bee2..9a2f5703a 100644 --- a/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq +++ b/qanary-component-QB-BirthDataWikidata/src/main/resources/queries/getAnnotationOfNamedEntityLinkedToSpecificKnowledgeGraph.rq @@ -1,3 +1,4 @@ +# retrieve an annotation pointing to Wikidata entity from a text segment (TextSelector) PREFIX dbr: PREFIX oa: PREFIX qa: @@ -7,15 +8,18 @@ SELECT * FROM ?graph WHERE { ?annotation oa:hasBody ?wikidataResource . - # accept only Wikidata resources as it is a Wikidata Query Builder component - FILTER( REGEX(?wikidataResource, "^http://www.wikidata.org/entity/", "i")) ?annotation qa:score ?annotationScore . ?annotation oa:hasTarget ?target . + ?annotation oa:annotatedBy ?annotator . ?target oa:hasSource ?source . ?target oa:hasSelector ?textSelector . ?textSelector rdf:type oa:TextPositionSelector . ?textSelector oa:start ?start . ?textSelector oa:end ?end . + + # filter named entities that have particular start index in the given question FILTER(?start = ?filterStart) . 
+ # accept only Wikidata resources as it is a Wikidata Query Builder component + FILTER( REGEX(?wikidataResource, ?regexForResourceFilter, "i")) } ORDER BY DESC(?annotationScore) \ No newline at end of file diff --git a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java index a62a9d386..b1d91f8e9 100644 --- a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java +++ b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/QueryTest.java @@ -48,7 +48,7 @@ void filenameAnnotationsFilteredQueryTest() throws IOException { bindingsForAnnotation.add("filterStart", ResourceFactory.createTypedLiteral(String.valueOf(5), XSDDatatype.XSDint)); String sparqlGetAnnotation = QanaryTripleStoreConnector.readFileFromResourcesWithMap( - TestConfiguration.FILENAME_ANNOTATIONS_FILTERED, + TestConfiguration.FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA, bindingsForAnnotation ); diff --git a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java index d01e7ef20..7d5c2c456 100644 --- a/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java +++ b/qanary-component-QB-BirthDataWikidata/src/test/java/eu/wdaqua/component/birthdatawikidata/qb/TestConfiguration.java @@ -4,12 +4,14 @@ import java.nio.file.Files; import java.nio.file.Paths; +import eu.wdaqua.component.qb.birthdata.wikidata.BirthDataQueryBuilder; + @org.springframework.boot.test.context.TestConfiguration public class TestConfiguration { - protected static final String FILENAME_ANNOTATIONS = "/queries/getAnnotation.rq"; - protected 
static final String FILENAME_ANNOTATIONS_FILTERED = "/queries/getAnnotationFiltered.rq"; - protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = "/queries/getQuestionAnswerFromWikidataByPerson.rq"; - protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = "/queries/getQuestionAnswerFromWikidataByFirstnameLastname.rq"; + protected static final String FILENAME_ANNOTATIONS = BirthDataQueryBuilder.FILENAME_ANNOTATIONS; + protected static final String FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA = BirthDataQueryBuilder.FILENAME_ANNOTATIONS_NAMED_ENTITY_FILTERED_FOR_WIKIDATA; + protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON = BirthDataQueryBuilder.FILENAME_WIKIDATA_BIRTHDATA_QUERY_PERSON; + protected static final String FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME = BirthDataQueryBuilder.FILENAME_WIKIDATA_BIRTHDATA_QUERY_FIRST_AND_LASTNAME; /** * get the defined SPARQL query and remove all control characters (like newline) diff --git a/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq b/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq index 8c93916eb..60e226c53 100644 --- a/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq +++ b/qanary-component-QB-BirthDataWikidata/src/test/resources/queries/getAnnotationFilteredTest.rq @@ -8,7 +8,8 @@ FROM WHERE { ?annotation oa:hasBody ?wikidataResource ; qa:score ?annotationScore ; - oa:hasTarget ?target . + oa:hasTarget ?target ; + oa:annotatedBy ?annotator . ?target oa:hasSource ; oa:hasSelector ?textSelector . ?textSelector @@ -16,5 +17,6 @@ WHERE oa:start ?start ; oa:end ?end FILTER ( ?start = "5"^^ ) + FILTER regex(?wikidataResource, ?regexForResourceFilter, "i") } ORDER BY DESC(?annotationScore)