diff --git a/README.md b/README.md index 4bee69f..feb2de5 100644 --- a/README.md +++ b/README.md @@ -8,39 +8,76 @@ The [pydelphin](https://pypi.org/project/PyDelphin/) and [rdflib](https://pypi.o DELPHIN-RDF presents the following dependencies: - `delphin.rdf`: the main module responsible for mrs/eds/dmrs parsing to RDF. -- `delphin.cli`: pydelphin standard cli path. Here are modules describing the delphin subcommands. See [delphin.cli](https://pydelphin.readthedocs.io/en/latest/api/delphin.cli.html) +- `delphin.cli`: pydelphin standard cli path. Here are modules describing the delphin subcommand. See [delphin.cli](https://pydelphin.readthedocs.io/en/latest/api/delphin.cli.html) Besides that, in `tests` there are use examples. In `doc` there are some references. In `vocabularies` there is the modelling of the semantic representations in RDF. ## Command Line Interface -All the modules in this package can be imported and used as a python module, but DELPHIN-RDF declares `profile-to-rdf` a delphin subcommands of the PyDelphin CLI. +All the modules in this package can be imported and used as a Python module, but DELPHIN-RDF declares `profile-to-rdf`, a delphin subcommand of the PyDelphin CLI. +This subcommand requires the path to the profile to serialize as argument. There are optional arguments, such as the format of the output (`-f`), the representation to serialize (`--to`), the prefix of the URIs in the RDF (`-p`) and the name of the output file (`-o`). -To use the function of the transformation as a python module, it's only needed to import `delphin.rdf`, which exports three main functions: `mrs_to_rdf`, `dmrs_to_rdf` and `eds_to_rdf`. For example, to serialize a profile to DMRS-RDF, we can do +## Python module + +To use the function of the transformation as a Python module, it's only needed to import `delphin.rdf`, which exports three main functions: `mrs_to_rdf`, `dmrs_to_rdf` and `eds_to_rdf`. 
They operate on an [IO Memory](https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/memory.html#IOMemory) RDFLib object, creating named graphs for each instance in the context of this optimized RDFLib store. For example, to serialize a profile to DMRS-RDF, we can do ```python import delphin.rdf as drdf from delphin import itsdb from delphin import tsql from delphin.dmrs import from_mrs as dmrs_from_mrs from delphin.codecs.simpledmrs import decode -from rdflib import Graph +from rdflib import plugin +from rdflib.graph import Graph, ConjunctiveGraph +from rdflib.store import Store +from rdflib.term import BNode +from rdflib import URIRef +from rdflib.store import Store +from rdflib import RDF, RDFS +from rdflib import Namespace, Literal path_to_profile = "./erg/trunk/tsdb/gold/mrs" ts = itsdb.TestSuite(path_to_profile) -graph = Graph() +store = plugin.get("IOMemory", Store)() +ERG = Namespace("http://www.delph-in.net/schema/erg#") +DELPH = Namespace("http://www.delph-in.net/schema/") +POS = Namespace("http://www.delph-in.net/schema/pos#") +DMRS = Namespace("http://www.delph-in.net/schema/dmrs#") +store.bind("erg", ERG) +store.bind("delph", DELPH) +store.bind("pos", POS) +store.bind("dmrs", DMRS) +prefix = "http://example.com" +PROFILE = URIRef(prefix) +defaultGraph = Graph(store, identifier=BNode()) +defaultGraph.add((PROFILE, RDF.type, DELPH.Profile)) + for (parse_id, result_id, text, mrs_string) in tsql.select('parse-id result-id i-input mrs', ts): obj = dmrs_from_mrs(decode(mrs_string)) - graph = drdf.dmrs_to_rdf(obj, - identifier=[str(parse_id), str(result_id)], - graph=graph, - text=text) + ITEM = URIRef(f"{prefix}/{parse_id}") + RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}") + DMRSI = URIRef(f"{prefix}/{parse_id}/{result_id}/dmrs") + + defaultGraph.add((ITEM, RDF.type, DELPH.Item)) + defaultGraph.add((RESULT, RDF.type, DELPH.Result)) + defaultGraph.add((DMRSI, RDF.type, DMRS.DMRS)) + + defaultGraph.add((ITEM, DELPH.hasText, Literal(text))) + + 
defaultGraph.add((PROFILE, DELPH.hasItem, ITEM)) + defaultGraph.add((ITEM, DELPH.hasResult, RESULT)) + defaultGraph.add((RESULT, DELPH.hasDMRS, DMRSI)) + + drdf.dmrs_to_rdf(obj, + DMRSI, + store, + defaultGraph) #inplace, change store and defaultGraph -graph.serialize("./dmrs-erg-gold.nt", format="nt") +ConjunctiveGraph(store).serialize("./dmrs-erg-gold.nq", format="nquads") ``` ## Development -One may be able to install delphin-rdf in developer mode, running +One may be able to install delphin-rdf in developer mode by cloning this repo and running ```bash $ pip install -e /path/to/delphin-rdf ``` diff --git a/delphin/cli/profile_to_rdf.py b/delphin/cli/profile_to_rdf.py index 754c73c..a73af5b 100644 --- a/delphin/cli/profile_to_rdf.py +++ b/delphin/cli/profile_to_rdf.py @@ -1,5 +1,6 @@ """ -Transcribes a profile intro a RDF graph. +Transcribes a profile into a RDF graph. +It creates named graphs, but it has support to creating files only with triples like ntriples or turtle. For more details, see: {https://github.com/own-pt/delphin-rdf}. """ @@ -29,20 +30,36 @@ from delphin import itsdb from delphin import tsql -from rdflib import Graph +from rdflib.graph import Graph, ConjunctiveGraph from rdflib.term import _is_valid_uri +from rdflib.store import Store +from rdflib import Namespace +from rdflib import plugin +from rdflib.term import BNode +from rdflib import URIRef +from rdflib import Literal +from rdflib import RDF +from rdflib import RDFS + +ERG = Namespace("http://www.delph-in.net/schema/erg#") +DELPH = Namespace("http://www.delph-in.net/schema/") +POS = Namespace("http://www.delph-in.net/schema/pos#") # interface function def __cli_parse__(args): # remove the not well formed sentences? add option? # print MRS or parse to DMRS format? - graph = Graph() path = args.profile prefix = args.prefix.strip("/") semrep = args.semrep.lower() parser = None - + # Setting verbosity; need to figure a better solution. 
+ if args.verbosity == 1: + logger.setLevel(20) + elif args.verbosity >= 2: + logger.setLevel(10) + try: # validates path if not isdir(path): @@ -53,21 +70,33 @@ def __cli_parse__(args): # validates URI prefix if not _is_valid_uri(prefix): raise Exception(f'Invalid URI: {prefix}') - # validate format and get parsers - to_rdf, from_mrs = _get_parsers(semrep) + # validate format and get converter + to_rdf, from_mrs = _get_converters(semrep) # open Test Suite and start conversion ts = itsdb.TestSuite(path) # logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}") - logger.log(30,f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}") + logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}") + + # Creating the store and the default graph + store = plugin.get("IOMemory", Store)() + defaultGraph = Graph(store, identifier=BNode()) + PROFILE = URIRef(f"{prefix}") # review later + defaultGraph.add((PROFILE, RDF.type, DELPH.Profile)) + semrepURI, prof_semrep_relation = _get_RDF_semrep(semrep, store) + store.bind("erg", ERG) + store.bind("delph", DELPH) + store.bind("pos", POS) + # store.bind("upref", prefix) # may be useful # The tsql takes some time to be processed: # logger.info(f"Loading the profile") - logger.log(30,f"Loading the profile") + logger.info(f"Loading the profile") profile_data = tsql.select('parse-id result-id i-input mrs', ts) - logger.log(30,f"Converting the profile") + logger.info(f"Converting the profile") + # Iterating over the results: for (parse_id, result_id, text, mrs_string) in profile_data: - logger.info(f"Converting the result {result_id} of sentence {parse_id}") + logger.debug(f"Converting the result {result_id} of sentence {parse_id}") m = simplemrs.decode(mrs_string) # making sure of the well formedness of "m" @@ -77,19 +106,36 @@ def __cli_parse__(args): # converting the MRS object to the representation 
intended to be converted obj = from_mrs(m) - logger.debug(f"Result {result_id} of item {parse_id}: \n\t{text}\n\t{obj}\n\t{mrs_string}") + # logger.debug(f"Result {result_id} of item {parse_id}: \n\t{text}\n\t{obj}\n\t{mrs_string}") - graph = to_rdf( - obj, - prefix=prefix, - identifier=[str(parse_id), str(result_id)], - graph=graph, - text=text) + # Creating URIs for relevant resources. + ITEM = URIRef(f"{prefix}/{parse_id}") # The item part may be redundant, maybe iterate before the itens + RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}") + SEMREPI = URIRef(f"{prefix}/{parse_id}/{result_id}/{semrep}") + + # adding types: + defaultGraph.add((ITEM, RDF.type, DELPH.Item)) + defaultGraph.add((RESULT, RDF.type, DELPH.Result)) + defaultGraph.add((SEMREPI, RDF.type, semrepURI)) + + # Associating text to item: + defaultGraph.add((ITEM, DELPH.hasText, Literal(text))) + + # Linking those nodes: + defaultGraph.add((PROFILE, DELPH.hasItem, ITEM)) + defaultGraph.add((ITEM, DELPH.hasResult, RESULT)) + defaultGraph.add((RESULT, prof_semrep_relation, SEMREPI)) + + to_rdf( + obj, + SEMREPI, + store, + defaultGraph) # serializes results - logger.log(30,f"Serializing results to {args.output}") - graph.serialize(destination=args.output, format=args.format) - logger.log(30,f"DONE") + logger.info(f"Serializing results to {args.output}") + ConjunctiveGraph(store).serialize(destination=args.output, format=args.format) + logger.info(f"DONE") # except PyDelphinSyntaxError as e: # logger.exception(e) @@ -100,19 +146,41 @@ def __cli_parse__(args): except Exception as e: logger.error(e) -def _get_parsers(semrep): +def _get_converters(semrep): + """ + This function gives us the conversor from MRS to a specific 'semrep'. + It returns a conversor function of delphin.rdf from this 'semrep' to RDF and + a function that converts PyDelphin MRS object to the specific semantic representation. 
+ """ logger.info(f"Getting parsers for representation: {semrep}") - if semrep == "mrs": logger.info("No conversion necessary") return mrs_to_rdf, lambda x: x - if semrep == "eds": + elif semrep == "eds": return eds_to_rdf, eds_from_mrs - if semrep == "dmrs": + elif semrep == "dmrs": return dmrs_to_rdf, dmrs_from_mrs raise PyDelphinException(f"Not a valid format: {semrep}") +def _get_RDF_semrep(semrep, store): + """ + This function binds the prefix of the semantic representation to the RDF store and returns + RDFLib objects that are relevant for the conversion + """ + if semrep == "mrs": + MRS = Namespace("http://www.delph-in.net/schema/mrs#") + store.bind("mrs", MRS) + return MRS.MRS, DELPH.hasMRS + elif semrep == "eds": + EDS = Namespace("http://www.delph-in.net/schema/eds#") + store.bind("eds",EDS) + return EDS.EDS, DELPH.hasEDS + elif semrep == "dmrs": + DMRS = Namespace("http://www.delph-in.net/schema/dmrs#") + store.bind("dmrs", DMRS) + return DMRS.DMRS, DELPH.hasDMRS + # sets parser and interface function parser = argparse.ArgumentParser(add_help=False) parser.set_defaults(func=__cli_parse__) @@ -120,7 +188,7 @@ def _get_parsers(semrep): # sets the command infos COMMAND_INFO = { 'name': 'profile-to-rdf', # Required - 'help': 'delphin profile to rdf', # Optional + 'help': 'incr tsdb test suite to rdf', # Optional 'description': __doc__, # Optional 'parser': parser, # Required } @@ -130,7 +198,7 @@ def _get_parsers(semrep): "profile", help="profile path") -_default_prefix = "http://example.com/example" +_default_prefix = "http://example.com/" parser.add_argument( "-p", # "--prefix", @@ -138,7 +206,7 @@ def _get_parsers(semrep): help=f"URI prefix (default: {_default_prefix})", default=_default_prefix) -_default_output = "output.ttl" +_default_output = "output.nq" parser.add_argument( "-o", # "--output", @@ -146,7 +214,7 @@ def _get_parsers(semrep): help=f"output file name (default: {_default_output})", default=_default_output) -_defaut_format = "turtle" 
+_defaut_format = "nquads" parser.add_argument( "-f", # "--format", @@ -159,5 +227,5 @@ def _get_parsers(semrep): # "-t", "--to", dest="semrep", - help=f"(mrs|eds|dmrs) modeled semantic representation (default: {_default_delphin})", + help=f"(mrs|dmrs|eds) semantic representation to serialize (default: {_default_delphin})", default=_default_delphin) diff --git a/delphin/rdf/__about__.py b/delphin/rdf/__about__.py index 368b24f..c0e832f 100644 --- a/delphin/rdf/__about__.py +++ b/delphin/rdf/__about__.py @@ -5,7 +5,7 @@ __name__ = "Delphin RDF" __summary__ = "DELPH-IN formats in RDF" -__version__ = "1.0.1" +__version__ = "1.0.3" __author__ = "foo" __email__ = "foo" diff --git a/delphin/rdf/_dmrs_parser.py b/delphin/rdf/_dmrs_parser.py index 132f599..db41a5f 100644 --- a/delphin/rdf/_dmrs_parser.py +++ b/delphin/rdf/_dmrs_parser.py @@ -1,12 +1,17 @@ -from rdflib import Graph +from rdflib.graph import Graph from rdflib import Literal from rdflib import RDF from rdflib import RDFS from rdflib import URIRef from rdflib import Namespace +from rdflib import plugin +from rdflib.store import Store +from rdflib.term import BNode +import rdflib -import delphin -from delphin import dmrs +import delphin.dmrs +import delphin.variable +import delphin.predicate # some useful namespaces DMRS = Namespace("http://www.delph-in.net/schema/dmrs#") @@ -14,170 +19,152 @@ DELPH = Namespace("http://www.delph-in.net/schema/") POS = Namespace("http://www.delph-in.net/schema/pos#") -def __nodes_to_rdf__(d, graph, dmrsi, NODES, namespace): +def dmrs_to_rdf(d:delphin.dmrs._dmrs.DMRS, + DMRSI: rdflib.term.URIRef, + store:rdflib.plugins.memory.IOMemory=plugin.get("IOMemory", Store)(), + defaultGraph:rdflib.graph.Graph=None) -> rdflib.plugins.memory.IOMemory: """ - Creates nodes of variables and nodes specifying their properties. + Takes a PyDelphin DMRS object "d" and serializes it into a named RDF graph inside a store. 
+ + Args: + d: a PyDelphin DMRS instance to be converted into RDF format + DMRSI: URI of the DMRS instance being converted + store: RDFLib IOMemory store to add the graphs. + defaultGraph : the default graph of the store. If not given, creates one from the 'store'. + Inplace function that alters the store with the serialized DMRS and return the store as well. + """ + + # Making the arguments behave well: + if defaultGraph is None: + defaultGraph = Graph(store, identifier=BNode()) + + if defaultGraph.store != store: # Bad function input + defaultGraph = Graph(store, identifier=BNode()) + print("'defaultGraph' argument not consistent with the 'store' argument. The argument was discarded") + + # DMRS graph: + dmrsGraph = Graph(store, identifier=DMRSI) + + # Creating the prefix of the DMRS elements and relevant namespaces + insprefix = Namespace(DMRSI + '#') + NODES = Namespace(insprefix + "node-") + LINKS = Namespace(insprefix + "link-") + PREDS = Namespace(insprefix + "predicate-") + SORTINFO = Namespace(insprefix + "sortinfo-") - d - a delphin dmrs instance to be parsed into RDF format. + defaultGraph.add((DMRSI, RDF.type, DMRS.DMRS)) - dmrsi - the dmrs iri of the specific instance being parsed. + # Adding top and index + dmrsGraph.add((DMRSI, DELPH['hasTop'], NODES[d.top])) + dmrsGraph.add((DMRSI, DELPH['hasIndex'], NODES[d.index])) + + # creating the prefixes of the output + # graph.bind("dmrs", DMRS) + # graph.bind("delph", DELPH) + # graph.bind("erg", ERG) + # graph.bind("pos", POS) + + # Populating the graphs + __nodes_to_rdf__(d, dmrsGraph, defaultGraph, DMRSI, NODES, PREDS, SORTINFO) + __links_to_rdf__(d, dmrsGraph, defaultGraph, DMRSI, LINKS, NODES) - graph - and rdflib graph that is used to store the DMRS as RDF - representation. + return store - NODES - the URI namespace dedicated to nodes. - namespace - the string namespace of a result of the profile. 
+def __nodes_to_rdf__(d, dmrsGraph, defaultGraph, DMRSI, NODES, PREDS, SORTINFO): + """ + Creates in the graphs the nodes of DMRS predications and their properties. + + Args: + d: a PyDelphin DMRS instance to be converted into RDF format + dmrsGraph: rdflib Graph of a Store of graphs where the DMRS triples will be put. + defaultGraph: the default graph of the Store with the dmrsGraph + DMRSI: the node of the DMRS instance being converted + NODES: the URI namespace dedicated to DMRS predications + PREDS: the URI namespace dedicated to predicates + SORTINFO: the URI namespace dedicated to the sortinfo (morphosemantic information). """ for i in range(len(d.nodes)): node = d.nodes[i] - nodeIRI = NODES["{}".format(node.id)] #era i, mas não da pra fazer link assim. Rever. - nodePredIRI = URIRef(f"{namespace}predicate-{node.id}") - nodeSortInfoIRI = URIRef(f"{namespace}sortinfo-{node.id}") - - #putting it's id - graph.add((nodeIRI, DMRS.hasId, Literal(node.id))) - graph.add((nodeIRI, RDFS.label, Literal(f"{node.predicate}<{node.cfrom},{node.cto}>"))) + nodeURI = NODES[f"{node.id}"] #era i, mas não da pra fazer link assim. Rever. 
+ predURI = PREDS[f"{node.id}"] + sortinfoURI = SORTINFO[f"{node.id}"] - #Instantiate the Node and putting into the DMRS - graph.add((nodeIRI, RDF.type, DMRS.Node)) - graph.add((dmrsi, DMRS.hasNode, nodeIRI)) - - #typing the predicate and associating with the node - splittedPredicate = delphin.predicate.split(delphin.predicate.normalize(node.predicate)) + dmrsGraph.add((nodeURI, RDF.type, DMRS.Node)) + dmrsGraph.add((sortinfoURI, RDF.type, DELPH.SortInfo)) + + # Information about the DMRS node + dmrsGraph.add((DMRSI, DMRS.hasNode, nodeURI)) + dmrsGraph.add((nodeURI, DELPH.hasPredicate, predURI)) + dmrsGraph.add((nodeURI, DELPH.hasSortInfo, sortinfoURI)) + dmrsGraph.add((nodeURI, DMRS.hasId, Literal(node.id))) # review later if this is useful + dmrsGraph.add((nodeURI, RDFS.label, Literal(f"{delphin.predicate.normalize(node.predicate)}<{node.cfrom},{node.cto}>"))) + #type: + if node.type is not None: + #graph.add((nodeIRI, DMRS.cvarsort, DELPH[node.type])) + dmrsGraph.add((nodeURI, RDF.type, DELPH[node.type])) + + # Information about the predicate + dmrsGraph.add((predURI, DELPH.predText, Literal(delphin.predicate.normalize(node.predicate)))) if delphin.predicate.is_surface(node.predicate): - graph.add((nodePredIRI, RDF.type, DELPH.SurfacePredicate)) + dmrsGraph.add((predURI, RDF.type, DELPH.SurfacePredicate)) elif delphin.predicate.is_abstract(node.predicate): - graph.add((nodePredIRI, RDF.type, DELPH.AbstractPredicate)) + dmrsGraph.add((predURI, RDF.type, DELPH.AbstractPredicate)) else: - graph.add((nodePredIRI, RDF.type, DELPH.Predicate)) - print("An invalid predicate") + dmrsGraph.add((predURI, RDF.type, DELPH.Predicate)) + print(f"{node.predicate} is an invalid predicate.") + splittedPredicate = delphin.predicate.split(delphin.predicate.normalize(node.predicate)) if splittedPredicate[0] is not None: - graph.add((nodePredIRI, DELPH.hasLemma, Literal(splittedPredicate[0]))) - + dmrsGraph.add((predURI, DELPH.hasLemma, Literal(splittedPredicate[0]))) if 
splittedPredicate[1] is not None: - graph.add((nodePredIRI, DELPH.hasPos, POS[splittedPredicate[1]])) + dmrsGraph.add((predURI, DELPH.hasPos, POS[splittedPredicate[1]])) if splittedPredicate[2] is not None: - graph.add((nodePredIRI, DELPH.hasSense, Literal(splittedPredicate[2]))) + dmrsGraph.add((predURI, DELPH.hasSense, Literal(splittedPredicate[2]))) - graph.add((nodeIRI, DELPH.hasPredicate, nodePredIRI)) - graph.add((nodePredIRI, DELPH.predText, Literal(delphin.predicate.normalize(node.predicate)))) - # lnk if node.cfrom is not None: - graph.add((nodeIRI, DELPH.cfrom, Literal(node.cfrom))) + dmrsGraph.add((nodeURI, DELPH.cfrom, Literal(node.cfrom))) if node.cto is not None: - graph.add((nodeIRI, DELPH.cto, Literal(node.cto))) + dmrsGraph.add((nodeURI, DELPH.cto, Literal(node.cto))) - #properties / sortinfo - graph.add((nodeIRI, DELPH.hasSortInfo, nodeSortInfoIRI)) - graph.add((nodeSortInfoIRI, RDF.type, DELPH.SortInfo)) + # properties / sortinfo for prop, val in node.properties.items(): - graph.add((nodeSortInfoIRI, ERG[prop.lower()], Literal(val.lower()))) - - #type: - if node.type is not None: - #graph.add((nodeIRI, DMRS.cvarsort, DELPH[node.type])) - graph.add((nodeIRI, RDF.type, DELPH[node.type])) - - # carg - if node.carg is not None: - graph.add((nodeSortInfoIRI, DELPH.carg, Literal(node.carg))) + dmrsGraph.add((sortinfoURI, ERG[prop.lower()], Literal(val.lower()))) + # carg; review later + if node.carg is not None: + dmrsGraph.add((nodeURI, DELPH.carg, Literal(node.carg))) -def __links_to_rdf__(d, graph, dmrsi, NODES, LINKS): +def __links_to_rdf__(d, dmrsGraph, defaultGraph, DMRSI, LINKS, NODES): """ - Creates the links of a DMRS in the RDF graph. - - d - a delphin dmrs instance to be parsed into RDF format. - - dmrsi - the dmrs iri of the specific instance being parsed. - - graph - and rdflib graph that is used to store the DMRS as RDF - representation. - - NODES - the URI namespace dedicated to nodes. - - LINKS - the URI namespace dedicated to links. 
+ Creates in the graphs the nodes of DMRS links and their properties. + + Args: + d: a PyDelphin DMRS instance to be converted into RDF format + dmrsGraph: rdflib Graph of a Store of graphs where the DMRS triples will be put. + defaultGraph: the default graph of the Store with the dmrsGraph + DMRSI: the node of the DMRS instance being converted + LINKS: the URI namespace dedicated to DMRS links. + NODES: the URI namespace dedicated to DMRS predications """ for i in range(len(d.links)): link = d.links[i] - linkIRI = LINKS["{}".format(i)] + linkURI = LINKS[f"{i}"] - # declaring the link node - graph.add((linkIRI, RDF.type, DMRS.Link)) - graph.add((linkIRI, RDFS.label, Literal("{}/{}".format(link.role,link.post)))) - graph.add((dmrsi, DMRS.hasLink, linkIRI)) + dmrsGraph.add((DMRSI, DMRS.hasLink, linkURI)) + dmrsGraph.add((linkURI, RDF.type, DMRS.Link)) + dmrsGraph.add((linkURI, RDFS.label, Literal(f"{link.role}/{link.post}"))) # the directions - graph.add((linkIRI, DMRS.hasFrom, NODES[f"{link.start}"])) - graph.add((linkIRI, DMRS.hasTo, NODES[f"{link.end}"])) + dmrsGraph.add((linkURI, DMRS.hasFrom, NODES[f"{link.start}"])) + dmrsGraph.add((linkURI, DMRS.hasTo, NODES[f"{link.end}"])) # adding roles and posts and creating (just to make sure, maybe remove the last one) - graph.add((linkIRI, DMRS.hasRole, DMRS[link.role.lower()])) - graph.add((linkIRI, DMRS.hasScopalRelation, DMRS[link.post.lower()])) - graph.add((DMRS[link.post.lower()], RDF.type, DMRS.ScopalRelation)) - graph.add((DMRS[link.role.lower()], RDF.type, DMRS.Role)) - - -def dmrs_to_rdf(d, prefix: str, identifier, iname="dmrs", graph=None, out=None, text=None, format="turtle"): - """ - Parses a pydelphin DMRS into RDF representation. - - d - a delphin DMRS instance to be parsed into RDF format. 
+ dmrsGraph.add((linkURI, DMRS.hasRole, DMRS[link.role.lower()])) + dmrsGraph.add((linkURI, DMRS.hasScopalRelation, DMRS[link.post.lower()])) + dmrsGraph.add((DMRS[link.post.lower()], RDF.type, DMRS.ScopalRelation)) + dmrsGraph.add((DMRS[link.role.lower()], RDF.type, DMRS.Role)) - prefix - the IRI to be prefixed to the RDF formated DMRS. - identifier - an string or a list of strings identifying - the DMRS. It should be unique, possibly using a composite - identifier, given in list. - For instance one may use it as [textid, dmrs-id] if the - same text admits various mrs interpretations. - - iname - the dmrs instance name (the dmrs as RDF node name) - to be used. As default, it is "dmrs". - - graph - and rdflib graph. If given, uses it to store the - dmrs as RDF representation. - - out - filename to serialize the output into. - - text - the text that is represented in dmrs as RDF. - - format - file format to serialize the output into. - """ - - if graph is None: graph = Graph() - if type(identifier) == list: - identifier = "/".join(identifier) - - namespace = prefix + "/" + identifier + "#" - - #creating the instance URI and the namespaces - dmrsi = URIRef(namespace + iname) - graph.add((dmrsi, RDF.type, DMRS.DMRS)) - NODES = Namespace(namespace + "node-") - LINKS = Namespace(namespace + "link-") - - #creating the prefixes of the output - graph.bind("dmrs", DMRS) - graph.bind("delph", DELPH) - graph.bind("erg", ERG) - graph.bind("pos", POS) - - #Creating RDF triples - __nodes_to_rdf__(d, graph, dmrsi, NODES, namespace) - #Adding top - graph.add((dmrsi, DMRS['hasTop'], NODES["{}".format(d.top)])) - #Adding index - graph.add((dmrsi, DMRS['hasIndex'], NODES["{}".format(d.index)])) - __links_to_rdf__(d, graph, dmrsi, NODES, LINKS) - - # add text as one graph node if it's given - if text is not None: graph.add((dmrsi, DELPH.text, Literal(text))) - # serializes graph if given an output file - if out is not None: graph.serialize(destination=out, format=format) - - return 
graph diff --git a/delphin/rdf/_eds_parser.py b/delphin/rdf/_eds_parser.py index 276476c..529433a 100644 --- a/delphin/rdf/_eds_parser.py +++ b/delphin/rdf/_eds_parser.py @@ -1,12 +1,17 @@ -from rdflib import Graph +from rdflib.graph import Graph from rdflib import Literal from rdflib import RDF from rdflib import RDFS from rdflib import URIRef from rdflib import Namespace +from rdflib import plugin +from rdflib.store import Store +from rdflib.term import BNode +import rdflib -import delphin -from delphin import eds +import delphin.eds +import delphin.variable +import delphin.predicate # some useful namespaces EDS = Namespace("http://www.delph-in.net/schema/eds#") @@ -14,140 +19,130 @@ DELPH = Namespace("http://www.delph-in.net/schema/") POS = Namespace("http://www.delph-in.net/schema/pos#") -def __nodes_to_rdf__(e, graph, edsi, NODES, namespace): +def eds_to_rdf(e:delphin.eds._eds.EDS, + EDSI: rdflib.term.URIRef, + store:rdflib.plugins.memory.IOMemory=plugin.get("IOMemory", Store)(), + defaultGraph:rdflib.graph.Graph=None) -> rdflib.plugins.memory.IOMemory: """ - Creates nodes of variables and nodes specifying their properties. - - e - a delphin eds instance to be parsed into RDF format. + Takes a PyDelphin EDS object "e" and serializes it into a named RDF graph inside a store. - graph - and rdflib graph that is used to store the EDS as RDF - representation. + Args: + e: a PyDelphin EDS instance to be converted into RDF format + EDSI: URI of the EDS instance being converted + store: RDFLib IOMemory store to add the graphs. + defaultGraph : the default graph of the store. If not given, creates one from the 'store'. + Inplace function that alters the store with the serialized EDS and return the store as well. 
+ """ + # Before running this, use delphin.eds.make_ids_unique(e, m) if possible + + # Making the arguments behave well: + if defaultGraph is None: + defaultGraph = Graph(store, identifier=BNode()) + + if defaultGraph.store != store: # Bad function input + defaultGraph = Graph(store, identifier=BNode()) + print("'defaultGraph' argument not consistent with the 'store' argument. The argument was discarded") + + # EDS graph: + edsGraph = Graph(store, identifier=EDSI) + + # Creating the prefix of the EDSI elements and relevant namespaces + insprefix = Namespace(EDSI + '#') + NODES = Namespace(insprefix + "node-") + PREDS = Namespace(insprefix + "predicate-") + SORTINFO = Namespace(insprefix + "sortinfo-") + + # Adding top + edsGraph.add((EDSI, DELPH['hasTop'], NODES[e.top])) + + # creating the prefixes of the output + # graph.bind("eds", EDS) + # graph.bind("delph", DELPH) + # graph.bind("erg", ERG) + # graph.bind("pos", POS) - edsi - The URI of the EDS instance being parsed. + # Populating the graphs + __nodes_to_rdf__(e, edsGraph, defaultGraph, EDSI, NODES, PREDS, SORTINFO) + __edges_to_rdf__(e, edsGraph, NODES) + + return store - NODES - the URI namespace dedicated to nodes. - namespace - the string namespace of a result of the profile. +def __nodes_to_rdf__(e, edsGraph, defaultGraph, EDSI, NODES, PREDS, SORTINFO): + """ + Creates in the graphs the nodes of EDS predications and their properties. + + Args: + e: a PyDelphin EDS instance to be converted into RDF format + edsGraph: rdflib Graph of a Store of graphs where the EDS triples will be put. + defaultGraph: the default graph of the Store with the edsGraph + EDSI: the node of the EDS instance being converted + NODES: the URI namespace dedicated to EDS predications + PREDS: the URI namespace dedicated to predicates + SORTINFO: the URI namespace dedicated to the sortinfo (morphosemantic information). 
""" for node in e.nodes: - nodeIRI = NODES[node.id] - nodePredIRI = URIRef(f"{namespace}predicate-{node.id}") - nodeSortInfoIRI = URIRef(f"{namespace}sortinfo-{node.id}") + nodeURI = NODES[node.id] + predURI = PREDS[node.id] + sortinfoURI = SORTINFO[node.id] - #Instantiate the Node - graph.add((nodeIRI, RDF.type, EDS.Node)) - graph.add((edsi, EDS.hasNode, nodeIRI)) + edsGraph.add((nodeURI, RDF.type, EDS.Node)) + edsGraph.add((sortinfoURI, RDF.type, DELPH.SortInfo)) + + # Information about the EDS node + edsGraph.add((EDSI, EDS.hasNode, nodeURI)) + edsGraph.add((nodeURI, DELPH.hasPredicate, predURI)) + edsGraph.add((nodeURI, DELPH.hasSortInfo, sortinfoURI)) + edsGraph.add((nodeURI, EDS.nodeIdentifier, Literal(node.id))) # review later if this is useful + edsGraph.add((nodeURI, RDFS.label, Literal(f"{delphin.predicate.normalize(node.predicate)}<{node.cfrom},{node.cto}>"))) + #type: + if node.type is not None: + edsGraph.add((nodeURI, RDF.type, DELPH[node.type])) - #typing the predicate + # Information about the predicate + edsGraph.add((predURI, DELPH.predText, Literal(delphin.predicate.normalize(node.predicate)))) if delphin.predicate.is_surface(node.predicate): - graph.add((nodePredIRI, RDF.type, DELPH.SurfacePredicate)) + edsGraph.add((predURI, RDF.type, DELPH.SurfacePredicate)) elif delphin.predicate.is_abstract(node.predicate): - graph.add((nodePredIRI, RDF.type, DELPH.AbstractPredicate)) + edsGraph.add((predURI, RDF.type, DELPH.AbstractPredicate)) else: #not surface neither abstract - print("{} is an invalid predicate".format(node.predicate)) - graph.add((nodePredIRI, RDF.type, DELPH.Predicate)) - - #Declaring the node predicate - graph.add((nodeIRI, DELPH.hasPredicate, nodePredIRI)) - graph.add((nodePredIRI, DELPH.predText, Literal(delphin.predicate.normalize(node.predicate)))) + edsGraph.add((predURI, RDF.type, DELPH.Predicate)) + print(f"{node.predicate} is an invalid predicate.") - # surface and its parts: splittedPredicate = 
delphin.predicate.split(delphin.predicate.normalize(node.predicate)) if splittedPredicate[0] is not None: #is this possible? - graph.add((nodePredIRI, DELPH.hasLemma, Literal(splittedPredicate[0]))) + edsGraph.add((predURI, DELPH.hasLemma, Literal(splittedPredicate[0]))) if splittedPredicate[1] is not None: - graph.add((nodePredIRI, DELPH.hasPos, POS[splittedPredicate[1]])) + edsGraph.add((predURI, DELPH.hasPos, POS[splittedPredicate[1]])) if splittedPredicate[2] is not None: - graph.add((nodePredIRI, DELPH.hasSense, Literal(splittedPredicate[2]))) + edsGraph.add((predURI, DELPH.hasSense, Literal(splittedPredicate[2]))) #lnk: if node.cfrom is not None: - graph.add((nodeIRI, DELPH.cfrom, Literal(node.cfrom))) + edsGraph.add((nodeURI, DELPH.cfrom, Literal(node.cfrom))) if node.cto is not None: - graph.add((nodeIRI, DELPH.cto, Literal(node.cto))) - - # type of node: - if node.type is not None: - graph.add((nodeIRI, RDF.type, DELPH[node.type])) + edsGraph.add((nodeURI, DELPH.cto, Literal(node.cto))) # properties - graph.add((nodeIRI, DELPH.hasSortInfo, nodeSortInfoIRI)) - graph.add((nodeSortInfoIRI, RDF.type, DELPH.SortInfo)) for prop in node.properties.items(): - graph.add((nodeSortInfoIRI, ERG[prop[0].lower()], Literal(prop[1].lower()))) - # carg + edsGraph.add((sortinfoURI, ERG[prop[0].lower()], Literal(prop[1].lower()))) + + # carg; review later if node.carg: - graph.add((nodeSortInfoIRI, DELPH.carg, Literal(node.carg))) + edsGraph.add((nodeURI, DELPH.carg, Literal(node.carg))) -def __edges_to_rdf__(e, graph, NODES): +def __edges_to_rdf__(e, edsGraph, NODES): """ - Creates nodes of variables and nodes specifying their properties. - - e - a delphin eds instance to be parsed into RDF format. - - graph - and rdflib graph that is used to store the EDS as RDF - representation. + Creates in the graph triples that links the EDS nodes, the edges. - NODES - the IRI namespace dedicated to nodes. 
+ Args: + e: a PyDelphin EDS instance to be converted into RDF format + edsGraph: rdflib Graph of a Store of graphs where the EDS triples will be put. + NODES: the URI namespace dedicated to EDS predications """ for edge in e.edges: - graph.add((NODES[edge[0]], EDS[edge[1].lower()], NODES[edge[2]])) + edsGraph.add((NODES[edge[0]], EDS[edge[1].lower()], NODES[edge[2]])) -def eds_to_rdf(e, prefix: str, identifier, iname="eds", graph=None, out=None, text=None, format="turtle"): - """ - Parses a pydelphin EDS into RDF representation. - - e - a delphin EDS instance to be parsed into RDF format. - - prefix - the IRI to be prefixed to the RDF formated eds. - - identifier - an string or a list of strings identifying - the eds. It should be unique, possibly using a composite - identifier, given in list. - For instance one may use it as [textid, eds-id] if the - same text admits various eds interpretations. - - iname - the eds instance name (the eds as RDF node name) - to be used. As default, it is "eds". - - graph - and rdflib graph. If given, uses it to store the - mrs as RDF representation. - - text - the text that is represented in eds as RDF. 
- """ - - # Before running this, use delphin.eds.make_ids_unique(e, m) if possible - - # same graph for different EDSs - if graph is None: graph = Graph() - if type(identifier) == list: - identifier = "/".join(identifier) - - namespace = prefix + "/" + identifier + "#" - - #creating the instance URI and the namespace of nodes - edsi = URIRef(namespace + iname) - graph.add((edsi, RDF.type, EDS.EDS)) - NODES = Namespace(namespace + "node-") - - #creating the prefixes of the output - graph.bind("eds", EDS) - graph.bind("delph", DELPH) - graph.bind("erg", ERG) - graph.bind("pos", POS) - - #Creating the RDF triples - __nodes_to_rdf__(e, graph, edsi, NODES, namespace) - #Adding top - graph.add((edsi, DELPH['hasTop'], NODES[e.top])) - __edges_to_rdf__(e, graph, NODES) - - # add text as one graph node if it's given - if text is not None: graph.add((edsi, DELPH.text, Literal(text))) - # serializes graph if given an output file - if out is not None: graph.serialize(destination=out, format=format) - - return graph diff --git a/delphin/rdf/_mrs_parser.py b/delphin/rdf/_mrs_parser.py index 4328703..b495c03 100644 --- a/delphin/rdf/_mrs_parser.py +++ b/delphin/rdf/_mrs_parser.py @@ -1,15 +1,18 @@ -from typing import Union - -import rdflib -from rdflib import Graph +from rdflib.graph import Graph from rdflib import Literal from rdflib import RDF from rdflib import RDFS from rdflib import URIRef from rdflib import Namespace +from rdflib import plugin +from rdflib.store import Store +from rdflib.term import BNode +import rdflib + +import delphin.mrs +import delphin.variable +import delphin.predicate -import delphin -from delphin import mrs # some useful namespaces MRS = Namespace("http://www.delph-in.net/schema/mrs#") @@ -17,209 +20,191 @@ DELPH = Namespace("http://www.delph-in.net/schema/") POS = Namespace("http://www.delph-in.net/schema/pos#") -def _vars_to_rdf(m, graph, VARS): +def mrs_to_rdf(m:delphin.mrs._mrs.MRS, + MRSI:rdflib.term.URIRef, + 
store:rdflib.plugins.memory.IOMemory=plugin.get("IOMemory", Store)(), + defaultGraph:rdflib.graph.Graph=None) -> rdflib.plugins.memory.IOMemory: """ - Describes variables "VARS" in an MRS-RDF format + Takes a PyDelphin MRS object "m" and serializes it into a named RDF graph inside a store. + + Args: + m: a PyDelphin MRS instance to be converted into RDF format + MRSI: URI of the MRS instance being converted + store: RDFLib IOMemory store to add the graphs. + defaultGraph : the default graph of the store. If not given, creates one from the 'store'. + + Inplace function that alters the store with the serialized MRS and return the store as well. + """ + # Making the arguments behave well: + if defaultGraph is None: + defaultGraph = Graph(store, identifier=BNode()) + + if defaultGraph.store != store: # Bad function input + defaultGraph = Graph(store, identifier=BNode()) + print("'defaultGraph' argument not consistent with the 'store' argument. The argument was discarded") + + # MRS graph: + mrsGraph = Graph(store, identifier=MRSI) + + # Creating the prefix of the MRS elements and relevant namespaces + insprefix = Namespace(MRSI + '#') + VARS = Namespace(insprefix + "variable-") + RELS = Namespace(insprefix + "EP-") + PREDS = Namespace(insprefix + "predicate-") + SORTINFO = Namespace(insprefix + "sortinfo-") + HCONS = Namespace(insprefix + "hcons-") + ICONS = Namespace(insprefix + "icons-") + + # Adding top and index + mrsGraph.add((MRSI, DELPH['hasTop'], VARS[m.top])) + mrsGraph.add((MRSI, DELPH['hasIndex'], VARS[m.index])) + # ALTERNATIVE: (BNode, DELPH['hasTop'], VARS[m.top]) + + # Populating the graphs + _vars_to_rdf(m, mrsGraph, VARS, SORTINFO) + _rels_to_rdf(m, mrsGraph, defaultGraph, MRSI, RELS, PREDS, VARS) + _hcons_to_rdf(m, mrsGraph, defaultGraph, MRSI, HCONS, VARS) + _icons_to_rdf(m, mrsGraph, defaultGraph, MRSI, ICONS, VARS) + + return store + +def _vars_to_rdf(m, mrsGraph, VARS, SORTINFO): + """ + Converts the variables of a MRS to the RDF graph Args: - m: 
a delphin mrs instance to be parsed into RDF format - graph: rdflib Graph. If given, uses it to store the MRS - representation in RDF - VARS: the URI namespace dedicated to variables + m: a delphin mrs instance to be converted into RDF format + mrsGraph: rdflib Graph of a Store of graphs where the MRS triples will be put. + VARS: the URI namespace dedicated to variables. + SORTINFO: the URI namespace dedicated to the sortinfo (morphosemantic information). """ for v in m.variables.items(): if delphin.variable.is_valid(v[0]): # typing variables if delphin.variable.type(v[0]) != 'h': - graph.add((VARS[v[0]], RDF.type, DELPH[delphin.variable.type(v[0])])) + mrsGraph.add((VARS[v[0]], RDF.type, DELPH[delphin.variable.type(v[0])])) else : - graph.add((VARS[v[0]], RDF.type, MRS['h'])) + mrsGraph.add((VARS[v[0]], RDF.type, MRS['h'])) + + # associating the variable to its sortinfo + mrsGraph.add((VARS[v[0]], DELPH.hasSortInfo, SORTINFO[v[0]])) # adding the properties of the variables for props in v[1].items(): - graph.add((VARS[v[0]], ERG[props[0].lower()], Literal(props[1]))) + mrsGraph.add((SORTINFO[v[0]], ERG[props[0].lower()], Literal(props[1]))) # it won't be harmful to reassure that the property is defined in ERG, but it'll be like that for now. - else: - print("Invalid predicate") -def _rels_to_rdf(m, graph, mrsi, RELS, VARS, namespace): + else: # very rare event, should it be removed? + print("Invalid variable name") + +def _rels_to_rdf(m, mrsGraph, defaultGraph, MRSI, RELS, PREDS, VARS): """ - Describes EPs "RELS" in an MRS-RDF format + Converts the EPs of a MRS to the graph Args: - m: a delphin mrs instance to be parsed into RDF format - graph: rdflib Graph. If given, uses it to store the MRS - representation in RDF - mrsi: the mrs instance name (the MRS as RDF node name) + m: a delphin mrs instance to be converted into RDF format + mrsGraph: rdflib Graph of a Store of graphs where the MRS triples will be put. 
+ defaultGraph: the default graph of the Store with the mrsGraph + MRSI: the node of the MRS instance being converted RELS: the URI namespace dedicated to EPs + PREDS: the URI namespace dedicated to predicates VARS: the URI namespace dedicated to variables - namespace - the string namespace of a result of the profile. """ for rel in range(len(m.rels)): mrs_rel = m.rels[rel] - rdf_rel = RELS["{rel}".format(rel=rel)] #maybe label EPs in a different manner is better because they aren't ordered. - pred_rel = URIRef(f"{namespace}predicate-{rel}") - sortinfo_rel = URIRef(f"{namespace}sortinfo-{rel}") + EPNode = RELS[f"{rel}"] #maybe label EPs in a different manner is better because they aren't ordered. + predNode = PREDS[f"{rel}"] - graph.add((mrsi, MRS.hasEP, rdf_rel)) - graph.add((rdf_rel, RDF.type, MRS.ElementaryPredication)) - graph.add((rdf_rel, MRS.hasLabel, VARS[mrs_rel.label])) + mrsGraph.add((MRSI, MRS.hasEP, EPNode)) + mrsGraph.add((EPNode, RDF.type, MRS.ElementaryPredication)) + mrsGraph.add((EPNode, MRS.hasLabel, VARS[mrs_rel.label])) # graph.add((rdf_rel, MRS.var, VARS[mrs_rel.iv])) #not needed because ARG0 is already being included at the end of function splittedPredicate = delphin.predicate.split(delphin.predicate.normalize(mrs_rel.predicate)) if delphin.predicate.is_surface(mrs_rel.predicate): - graph.add((pred_rel, RDF.type, DELPH.SurfacePredicate)) + mrsGraph.add((predNode, RDF.type, DELPH.SurfacePredicate)) elif delphin.predicate.is_abstract(mrs_rel.predicate): - graph.add((pred_rel, RDF.type, DELPH.AbstractPredicate)) + mrsGraph.add((predNode, RDF.type, DELPH.AbstractPredicate)) else: #not(delphin.predicate.is_valid(mrs_rel.predicate)) print("{} is an invalid predicate.".format(mrs_rel.predicate)) #revise; maybe something stronger. 
- graph.add((pred_rel, RDF.type, DELPH.Predicate)) #revise + mrsGraph.add((predNode, RDF.type, DELPH.Predicate)) #revise + + mrsGraph.add((EPNode, DELPH.hasPredicate, predNode)) + mrsGraph.add((predNode, DELPH.predText, Literal(delphin.predicate.normalize(mrs_rel.predicate)))) + mrsGraph.add((EPNode, RDFS.label, Literal(f"{delphin.predicate.normalize(mrs_rel.predicate)}<{mrs_rel.cfrom},{mrs_rel.cto}>"))) - graph.add((rdf_rel, DELPH.hasPredicate, pred_rel)) - graph.add((pred_rel, DELPH.predText, Literal(delphin.predicate.normalize(mrs_rel.predicate)))) - if splittedPredicate[0] is not None: #here, lemma = name by now. - graph.add((pred_rel, DELPH.hasLemma, Literal(splittedPredicate[0]))) + mrsGraph.add((predNode, DELPH.hasLemma, Literal(splittedPredicate[0]))) if splittedPredicate[1] is not None: - graph.add((pred_rel, DELPH.hasPos, POS[splittedPredicate[1]])) + mrsGraph.add((predNode, DELPH.hasPos, POS[splittedPredicate[1]])) if splittedPredicate[2] is not None: - graph.add((pred_rel, DELPH.hasSense, Literal(splittedPredicate[2]))) + mrsGraph.add((predNode, DELPH.hasSense, Literal(splittedPredicate[2]))) #lnk: if mrs_rel.cfrom is not None: - graph.add((rdf_rel, DELPH.cfrom, Literal(mrs_rel.cfrom))) #integer + mrsGraph.add((EPNode, DELPH.cfrom, Literal(mrs_rel.cfrom))) #integer if mrs_rel.cto is not None: - graph.add((rdf_rel, DELPH.cto, Literal(mrs_rel.cto))) #integer + mrsGraph.add((EPNode, DELPH.cto, Literal(mrs_rel.cto))) #integer # parse arguments - graph.add((rdf_rel, DELPH.hasSortInfo, sortinfo_rel)) - graph.add((sortinfo_rel, RDF.type, DELPH.SortInfo)) for hole, arg in mrs_rel.args.items(): - #if hole == "ARG0": continue - # arg_type = type(eval(arg.title())) - # ? 
- # mrs variables as arguments if hole.lower() != "carg" : - graph.add((sortinfo_rel, MRS[hole.lower()], VARS[arg])) + mrsGraph.add((EPNode, MRS[hole.lower()], VARS[arg])) else : - graph.add((sortinfo_rel, DELPH.carg, Literal(arg))) - + mrsGraph.add((EPNode, DELPH.carg, Literal(arg))) -def _hcons_to_rdf(m, graph, mrsi, HCONS, VARS): +def _hcons_to_rdf(m, mrsGraph, defaultGraph, MRSI, HCONS, VARS): """ Describes handle constraints "HCONS" in an MRS-RDF format Args: - m: a delphin mrs instance to be parsed into RDF format - graph: rdflib Graph. If given, uses it to store the MRS - representation in RDF - mrsi: the mrs instance name (the mrs as RDF node name) - HCONS: the URI namespace dedicated to HCONSs. - VARS: the URI namespace dedicated to variables. + m: a delphin mrs instance to be converted into RDF format + mrsGraph: rdflib Graph of a Store of graphs where the MRS triples will be put. + defaultGraph: the default graph of the Store with the mrsGraph + MRSI: the node of the MRS instance being converted + HCONS: the URI namespace dedicated to handle constraints + VARS: the URI namespace dedicated to variables """ - for hcon in range(len(m.hcons)): - mrs_hcon = m.hcons[hcon] - rdf_hcon = HCONS["{hcon}".format(hcon=hcon)] + for id_hcons in range(len(m.hcons)): + mrs_hcons = m.hcons[id_hcons] + HCONSNode = HCONS[f"{id_hcons}"] - # adds hcon to graph - graph.add((mrsi, MRS.hasHcons, rdf_hcon)) - graph.add((rdf_hcon, RDF.type, MRS[mrs_hcon.relation.capitalize()])) - graph.add((rdf_hcon, MRS.highHcons, VARS[mrs_hcon.hi])) - graph.add((rdf_hcon, MRS.lowHcons, VARS[mrs_hcon.lo])) + # adds hcons to graphs + mrsGraph.add((MRSI, MRS.hasHcons, HCONSNode)) + mrsGraph.add((HCONSNode, RDF.type, MRS[mrs_hcons.relation.capitalize()])) + mrsGraph.add((HCONSNode, MRS.highHcons, VARS[mrs_hcons.hi])) + mrsGraph.add((HCONSNode, MRS.lowHcons, VARS[mrs_hcons.lo])) - -def _icons_to_rdf(m, graph, mrsi, ICONS, VARS): +def _icons_to_rdf(m, mrsGraph, defaultGraph, MRSI, ICONS, VARS): """ 
Describes individual constraints "ICONS" in MRS-RDF format Args: - m: a delphin mrs instance to be parsed into RDF format - graph: rdflib Graph. If given, uses it to store the MRS - representation in RDF - mrsi: the mrs instance name (the mrs as RDF node name) - ICONS: the URI namespace dedicated to ICONSs. - VARS: the URI namespace dedicated to variables. + m: a delphin mrs instance to be converted into RDF format + mrsGraph: rdflib Graph of a Store of graphs where the MRS triples will be put. + defaultGraph: the default graph of the Store with the mrsGraph + MRSI: the node of the MRS instance being converted + ICONS: the URI namespace dedicated to individual constraints + VARS: the URI namespace dedicated to variables """ - for icon in range(len(m.icons)): - mrs_icon = m.icons[icon] - rdf_icon = ICONS["{icon}".format(icon=icon)] - - # adds icon to graph - graph.add((mrsi, MRS.hasIcons, rdf_icon)) - # by now, the ICONSs seems to be grammar-specific. - graph.add((rdf_icon, RDF.type, ERG[mrs_icon.relation])) - graph.add((rdf_icon, MRS.leftIcons, VARS[mrs_icon.left])) # should be revisited - graph.add((rdf_icon, MRS.rightIcons, VARS[mrs_icon.right])) # should be revisited - - # this relation must be defined in ERG as an icons - graph.add((ERG[mrs_icon.relation], RDF.type, RDFS.Class)) - graph.add((ERG[mrs_icon.relation], RDFS.subClassOf, MRS.Icons)) - #This is ad-hoc, will be removed one day. - #To remove it, we need to have an exhaustive list of the possible icons in ERG (later we must adapt to other grammars). + for id_icons in range(len(m.icons)): + mrs_icons = m.icons[id_icons] + ICONSNode = ICONS[f"{id_icons}"] - -def mrs_to_rdf( - m:delphin.mrs._mrs.MRS, - prefix:str, - identifier:Union[str, list], - iname:str ="mrs", - graph:rdflib.graph.Graph=None, - text:str=None) -> rdflib.graph.Graph: - """ - Parses a pydelphin MRS "m" into an RDF representation. 
- - Args: - m: a delphin mrs instance to be parsed into RDF format - prefix: the URI to be prefixed to the RDF formated MRS - identifier: an string or a list of strings identifying - the MRS. It may be composite, given in list - iname: the mrs instance name (the mrs as RDF node name) - to be used - graph: rdflib Graph. If given, uses it to store the MRS - representation in RDF - text: the text analized, represented in MRS in "m" - - Returns: - Graph: containing the RDF representation of "m" - """ - - # same graph for different mrs - if graph is None: graph = Graph() - if type(identifier) == list: - identifier = "/".join(identifier) - - # creating the namespaces for this MRS instance - namespace = prefix + "/" + identifier + "#" - mrsi = URIRef(namespace + iname) - graph.add((mrsi, RDF.type, MRS.MRS)) - VARS = Namespace(namespace + "variables-") - RELS = Namespace(namespace + "EP-") - HCONS = Namespace(namespace + "hcons-") - ICONS = Namespace(namespace + "icons-") - - # creating the prefixes of the output - graph.bind("mrs", MRS) - graph.bind("delph", DELPH) - graph.bind("erg", ERG) - graph.bind("pos", POS) - - # creating the RDF triples - _vars_to_rdf(m, graph, VARS) - _rels_to_rdf(m, graph, mrsi, RELS, VARS, namespace) - _hcons_to_rdf(m, graph, mrsi, HCONS, VARS) - _icons_to_rdf(m, graph, mrsi, ICONS, VARS) - # adding top - graph.add((mrsi, DELPH['hasTop'], VARS[m.top])) - # adding index - graph.add((mrsi, DELPH['hasIndex'], VARS[m.index])) - - # add text as one graph node if it's given - if text is not None: - graph.add((mrsi, DELPH.text, Literal(text))) - - return graph + # adds icons to graphs + mrsGraph.add((MRSI, MRS.hasIcons, ICONSNode)) + mrsGraph.add((ICONSNode, RDF.type, ERG[mrs_icons.relation])) + mrsGraph.add((ICONSNode, MRS.leftIcons, VARS[mrs_icons.left])) # should be revisited + mrsGraph.add((ICONSNode, MRS.rightIcons, VARS[mrs_icons.right])) # should be revisited + + # by now, the ICONSs seems to be grammar-specific + # and this relation must be 
defined in ERG as an icons. + # As we don't have an exhaustive list of the possible icons in ERG (or any other grammar), + # we'll create those icons in the final graph. This is provisional + defaultGraph.add((ERG[mrs_icons.relation], RDF.type, RDFS.Class)) + defaultGraph.add((ERG[mrs_icons.relation], RDFS.subClassOf, MRS.Icons)) \ No newline at end of file diff --git a/vocabularies/dmrs.ttl b/vocabularies/dmrs.ttl index 630c2e6..7ac2d79 100644 --- a/vocabularies/dmrs.ttl +++ b/vocabularies/dmrs.ttl @@ -28,6 +28,12 @@ dmrs:Role a rdfs:Class ; rdfs:comment "The class of the roles of a link in DMRS"@en-us. +delph:hasDMRS + rdfs:subPropertyOf delph:hasSemanticRepresentation ; + rdfs:domain delph:Result ; + rdfs:range dmrs:DMRS ; + rdfs:comment "A property that links a result to its DMRS."@en-us. + dmrs:hasNode a rdf:Property ; rdfs:subProperty delph:hasPredication ; diff --git a/vocabularies/eds.ttl b/vocabularies/eds.ttl index e3eeae6..6fcde01 100644 --- a/vocabularies/eds.ttl +++ b/vocabularies/eds.ttl @@ -30,6 +30,12 @@ eds:edge rdfs:comment "EDS way to explicit the role between predications"@en-us. #Is it? +delph:hasEDS + rdfs:subPropertyOf delph:hasSemanticRepresentation ; + rdfs:domain delph:Result ; + rdfs:range eds:EDS ; + rdfs:comment "A property that links a result to its EDS."@en-us. + eds:hasNode a rdf:Property ; rdfs:subProperty delph:hasPredication ; diff --git a/vocabularies/mrs.ttl b/vocabularies/mrs.ttl index 9c0f024..d41acab 100644 --- a/vocabularies/mrs.ttl +++ b/vocabularies/mrs.ttl @@ -100,6 +100,12 @@ mrs:Outscopes #Defining properties +delph:hasMRS + rdfs:subPropertyOf delph:hasSemanticRepresentation ; + rdfs:domain delph:Result ; + rdfs:range mrs:MRS ; + rdfs:comment "A property that links a result to its MRS."@en-us. 
+ mrs:hasEP a rdf:Property ; rdfs:subPropertyOf delph:hasPredication ; diff --git a/vocabularies/semstructs.ttl b/vocabularies/semstructs.ttl index c76b705..11ff973 100644 --- a/vocabularies/semstructs.ttl +++ b/vocabularies/semstructs.ttl @@ -13,6 +13,19 @@ #Declaring the classes: + +delph:Profile + a rdfs:Class ; + rdfs:comment "The class of the [incr tsdb()] test suites"@en-us . + +delph:Item + a rdfs:Class ; + rdfs:comment "The class of the items of a profile. Represents sentences"@en-us . + +delph:Result + a rdfs:Class ; + rdfs:comment "The class of the results of a profile, the items after being processed by the grammar"@en-us . + delph:SemanticStructure a rdfs:Class ; rdfs:comment "The class dedicated to the skeleton of semantic structures. Those are rooted DAGS with a bag of Predications."@en-us . @@ -85,6 +98,30 @@ delph:x #Declaring important relations +delph:hasItem + a rdf:Property ; + rdfs:domain delph:Profile ; + rdfs:range delph:Item ; + rdfs:comment "A mapping of a profile to one of its items."@en-us. + +delph:hasResult + a rdf:Property ; + rdfs:domain delph:Item ; + rdfs:range delph:Result ; + rdfs:comment "A mapping of an item to one of its results."@en-us. + +delph:hasText + a rdf:Property ; + rdfs:domain delph:Item ; + rdfs:range rdfs:Literal ; + rdfs:comment "A mapping of an item to the associated sentence's text."@en-us. + +delph:hasSemanticRepresentation + a rdf:Property ; + rdfs:domain delph:Result ; + rdfs:range delph:SemanticStructure ; + rdfs:comment "General property that links a result to one of its semantic structures."@en-us. + delph:hasTop a rdf:Property ; rdfs:domain delph:SemanticStructure ; @@ -105,7 +142,7 @@ delph:hasPredication delph:hasSortInfo a rdf:Property ; - rdfs:domain delph:Predication ; + rdfs:domain delph:u ; rdfs:range delph:SortInfo ; rdfs:comment "A property that links a predication to its information"@en-us. 
@@ -158,14 +195,6 @@ delph:hasPropertyValue rdfs:range rdfs:Literal ; rdfs:comment "A general property to link an SortInfo node to a morphosemantic property value"@en-us. -delph:text - a rdf:Property ; - rdfs:domain delph:SemanticStructure ; - rdfs:range rdfs:Literal ; - rdfs:comment "The property that links a semantic structure to the text that generated it"@en-us. - - - # Defining the Parts of speech # Based on http://moin.delph-in.net/RmrsPos