Skip to content

Commit

Permalink
Named graphs (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
yfaria authored Jul 20, 2021
1 parent cc27e6b commit 5ffb0e1
Show file tree
Hide file tree
Showing 10 changed files with 558 additions and 439 deletions.
59 changes: 48 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,76 @@ The [pydelphin](https://pypi.org/project/PyDelphin/) and [rdflib](https://pypi.o

DELPHIN-RDF presents the following dependencies:
- `delphin.rdf`: the main module responsible for mrs/eds/dmrs parsing to RDF.
- `delphin.cli`: pydelphin standard cli path. Here are modules describing the delphin subcommands. See [delphin.cli](https://pydelphin.readthedocs.io/en/latest/api/delphin.cli.html)
- `delphin.cli`: pydelphin standard cli path. Here are modules describing the delphin subcommand. See [delphin.cli](https://pydelphin.readthedocs.io/en/latest/api/delphin.cli.html)

Besides that, in `tests` there are usage examples. In `doc` there are some references. In `vocabularies` there is the modelling of the semantic representations in RDF.

## Command Line Interface

All the modules in this package can be imported and used as a python module, but DELPHIN-RDF declares `profile-to-rdf` a delphin subcommands of the PyDelphin CLI.
All the modules in this package can be imported and used as a Python module, but DELPHIN-RDF declares `profile-to-rdf`, a delphin subcommand of the PyDelphin CLI.
This subcommand requires the path to the profile to serialize as an argument. There are optional arguments, such as the format of the output (`-f`), the representation to serialize (`--to`), the prefix of the URIs in the RDF (`-p`) and the name of the output file (`-o`).

To use the function of the transformation as a python module, it's only needed to import `delphin.rdf`, which exports three main functions: `mrs_to_rdf`, `dmrs_to_rdf` and `eds_to_rdf`. For example, to serialize a profile to DMRS-RDF, we can do
## Python module

To use the conversion functions from Python, you only need to import `delphin.rdf`, which exports three main functions: `mrs_to_rdf`, `dmrs_to_rdf` and `eds_to_rdf`. They operate on an RDFLib [IO Memory](https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/memory.html#IOMemory) store, creating a named graph for each instance in the context of this optimized RDFLib store. For example, to serialize a profile to DMRS-RDF, we can do
```python
import delphin.rdf as drdf
from delphin import itsdb
from delphin import tsql
from delphin.dmrs import from_mrs as dmrs_from_mrs
from delphin.codecs.simpledmrs import decode
from rdflib import Graph
from rdflib import plugin
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.store import Store
from rdflib.term import BNode
from rdflib import URIRef
from rdflib.store import Store
from rdflib import RDF, RDFS
from rdflib import Namespace, Literal

path_to_profile = "./erg/trunk/tsdb/gold/mrs"
ts = itsdb.TestSuite(path_to_profile)
graph = Graph()
store = plugin.get("IOMemory", Store)()
ERG = Namespace("http://www.delph-in.net/schema/erg#")
DELPH = Namespace("http://www.delph-in.net/schema/")
POS = Namespace("http://www.delph-in.net/schema/pos#")
DMRS = Namespace("http://www.delph-in.net/schema/dmrs#")
store.bind("erg", ERG)
store.bind("delph", DELPH)
store.bind("pos", POS)
store.bind("dmrs", DMRS)
prefix = "http://example.com"
PROFILE = URIRef(prefix)
defaultGraph = Graph(store, identifier=BNode())
defaultGraph.add((PROFILE, RDF.type, DELPH.Profile))

for (parse_id, result_id, text, mrs_string) in tsql.select('parse-id result-id i-input mrs', ts):
obj = dmrs_from_mrs(decode(mrs_string))
graph = drdf.dmrs_to_rdf(obj,
identifier=[str(parse_id), str(result_id)],
graph=graph,
text=text)
ITEM = URIRef(f"{prefix}/{parse_id}")
RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}")
DMRSI = URIRef(f"{prefix}/{parse_id}/{result_id}/dmrs")

defaultGraph.add((ITEM, RDF.type, DELPH.Item))
defaultGraph.add((RESULT, RDF.type, DELPH.Result))
defaultGraph.add((DMRSI, RDF.type, DMRS.DMRS))

defaultGraph.add((ITEM, DELPH.hasText, Literal(text)))

defaultGraph.add((PROFILE, DELPH.hasItem, ITEM))
defaultGraph.add((ITEM, DELPH.hasResult, RESULT))
defaultGraph.add((RESULT, DELPH.hasDMRS, DMRSI))

drdf.dmrs_to_rdf(dmrs_from_mrs(simplemrs.decode(mrs_string)),
DMRSI,
store,
defaultGraph) #inplace, change store and defaultGraph

graph.serialize("./dmrs-erg-gold.nt", format="nt")
ConjunctiveGraph(store).serialize("./dmrs-erg-gold.nq", format="nquads")
```

## Development

One may be able to install delphin-rdf in developer mode, running
One can install delphin-rdf in developer mode by cloning this repo and running
```bash
$ pip install -e /path/to/delphin-rdf
```
Expand Down
126 changes: 97 additions & 29 deletions delphin/cli/profile_to_rdf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""
Transcribes a profile intro a RDF graph.
Transcribes a profile into an RDF graph.
It creates named graphs, but it also supports creating files with only triples, such as N-Triples or Turtle.
For more details, see: {https://github.com/own-pt/delphin-rdf}.
"""
Expand Down Expand Up @@ -29,20 +30,36 @@
from delphin import itsdb
from delphin import tsql

from rdflib import Graph
from rdflib.graph import Graph, ConjunctiveGraph
from rdflib.term import _is_valid_uri
from rdflib.store import Store
from rdflib import Namespace
from rdflib import plugin
from rdflib.term import BNode
from rdflib import URIRef
from rdflib import Literal
from rdflib import RDF
from rdflib import RDFS

ERG = Namespace("http://www.delph-in.net/schema/erg#")
DELPH = Namespace("http://www.delph-in.net/schema/")
POS = Namespace("http://www.delph-in.net/schema/pos#")

# interface function
def __cli_parse__(args):
# remove the not well formed sentences? add option?
# print MRS or parse to DMRS format?

graph = Graph()
path = args.profile
prefix = args.prefix.strip("/")
semrep = args.semrep.lower()
parser = None

# Setting verbosity; need to figure a better solution.
if args.verbosity == 1:
logger.setLevel(20)
elif args.verbosity >= 2:
logger.setLevel(10)

try:
# validates path
if not isdir(path):
Expand All @@ -53,21 +70,33 @@ def __cli_parse__(args):
# validates URI prefix
if not _is_valid_uri(prefix):
raise Exception(f'Invalid URI: {prefix}')
# validate format and get parsers
to_rdf, from_mrs = _get_parsers(semrep)
# validate format and get converter
to_rdf, from_mrs = _get_converters(semrep)

# open Test Suite and start conversion
ts = itsdb.TestSuite(path)
# logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")
logger.log(30,f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")
logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")

# Creating the store and the default graph
store = plugin.get("IOMemory", Store)()
defaultGraph = Graph(store, identifier=BNode())
PROFILE = URIRef(f"{prefix}") # review later
defaultGraph.add((PROFILE, RDF.type, DELPH.Profile))
semrepURI, prof_semrep_relation = _get_RDF_semrep(semrep, store)
store.bind("erg", ERG)
store.bind("delph", DELPH)
store.bind("pos", POS)
# store.bind("upref", prefix) # may be useful

# The tsql takes some time to be processed:
# logger.info(f"Loading the profile")
logger.log(30,f"Loading the profile")
logger.info(f"Loading the profile")
profile_data = tsql.select('parse-id result-id i-input mrs', ts)
logger.log(30,f"Converting the profile")
logger.info(f"Converting the profile")
# Iterating over the results:
for (parse_id, result_id, text, mrs_string) in profile_data:
logger.info(f"Converting the result {result_id} of sentence {parse_id}")
logger.debug(f"Converting the result {result_id} of sentence {parse_id}")
m = simplemrs.decode(mrs_string)

# making sure of the well formedness of "m"
Expand All @@ -77,19 +106,36 @@ def __cli_parse__(args):

# converting the MRS object to the representation intended to be converted
obj = from_mrs(m)
logger.debug(f"Result {result_id} of item {parse_id}: \n\t{text}\n\t{obj}\n\t{mrs_string}")
# logger.debug(f"Result {result_id} of item {parse_id}: \n\t{text}\n\t{obj}\n\t{mrs_string}")

graph = to_rdf(
obj,
prefix=prefix,
identifier=[str(parse_id), str(result_id)],
graph=graph,
text=text)
# Creating URIs for relevant resources.
ITEM = URIRef(f"{prefix}/{parse_id}") # The item part may be redundant, maybe iterate before the itens
RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}")
SEMREPI = URIRef(f"{prefix}/{parse_id}/{result_id}/{semrep}")

# adding types:
defaultGraph.add((ITEM, RDF.type, DELPH.Item))
defaultGraph.add((RESULT, RDF.type, DELPH.Result))
defaultGraph.add((SEMREPI, RDF.type, semrepURI))

# Associating text to item:
defaultGraph.add((ITEM, DELPH.hasText, Literal(text)))

# Linking those nodes:
defaultGraph.add((PROFILE, DELPH.hasItem, ITEM))
defaultGraph.add((ITEM, DELPH.hasResult, RESULT))
defaultGraph.add((RESULT, prof_semrep_relation, SEMREPI))

to_rdf(
obj,
SEMREPI,
store,
defaultGraph)

# serializes results
logger.log(30,f"Serializing results to {args.output}")
graph.serialize(destination=args.output, format=args.format)
logger.log(30,f"DONE")
logger.info(f"Serializing results to {args.output}")
ConjunctiveGraph(store).serialize(destination=args.output, format=args.format)
logger.info(f"DONE")

# except PyDelphinSyntaxError as e:
# logger.exception(e)
Expand All @@ -100,27 +146,49 @@ def __cli_parse__(args):
except Exception as e:
logger.error(e)

def _get_parsers(semrep):
def _get_converters(semrep):
"""
This function gives us the converter from MRS to a specific 'semrep'.
It returns a converter function of delphin.rdf from this 'semrep' to RDF and
a function that converts a PyDelphin MRS object to the specific semantic representation.
"""
logger.info(f"Getting parsers for representation: {semrep}")

if semrep == "mrs":
logger.info("No conversion necessary")
return mrs_to_rdf, lambda x: x
if semrep == "eds":
elif semrep == "eds":
return eds_to_rdf, eds_from_mrs
if semrep == "dmrs":
elif semrep == "dmrs":
return dmrs_to_rdf, dmrs_from_mrs

raise PyDelphinException(f"Not a valid format: {semrep}")

def _get_RDF_semrep(semrep, store):
    """
    Bind the namespace of the semantic representation to the RDF store and
    return the RDFLib terms that the profile conversion needs for it.

    Parameters:
        semrep: lower-case name of the representation: "mrs", "eds" or "dmrs".
        store: RDFLib store on which the representation prefix is bound.

    Returns:
        A pair (class URI, property URI): the class of the representation
        (e.g. DMRS.DMRS) and the DELPH property that links a result to it
        (e.g. DELPH.hasDMRS).

    Raises:
        PyDelphinException: if 'semrep' is not a supported representation.
    """
    # Fail loudly instead of silently returning None, which would only
    # surface later as an opaque unpacking error in the caller. The message
    # matches the one used when selecting the converters.
    if semrep not in ("mrs", "eds", "dmrs"):
        raise PyDelphinException(f"Not a valid format: {semrep}")

    # The namespace IRI and prefix use the lower-case name, while the class
    # and the linking property use the upper-case one (MRS, EDS, DMRS).
    tag = semrep.upper()
    namespace = Namespace(f"http://www.delph-in.net/schema/{semrep}#")
    store.bind(semrep, namespace)
    return namespace[tag], DELPH[f"has{tag}"]

# sets parser and interface function
parser = argparse.ArgumentParser(add_help=False)
parser.set_defaults(func=__cli_parse__)

# sets the command infos
COMMAND_INFO = {
'name': 'profile-to-rdf', # Required
'help': 'delphin profile to rdf', # Optional
'help': 'incr tsdb test suite to rdf', # Optional
'description': __doc__, # Optional
'parser': parser, # Required
}
Expand All @@ -130,23 +198,23 @@ def _get_parsers(semrep):
"profile",
help="profile path")

_default_prefix = "http://example.com/example"
_default_prefix = "http://example.com/"
parser.add_argument(
"-p",
# "--prefix",
dest="prefix",
help=f"URI prefix (default: {_default_prefix})",
default=_default_prefix)

_default_output = "output.ttl"
_default_output = "output.nq"
parser.add_argument(
"-o",
# "--output",
dest="output",
help=f"output file name (default: {_default_output})",
default=_default_output)

_defaut_format = "turtle"
_defaut_format = "nquads"
parser.add_argument(
"-f",
# "--format",
Expand All @@ -159,5 +227,5 @@ def _get_parsers(semrep):
# "-t",
"--to",
dest="semrep",
help=f"(mrs|eds|dmrs) modeled semantic representation (default: {_default_delphin})",
help=f"(mrs|dmrs|eds) semantic representation to serialize (default: {_default_delphin})",
default=_default_delphin)
2 changes: 1 addition & 1 deletion delphin/rdf/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

__name__ = "Delphin RDF"
__summary__ = "DELPH-IN formats in RDF"
__version__ = "1.0.1"
__version__ = "1.0.3"

__author__ = "foo"
__email__ = "foo"
Expand Down
Loading

0 comments on commit 5ffb0e1

Please sign in to comment.