diff --git a/dependencies/rdflib/__init__.py b/dependencies/rdflib/__init__.py index c7b249a18..c03beb09e 100644 --- a/dependencies/rdflib/__init__.py +++ b/dependencies/rdflib/__init__.py @@ -42,11 +42,16 @@ True """ +import logging +import sys +from importlib import metadata + +_DISTRIBUTION_METADATA = metadata.metadata("rdflib") + __docformat__ = "restructuredtext en" -# The format of the __version__ line is matched by a regex in setup.py -__version__ = "6.2.0" -__date__ = "2022-12-20" +__version__: str = _DISTRIBUTION_METADATA["Version"] +__date__ = "2023-08-02" __all__ = [ "URIRef", @@ -82,15 +87,14 @@ "TIME", "VANN", "VOID", + "XMLNS", "XSD", "util", "plugin", "query", + "NORMALIZE_LITERALS", ] -import logging -import sys - logger = logging.getLogger(__name__) try: diff --git a/dependencies/rdflib/_networking.py b/dependencies/rdflib/_networking.py new file mode 100644 index 000000000..311096a89 --- /dev/null +++ b/dependencies/rdflib/_networking.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +import string +import sys +from typing import Dict +from urllib.error import HTTPError +from urllib.parse import quote as urlquote +from urllib.parse import urljoin, urlsplit +from urllib.request import HTTPRedirectHandler, Request, urlopen +from urllib.response import addinfourl + + +def _make_redirect_request(request: Request, http_error: HTTPError) -> Request: + """ + Create a new request object for a redirected request. + + The logic is based on `urllib.request.HTTPRedirectHandler` from `this commit _`. + + :param request: The original request that resulted in the redirect. + :param http_error: The response to the original request that indicates a + redirect should occur and contains the new location. + :return: A new request object to the location indicated by the response. + :raises HTTPError: the supplied ``http_error`` if the redirect request + cannot be created. + :raises ValueError: If the response code is `None`. 
+ :raises ValueError: If the response does not contain a ``Location`` header + or the ``Location`` header is not a string. + :raises HTTPError: If the scheme of the new location is not ``http``, + ``https``, or ``ftp``. + :raises HTTPError: If there are too many redirects or a redirect loop. + """ + new_url = http_error.headers.get("Location") + if new_url is None: + raise http_error + if not isinstance(new_url, str): + raise ValueError(f"Location header {new_url!r} is not a string") + + new_url_parts = urlsplit(new_url) + + # For security reasons don't allow redirection to anything other than http, + # https or ftp. + if new_url_parts.scheme not in ("http", "https", "ftp", ""): + raise HTTPError( + new_url, + http_error.code, + f"{http_error.reason} - Redirection to url {new_url!r} is not allowed", + http_error.headers, + http_error.fp, + ) + + # http.client.parse_headers() decodes as ISO-8859-1. Recover the original + # bytes and percent-encode non-ASCII bytes, and any special characters such + # as the space. 
+ new_url = urlquote(new_url, encoding="iso-8859-1", safe=string.punctuation) + new_url = urljoin(request.full_url, new_url) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + content_headers = ("content-length", "content-type") + newheaders = { + k: v for k, v in request.headers.items() if k.lower() not in content_headers + } + new_request = Request( + new_url, + headers=newheaders, + origin_req_host=request.origin_req_host, + unverifiable=True, + ) + + visited: Dict[str, int] + if hasattr(request, "redirect_dict"): + visited = request.redirect_dict + if ( + visited.get(new_url, 0) >= HTTPRedirectHandler.max_repeats + or len(visited) >= HTTPRedirectHandler.max_redirections + ): + raise HTTPError( + request.full_url, + http_error.code, + HTTPRedirectHandler.inf_msg + http_error.reason, + http_error.headers, + http_error.fp, + ) + else: + visited = {} + setattr(request, "redirect_dict", visited) + + setattr(new_request, "redirect_dict", visited) + visited[new_url] = visited.get(new_url, 0) + 1 + return new_request + + +def _urlopen(request: Request) -> addinfourl: + """ + This is a shim for `urlopen` that handles HTTP redirects with status code + 308 (Permanent Redirect). + + This function should be removed once all supported versions of Python + handles the 308 HTTP status code. + + :param request: The request to open. + :return: The response to the request. + """ + try: + return urlopen(request) + except HTTPError as error: + if error.code == 308 and sys.version_info < (3, 11): + # HTTP response code 308 (Permanent Redirect) is not supported by python + # versions older than 3.11. See and + # for more details. + # This custom error handling should be removed once all supported + # versions of Python handles 308. 
+ new_request = _make_redirect_request(request, error) + return _urlopen(new_request) + else: + raise diff --git a/dependencies/rdflib/_type_checking.py b/dependencies/rdflib/_type_checking.py index 4f32cdc3b..c9e0202ea 100644 --- a/dependencies/rdflib/_type_checking.py +++ b/dependencies/rdflib/_type_checking.py @@ -14,16 +14,13 @@ and this module is not part the the RDFLib public API. """ -import sys - __all__ = [ "_NamespaceSetString", + "_MulPathMod", ] -if sys.version_info >= (3, 8): - from typing import Literal as PyLiteral -else: - from typing_extensions import Literal as PyLiteral +from typing import Literal as PyLiteral _NamespaceSetString = PyLiteral["core", "rdflib", "none"] +_MulPathMod = PyLiteral["*", "+", "?"] # noqa: F722 diff --git a/dependencies/rdflib/collection.py b/dependencies/rdflib/collection.py index 1286a948f..fd64ab20b 100644 --- a/dependencies/rdflib/collection.py +++ b/dependencies/rdflib/collection.py @@ -1,23 +1,31 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Iterable, Iterator, List, Optional + from rdflib.namespace import RDF -from rdflib.term import BNode, Literal +from rdflib.term import BNode, Node + +if TYPE_CHECKING: + from rdflib.graph import Graph __all__ = ["Collection"] -class Collection(object): +class Collection: __doc__ = """ See "Emulating container types": https://docs.python.org/reference/datamodel.html#emulating-container-types + >>> from rdflib.term import Literal >>> from rdflib.graph import Graph >>> from pprint import pprint - >>> listName = BNode() + >>> listname = BNode() >>> g = Graph('Memory') >>> listItem1 = BNode() >>> listItem2 = BNode() - >>> g.add((listName, RDF.first, Literal(1))) # doctest: +ELLIPSIS + >>> g.add((listname, RDF.first, Literal(1))) # doctest: +ELLIPSIS )> - >>> g.add((listName, RDF.rest, listItem1)) # doctest: +ELLIPSIS + >>> g.add((listname, RDF.rest, listItem1)) # doctest: +ELLIPSIS )> >>> g.add((listItem1, RDF.first, Literal(2))) # doctest: +ELLIPSIS )> 
@@ -27,7 +35,7 @@ class Collection(object): )> >>> g.add((listItem2, RDF.first, Literal(3))) # doctest: +ELLIPSIS )> - >>> c = Collection(g,listName) + >>> c = Collection(g,listname) >>> pprint([term.n3() for term in c]) [u'"1"^^', u'"2"^^', @@ -43,21 +51,22 @@ class Collection(object): True """ - def __init__(self, graph, uri, seq=[]): + def __init__(self, graph: Graph, uri: Node, seq: List[Node] = []): self.graph = graph self.uri = uri or BNode() self += seq - def n3(self): + def n3(self) -> str: """ + >>> from rdflib.term import Literal >>> from rdflib.graph import Graph - >>> listName = BNode() + >>> listname = BNode() >>> g = Graph('Memory') >>> listItem1 = BNode() >>> listItem2 = BNode() - >>> g.add((listName, RDF.first, Literal(1))) # doctest: +ELLIPSIS + >>> g.add((listname, RDF.first, Literal(1))) # doctest: +ELLIPSIS )> - >>> g.add((listName, RDF.rest, listItem1)) # doctest: +ELLIPSIS + >>> g.add((listname, RDF.rest, listItem1)) # doctest: +ELLIPSIS )> >>> g.add((listItem1, RDF.first, Literal(2))) # doctest: +ELLIPSIS )> @@ -67,19 +76,20 @@ def n3(self): )> >>> g.add((listItem2, RDF.first, Literal(3))) # doctest: +ELLIPSIS )> - >>> c = Collection(g, listName) + >>> c = Collection(g, listname) >>> print(c.n3()) #doctest: +NORMALIZE_WHITESPACE ( "1"^^ "2"^^ "3"^^ ) """ - return "( %s )" % (" ".join([i.n3() for i in self])) + # type error: "Node" has no attribute "n3" + return "( %s )" % (" ".join([i.n3() for i in self])) # type: ignore[attr-defined] - def _get_container(self, index): + def _get_container(self, index: int) -> Optional[Node]: """Gets the first, rest holding node at index.""" assert isinstance(index, int) graph = self.graph - container = self.uri + container: Optional[Node] = self.uri i = 0 while i < index: i += 1 @@ -88,31 +98,31 @@ def _get_container(self, index): break return container - def __len__(self): + def __len__(self) -> int: """length of items in collection.""" return len(list(self.graph.items(self.uri))) - def index(self, item): + 
def index(self, item: Node) -> int: """ Returns the 0-based numerical index of the item in the list """ - listName = self.uri + listname = self.uri index = 0 while True: - if (listName, RDF.first, item) in self.graph: + if (listname, RDF.first, item) in self.graph: return index else: - newLink = list(self.graph.objects(listName, RDF.rest)) + newlink = list(self.graph.objects(listname, RDF.rest)) index += 1 - if newLink == [RDF.nil]: + if newlink == [RDF.nil]: raise ValueError("%s is not in %s" % (item, self.uri)) - elif not newLink: + elif not newlink: raise Exception("Malformed RDF Collection: %s" % self.uri) else: - assert len(newLink) == 1, "Malformed RDF Collection: %s" % self.uri - listName = newLink[0] + assert len(newlink) == 1, "Malformed RDF Collection: %s" % self.uri + listname = newlink[0] - def __getitem__(self, key): + def __getitem__(self, key: int) -> Node: """TODO""" c = self._get_container(key) if c: @@ -124,7 +134,7 @@ def __getitem__(self, key): else: raise IndexError(key) - def __setitem__(self, key, value): + def __setitem__(self, key: int, value: Node) -> None: """TODO""" c = self._get_container(key) if c: @@ -132,7 +142,7 @@ def __setitem__(self, key, value): else: raise IndexError(key) - def __delitem__(self, key): + def __delitem__(self, key: int) -> None: """ >>> from rdflib.namespace import RDF, RDFS >>> from rdflib import Graph @@ -183,8 +193,9 @@ def __delitem__(self, key): pass elif key == len(self) - 1: # the tail - priorLink = self._get_container(key - 1) - self.graph.set((priorLink, RDF.rest, RDF.nil)) + priorlink = self._get_container(key - 1) + # type error: Argument 1 to "set" of "Graph" has incompatible type "Tuple[Optional[Node], URIRef, URIRef]"; expected "Tuple[Node, Node, Any]" + self.graph.set((priorlink, RDF.rest, RDF.nil)) # type: ignore[arg-type] graph.remove((current, None, None)) else: next = self._get_container(key + 1) @@ -193,11 +204,11 @@ def __delitem__(self, key): graph.remove((current, None, None)) 
graph.set((prior, RDF.rest, next)) - def __iter__(self): + def __iter__(self) -> Iterator[Node]: """Iterator over items in Collections""" return self.graph.items(self.uri) - def _end(self): + def _end(self) -> Node: # find end of list container = self.uri while True: @@ -207,12 +218,13 @@ def _end(self): else: container = rest - def append(self, item): + def append(self, item: Node) -> Collection: """ + >>> from rdflib.term import Literal >>> from rdflib.graph import Graph - >>> listName = BNode() + >>> listname = BNode() >>> g = Graph() - >>> c = Collection(g,listName,[Literal(1),Literal(2)]) + >>> c = Collection(g,listname,[Literal(1),Literal(2)]) >>> links = [ ... list(g.subjects(object=i, predicate=RDF.first))[0] for i in c] >>> len([i for i in links if (i, RDF.rest, RDF.nil) in g]) @@ -231,8 +243,7 @@ def append(self, item): self.graph.add((end, RDF.rest, RDF.nil)) return self - def __iadd__(self, other): - + def __iadd__(self, other: Iterable[Node]): end = self._end() self.graph.remove((end, RDF.rest, None)) @@ -248,7 +259,7 @@ def __iadd__(self, other): return self def clear(self): - container = self.uri + container: Optional[Node] = self.uri graph = self.graph while container: rest = graph.value(container, RDF.rest) diff --git a/dependencies/rdflib/compare.py b/dependencies/rdflib/compare.py index 592836d6e..30f52d973 100644 --- a/dependencies/rdflib/compare.py +++ b/dependencies/rdflib/compare.py @@ -118,7 +118,7 @@ def _total_seconds(td): return result -class _runtime(object): +class _runtime: # noqa: N801 def __init__(self, label): self.label = label @@ -137,7 +137,7 @@ def wrapped_f(*args, **kwargs): return wrapped_f -class _call_count(object): +class _call_count: # noqa: N801 def __init__(self, label): self.label = label @@ -284,7 +284,7 @@ def copy(self): _HashT = Callable[[], "HASH"] -class _TripleCanonicalizer(object): +class _TripleCanonicalizer: def __init__(self, graph: Graph, hashfunc: _HashT = sha256): self.graph = graph @@ -619,7 +619,7 @@ def 
similar(g1: Graph, g2: Graph): def _squashed_graphs_triples(g1: Graph, g2: Graph): - for (t1, t2) in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))): + for t1, t2 in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))): yield t1, t2 diff --git a/dependencies/rdflib/compat.py b/dependencies/rdflib/compat.py index 52407812b..1cc4adacd 100644 --- a/dependencies/rdflib/compat.py +++ b/dependencies/rdflib/compat.py @@ -6,15 +6,7 @@ import codecs import re import warnings -from typing import TYPE_CHECKING, Match - -if TYPE_CHECKING: - import xml.etree.ElementTree as etree -else: - try: - from lxml import etree - except ImportError: - import xml.etree.ElementTree as etree +from typing import Match def cast_bytes(s, enc="utf-8"): @@ -105,10 +97,3 @@ def decodeUnicodeEscape(escaped: str) -> str: # Most of times, there are no backslashes in strings. return escaped return _turtle_escape_pattern.sub(_turtle_escape_subber, escaped) - - -# Migration to abc in Python 3.8 -try: - from collections.abc import Mapping, MutableMapping -except: - from collections import Mapping, MutableMapping diff --git a/dependencies/rdflib/container.py b/dependencies/rdflib/container.py index ef071fe73..56554df04 100644 --- a/dependencies/rdflib/container.py +++ b/dependencies/rdflib/container.py @@ -6,7 +6,7 @@ __all__ = ["Container", "Bag", "Seq", "Alt", "NoElementException"] -class Container(object): +class Container: """A class for constructing RDF containers, as per https://www.w3.org/TR/rdf11-mt/#rdf-containers Basic usage, creating a ``Bag`` and adding to it:: @@ -64,10 +64,8 @@ def __init__(self, graph, uri, seq=[], rtype="Bag"): self.graph.add((self.uri, RDF.type, RDF[self._rtype])) def n3(self): - items = [] for i in range(len(self)): - v = self[i + 1] items.append(v) @@ -92,11 +90,11 @@ def index(self, item): pred = self.graph.predicates(self.uri, item) if not pred: raise ValueError("%s is not in %s" % (item, "container")) - LI_INDEX = URIRef(str(RDF) + "_") + li_index = 
URIRef(str(RDF) + "_") i = None for p in pred: - i = int(p.replace(LI_INDEX, "")) + i = int(p.replace(li_index, "")) return i def __getitem__(self, key): @@ -163,7 +161,6 @@ def items(self): return l_ def end(self): # - # find end index (1-based) of container container = self.uri @@ -194,7 +191,6 @@ def append_multiple(self, other): container = self.uri for item in other: - end += 1 self._len += 1 elem_uri = str(RDF) + "_" + str(end) diff --git a/dependencies/rdflib/events.py b/dependencies/rdflib/events.py index e973c3082..84c9f07a0 100644 --- a/dependencies/rdflib/events.py +++ b/dependencies/rdflib/events.py @@ -1,3 +1,5 @@ +from __future__ import annotations + __doc__ = """ Dirt Simple Events @@ -23,10 +25,13 @@ """ + +from typing import Any, Dict, Optional + __all__ = ["Event", "Dispatcher"] -class Event(object): +class Event: """ An event is a container for attributes. The source of an event creates this object, or a subclass, gives it any kind of data that @@ -47,15 +52,15 @@ def __repr__(self): return "" % ([a for a in attrs],) -class Dispatcher(object): +class Dispatcher: """ An object that can dispatch events to a privately managed group of subscribers. 
""" - _dispatch_map = None + _dispatch_map: Optional[Dict[Any, Any]] = None - def set_map(self, amap): + def set_map(self, amap: Dict[Any, Any]): self._dispatch_map = amap return self diff --git a/dependencies/rdflib/exceptions.py b/dependencies/rdflib/exceptions.py index 2d71e6e2e..708756ef6 100644 --- a/dependencies/rdflib/exceptions.py +++ b/dependencies/rdflib/exceptions.py @@ -5,13 +5,17 @@ __all__ = [ "Error", "ParserError", + "UniquenessError", ] +from typing import Any, Optional + + class Error(Exception): """Base class for rdflib exceptions.""" - def __init__(self, msg=None): + def __init__(self, msg: Optional[str] = None): Exception.__init__(self, msg) self.msg = msg @@ -19,18 +23,18 @@ def __init__(self, msg=None): class ParserError(Error): """RDF Parser error.""" - def __init__(self, msg): + def __init__(self, msg: str): Error.__init__(self, msg) - self.msg = msg + self.msg: str = msg - def __str__(self): + def __str__(self) -> str: return self.msg class UniquenessError(Error): """A uniqueness assumption was made in the context, and that is not true""" - def __init__(self, values): + def __init__(self, values: Any): Error.__init__( self, "\ diff --git a/dependencies/rdflib/extras/describer.py b/dependencies/rdflib/extras/describer.py index 1f2ce79aa..023970555 100644 --- a/dependencies/rdflib/extras/describer.py +++ b/dependencies/rdflib/extras/describer.py @@ -6,7 +6,7 @@ semi-declarative manner. It has methods for creating literal values, rel and rev resource relations (somewhat resembling RDFa). -The `rel` and ``rev`` methods return a context manager which sets the current +The `Describer.rel` and `Describer.rev` methods return a context manager which sets the current about to the referenced resource for the context scope (for use with the ``with`` statement). @@ -20,7 +20,7 @@ >>> >>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#") >>> - >>> class Person(object): + >>> class Person: ... def __init__(self): ... 
self.first_name = u"Some" ... self.last_name = u"Body" @@ -112,7 +112,7 @@ from rdflib.term import BNode, Identifier, Literal, URIRef -class Describer(object): +class Describer: def __init__(self, graph=None, about=None, base=None): if graph is None: graph = Graph() diff --git a/dependencies/rdflib/extras/external_graph_libs.py b/dependencies/rdflib/extras/external_graph_libs.py index 69d42b29f..f50994b5b 100644 --- a/dependencies/rdflib/extras/external_graph_libs.py +++ b/dependencies/rdflib/extras/external_graph_libs.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # encoding: utf-8 +from __future__ import annotations """Convert (to and) from rdflib graphs to other well known graph libraries. @@ -13,6 +14,10 @@ """ import logging +from typing import TYPE_CHECKING, Any, Dict, List + +if TYPE_CHECKING: + from rdflib.graph import Graph logger = logging.getLogger(__name__) @@ -22,9 +27,9 @@ def _identity(x): def _rdflib_to_networkx_graph( - graph, + graph: Graph, nxgraph, - calc_weights, + calc_weights: bool, edge_attrs, transform_s=_identity, transform_o=_identity, @@ -70,7 +75,7 @@ def _rdflib_to_networkx_graph( def rdflib_to_networkx_multidigraph( - graph, edge_attrs=lambda s, p, o: {"key": p}, **kwds + graph: Graph, edge_attrs=lambda s, p, o: {"key": p}, **kwds ): """Converts the given graph into a networkx.MultiDiGraph. @@ -86,7 +91,7 @@ def rdflib_to_networkx_multidigraph( By default this will include setting the MultiDiGraph key=p here. If you don't want to be able to re-identify the edge later on, you - can set this to `lambda s, p, o: {}`. In this case MultiDiGraph's + can set this to ``lambda s, p, o: {}``. In this case MultiDiGraph's default (increasing ints) will be used. 
Returns: @@ -115,7 +120,7 @@ def rdflib_to_networkx_multidigraph( True >>> mdg.has_edge(a, b, key=1) True - """ + """ # noqa: W605 import networkx as nx mdg = nx.MultiDiGraph() @@ -124,8 +129,8 @@ def rdflib_to_networkx_multidigraph( def rdflib_to_networkx_digraph( - graph, - calc_weights=True, + graph: Graph, + calc_weights: bool = True, edge_attrs=lambda s, p, o: {"triples": [(s, p, o)]}, **kwds, ): @@ -138,9 +143,9 @@ def rdflib_to_networkx_digraph( :Parameters: - - `graph`: a rdflib.Graph. - - `calc_weights`: If true calculate multi-graph edge-count as edge 'weight' - - `edge_attrs`: Callable to construct later edge_attributes. It receives + - ``graph``: a rdflib.Graph. + - ``calc_weights``: If true calculate multi-graph edge-count as edge 'weight' + - ``edge_attrs``: Callable to construct later edge_attributes. It receives 3 variables (s, p, o) and should construct a dictionary that is passed to networkx's add_edge(s, o, \*\*attrs) function. @@ -148,7 +153,7 @@ def rdflib_to_networkx_digraph( which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to - `lambda s, p, o: {}`. + ``lambda s, p, o: {}``. Returns: networkx.DiGraph @@ -178,7 +183,7 @@ def rdflib_to_networkx_digraph( >>> 'triples' in dg[a][b] False - """ + """ # noqa: W605 import networkx as nx dg = nx.DiGraph() @@ -187,8 +192,8 @@ def rdflib_to_networkx_digraph( def rdflib_to_networkx_graph( - graph, - calc_weights=True, + graph: Graph, + calc_weights: bool = True, edge_attrs=lambda s, p, o: {"triples": [(s, p, o)]}, **kwds, ): @@ -211,7 +216,7 @@ def rdflib_to_networkx_graph( which is treated specially by us to be merged. Other attributes of multi-edges will only contain the attributes of the first edge. If you don't want the 'triples' attribute for tracking, set this to - `lambda s, p, o: {}`. + ``lambda s, p, o: {}``. 
Returns: networkx.Graph @@ -241,7 +246,7 @@ def rdflib_to_networkx_graph( False >>> 'triples' in ug[a][b] False - """ + """ # noqa: W605 import networkx as nx g = nx.Graph() @@ -250,9 +255,9 @@ def rdflib_to_networkx_graph( def rdflib_to_graphtool( - graph, - v_prop_names=[str("term")], - e_prop_names=[str("term")], + graph: Graph, + v_prop_names: List[str] = [str("term")], + e_prop_names: List[str] = [str("term")], transform_s=lambda s, p, o: {str("term"): s}, transform_p=lambda s, p, o: {str("term"): p}, transform_o=lambda s, p, o: {str("term"): o}, @@ -313,7 +318,8 @@ def rdflib_to_graphtool( True """ - import graph_tool as gt + # pytype error: Can't find module 'graph_tool'. + import graph_tool as gt # pytype: disable=import-error g = gt.Graph() @@ -323,7 +329,7 @@ def rdflib_to_graphtool( eprops = [(epn, g.new_edge_property("object")) for epn in e_prop_names] for epn, eprop in eprops: g.edge_properties[epn] = eprop - node_to_vertex = {} + node_to_vertex: Dict[Any, Any] = {} for s, p, o in graph: sv = node_to_vertex.get(s) if sv is None: diff --git a/dependencies/rdflib/extras/infixowl.py b/dependencies/rdflib/extras/infixowl.py index 166186629..dadc6324e 100644 --- a/dependencies/rdflib/extras/infixowl.py +++ b/dependencies/rdflib/extras/infixowl.py @@ -1,8 +1,20 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- __doc__ = """RDFLib Python binding for OWL Abstract Syntax +OWL Constructor DL Syntax Manchester OWL Syntax Example +==================================================================================== +intersectionOf C ∩ D C AND D Human AND Male +unionOf C ∪ D C OR D Man OR Woman +complementOf ¬ C NOT C NOT Male +oneOf {a} ∪ {b}... 
{a b ...} {England Italy Spain} +someValuesFrom ∃ R C R SOME C hasColleague SOME Professor +allValuesFrom ∀ R C R ONLY C hasColleague ONLY Professor +minCardinality ≥ N R R MIN 3 hasColleague MIN 3 +maxCardinality ≤ N R R MAX 3 hasColleague MAX 3 +cardinality = N R R EXACTLY 3 hasColleague EXACTLY 3 +hasValue ∃ R {a} R VALUE a hasColleague VALUE Matthew + see: http://www.w3.org/TR/owl-semantics/syntax.html http://owl-workshop.man.ac.uk/acceptedLong/submission_9.pdf @@ -13,12 +25,9 @@ Uses Manchester Syntax for __repr__ ->>> exNs = Namespace('http://example.com/') ->>> namespace_manager = NamespaceManager(Graph()) ->>> namespace_manager.bind('ex', exNs, override=False) ->>> namespace_manager.bind('owl', OWL, override=False) +>>> exNs = Namespace("http://example.com/") >>> g = Graph() ->>> g.namespace_manager = namespace_manager +>>> g.bind("ex", exNs, override=False) Now we have an empty graph, we can construct OWL classes in it using the Python classes defined in this module @@ -40,8 +49,6 @@ This can also be used against already populated graphs: >>> owlGraph = Graph().parse(str(OWL)) ->>> namespace_manager.bind('owl', OWL, override=False) ->>> owlGraph.namespace_manager = namespace_manager >>> list(Class(OWL.Class, graph=owlGraph).subClassOf) [Class: rdfs:Class ] @@ -98,24 +105,23 @@ Restrictions can also be created using Manchester OWL syntax in 'colloquial' Python ->>> exNs.hasParent << some >> Class(exNs.Physician, graph=g) +>>> exNs.hasParent @ some @ Class(exNs.Physician, graph=g) ( ex:hasParent SOME ex:Physician ) ->>> Property(exNs.hasParent, graph=g) << max >> Literal(1) +>>> Property(exNs.hasParent, graph=g) @ max @ Literal(1) ( ex:hasParent MAX 1 ) ->>> print(g.serialize(format='pretty-xml')) #doctest: +SKIP +>>> print(g.serialize(format='pretty-xml')) # doctest: +SKIP """ import itertools import logging -from rdflib import RDF, RDFS, BNode, Literal, Namespace, URIRef, Variable from rdflib.collection import Collection from rdflib.graph import Graph 
-from rdflib.namespace import OWL, XSD, NamespaceManager -from rdflib.term import Identifier +from rdflib.namespace import OWL, RDF, RDFS, XSD, Namespace, NamespaceManager +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable from rdflib.util import first logger = logging.getLogger(__name__) @@ -131,63 +137,55 @@ """ __all__ = [ - "nsBinds", "ACE_NS", - "CLASS_RELATIONS", - "some", - "only", - "max", - "min", - "exactly", - "value", - "PropertyAbstractSyntax", "AllClasses", "AllDifferent", "AllProperties", "AnnotatableTerms", "BooleanClass", + "CLASS_RELATIONS", "Callable", "CastClass", "Class", "ClassNamespaceFactory", - "classOrIdentifier", - "classOrTerm", "CommonNSBindings", "ComponentTerms", "DeepClassClear", "EnumeratedClass", - "generateQName", "GetIdentifiedClasses", "Individual", "Infix", "MalformedClass", - "manchesterSyntax", - "Ontology", + "MalformedClassError", "OWLRDFListProxy", + "Ontology", "Property", - "propertyOrIdentifier", + "PropertyAbstractSyntax", "Restriction", - "termDeletionDecorator", + "classOrIdentifier", + "classOrTerm", + "exactly", + "generateQName", + "manchesterSyntax", + "max", + "min", + "nsBinds", + "only", + "propertyOrIdentifier", + "some", + "value", ] # definition of an Infix operator class # this recipe also works in jython -# calling sequence for the infix is either: -# x |op| y -# or: -# x <> y +# calling sequence for the infix is: +# x @ op @ y class Infix: def __init__(self, function): self.function = function - def __ror__(self, other): - return Infix(lambda x, self=self, other=other: self.function(other, x)) - - def __or__(self, other): - return self.function(other) - def __rlshift__(self, other): return Infix(lambda x, self=self, other=other: self.function(other, x)) @@ -250,6 +248,8 @@ def manchesterSyntax( # noqa: N802 ): """ Core serialization + thing is a Class and is processed as a subject + store is an RDFLib Graph to be queried about thing """ assert thing is not None if boolean: @@ -315,8 
+315,9 @@ def castToQName(x): # noqa: N802 OWL.minCardinality: "MIN", OWL.cardinality: "EQUALS", } - for s, p, o in store.triples_choices((thing, list(cardlookup.keys()), None)): + for _s, p, o in store.triples_choices((thing, list(cardlookup.keys()), None)): return "( %s %s %s )" % (propstring, cardlookup[p], o) + # is thing a complement of anything compl = list(store.objects(subject=thing, predicate=OWL.complementOf)) if compl: return "( NOT %s )" % (manchesterSyntax(compl[0], store)) @@ -337,7 +338,8 @@ def castToQName(x): # noqa: N802 except Exception: if isinstance(thing, BNode): return thing.n3() - return "<" + thing + ">" + # Expect the unexpected + return thing.identifier if not isinstance(thing, str) else thing label = first(Class(thing, graph=store).label) if label: return label @@ -351,14 +353,6 @@ def GetIdentifiedClasses(graph): # noqa: N802 yield Class(c) -def termDeletionDecorator(prop): # noqa: N802 - def someFunc(func): # noqa: N802 - func.property = prop - return func - - return someFunc - - class TermDeletionHelper: def __init__(self, prop): self.prop = prop @@ -370,9 +364,10 @@ def _remover(inst): return _remover -class Individual(object): +class Individual: """ - A typed individual + A typed individual, the base class of the InfixOWL classes. + """ factoryGraph = Graph() # noqa: N815 @@ -396,17 +391,46 @@ def __init__(self, identifier=None, graph=None): pass # pragma: no cover def clearInDegree(self): # noqa: N802 + """ + Remove references to this individual as an object in the + backing store. + """ self.graph.remove((None, None, self.identifier)) def clearOutDegree(self): # noqa: N802 + """ + Remove all statements to this individual as a subject in the + backing store. Note that this only removes the statements + themselves, not the blank node closure so there is a chance + that this will cause orphaned blank nodes to remain in the + graph. 
+ """ self.graph.remove((self.identifier, None, None)) def delete(self): + """ + Delete the individual from the graph, clearing the in and + out degrees. + """ self.clearInDegree() self.clearOutDegree() def replace(self, other): - for s, p, o in self.graph.triples((None, None, self.identifier)): + """ + Replace the individual in the graph with the given other, + causing all triples that refer to it to be changed and then + delete the individual. + + >>> g = Graph() + >>> b = Individual(OWL.Restriction, g) + >>> b.type = RDFS.Resource + >>> len(list(b.type)) + 1 + >>> del b.type + >>> len(list(b.type)) + 0 + """ + for s, p, _o in self.graph.triples((None, None, self.identifier)): self.graph.add((s, p, classOrIdentifier(other))) self.delete() @@ -497,6 +521,75 @@ def _delete_sameAs(self): # noqa: N802 class AnnotatableTerms(Individual): """ Terms in an OWL ontology with rdfs:label and rdfs:comment + + + ## Interface with ATTEMPTO (http://attempto.ifi.uzh.ch/site) + + ### Verbalisation of OWL entity IRIS + + #### How are OWL entity IRIs verbalized? + + The OWL verbalizer maps OWL entity IRIs to ACE content words such + that + + - OWL individuals map to ACE proper names (PN) + - OWL classes map to ACE common nouns (CN) + - OWL properties map to ACE transitive verbs (TV) + + There are 6 morphological categories that determine the surface form + of an IRI: + + - singular form of a proper name (e.g. John) + - singular form of a common noun (e.g. man) + - plural form of a common noun (e.g. men) + - singular form of a transitive verb (e.g. mans) + - plural form of a transitive verb (e.g. man) + - past participle form a transitive verb (e.g. manned) + + The user has full control over the eventual surface forms of the IRIs + but has to choose them in terms of the above categories. + Furthermore, + + - the surface forms must be legal ACE content words (e.g. 
they + should not contain punctuation symbols); + - the mapping of IRIs to surface forms must be bidirectional + within the same word class, in order to be able to (if needed) + parse the verbalization back into OWL in a semantics preserving + way. + + ### Using the lexicon + + It is possible to specify the mapping of IRIs to surface forms using + the following annotation properties: + + .. code-block:: none + + http://attempto.ifi.uzh.ch/ace_lexicon#PN_sg + http://attempto.ifi.uzh.ch/ace_lexicon#CN_sg + http://attempto.ifi.uzh.ch/ace_lexicon#CN_pl + http://attempto.ifi.uzh.ch/ace_lexicon#TV_sg + http://attempto.ifi.uzh.ch/ace_lexicon#TV_pl + http://attempto.ifi.uzh.ch/ace_lexicon#TV_vbg + + For example, the following axioms state that if the IRI "#man" is used + as a plural common noun, then the wordform men must be used by the + verbalizer. If, however, it is used as a singular transitive verb, + then mans must be used. + + .. code-block:: none + + + + #man + men + + + + + #man + mans + + """ def __init__( @@ -653,16 +746,13 @@ def _del_imports(self): def AllClasses(graph): # noqa: N802 - prevclasses = set() - for c in graph.subjects(predicate=RDF.type, object=OWL.Class): - if c not in prevclasses: - prevclasses.add(c) - yield Class(c) + for c in set(graph.subjects(predicate=RDF.type, object=OWL.Class)): + yield Class(c) def AllProperties(graph): # noqa: N802 prevprops = set() - for s, p, o in graph.triples_choices( + for s, _p, o in graph.triples_choices( ( None, RDF.type, @@ -733,7 +823,7 @@ def ComponentTerms(cls): # noqa: N802 if OWL.Restriction in cls.type: try: cls = CastClass(cls, Individual.factoryGraph) - for s, p, inner_class_id in cls.factoryGraph.triples_choices( + for _s, _p, inner_class_id in cls.factoryGraph.triples_choices( (cls.identifier, [OWL.allValuesFrom, OWL.someValuesFrom], None) ): inner_class = Class(inner_class_id, skipOWLClassMembership=True) @@ -761,7 +851,7 @@ def ComponentTerms(cls): # noqa: N802 yield _c else: yield inner_class - 
for s, p, o in cls.factoryGraph.triples_choices( + for _s, _p, o in cls.factoryGraph.triples_choices( (classOrIdentifier(cls), CLASS_RELATIONS, None) ): if isinstance(o, BNode): @@ -776,26 +866,23 @@ def DeepClassClear(class_to_prune): # noqa: N802 Recursively clear the given class, continuing where any related class is an anonymous class - >>> EX = Namespace('http://example.com/') - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', EX, override=False) - >>> namespace_manager.bind('owl', OWL, override=False) + >>> EX = Namespace("http://example.com/") >>> g = Graph() - >>> g.namespace_manager = namespace_manager + >>> g.bind("ex", EX, override=False) >>> Individual.factoryGraph = g >>> classB = Class(EX.B) >>> classC = Class(EX.C) >>> classD = Class(EX.D) >>> classE = Class(EX.E) >>> classF = Class(EX.F) - >>> anonClass = EX.someProp << some >> classD + >>> anonClass = EX.someProp @ some @ classD >>> classF += anonClass >>> list(anonClass.subClassOf) [Class: ex:F ] >>> classA = classE | classF | anonClass >>> classB += classA >>> classA.equivalentClass = [Class()] - >>> classB.subClassOf = [EX.someProp << some >> classC] + >>> classB.subClassOf = [EX.someProp @ some @ classC] >>> classA ( ex:E OR ex:F OR ( ex:someProp SOME ex:D ) ) >>> DeepClassClear(classA) @@ -841,7 +928,16 @@ def deepClearIfBNode(_class): # noqa: N802 ) -class MalformedClass(Exception): +class MalformedClass(ValueError): + """ + .. deprecated:: TODO-NEXT-VERSION + This class will be removed in version ``7.0.0``. 
+ """ + + pass + + +class MalformedClassError(MalformedClass): def __init__(self, msg): self.msg = msg @@ -854,7 +950,7 @@ def CastClass(c, graph=None): # noqa: N802 for kind in graph.objects(subject=classOrIdentifier(c), predicate=RDF.type): if kind == OWL.Restriction: kwargs = {"identifier": classOrIdentifier(c), "graph": graph} - for s, p, o in graph.triples((classOrIdentifier(c), None, None)): + for _s, p, o in graph.triples((classOrIdentifier(c), None, None)): if p != RDF.type: if p == OWL.onProperty: kwargs["onProperty"] = o @@ -865,10 +961,10 @@ def CastClass(c, graph=None): # noqa: N802 if not set( [str(i.split(str(OWL))[-1]) for i in Restriction.restrictionKinds] ).intersection(kwargs): - raise MalformedClass("Malformed owl:Restriction") + raise MalformedClassError("Malformed owl:Restriction") return Restriction(**kwargs) else: - for s, p, o in graph.triples_choices( + for _s, p, _o in graph.triples_choices( ( classOrIdentifier(c), [OWL.intersectionOf, OWL.unionOf, OWL.oneOf], @@ -1051,20 +1147,16 @@ def __and__(self, other): Construct an anonymous class description consisting of the intersection of this class and 'other' and return it - >>> exNs = Namespace('http://example.com/') - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', exNs, override=False) - >>> namespace_manager.bind('owl', OWL, override=False) - >>> g = Graph() - >>> g.namespace_manager = namespace_manager - Chaining 3 intersections + >>> exNs = Namespace("http://example.com/") + >>> g = Graph() + >>> g.bind("ex", exNs, override=False) >>> female = Class(exNs.Female, graph=g) >>> human = Class(exNs.Human, graph=g) >>> youngPerson = Class(exNs.YoungPerson, graph=g) >>> youngWoman = female & human & youngPerson - >>> youngWoman #doctest: +SKIP + >>> youngWoman # doctest: +SKIP ex:YoungPerson THAT ( ex:Female AND ex:Human ) >>> isinstance(youngWoman, BooleanClass) True @@ -1168,11 +1260,8 @@ def _get_parents(self): >>> from rdflib.util import first >>> exNs = 
Namespace('http://example.com/') - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', exNs, override=False) - >>> namespace_manager.bind('owl', OWL, override=False) >>> g = Graph() - >>> g.namespace_manager = namespace_manager + >>> g.bind("ex", exNs, override=False) >>> Individual.factoryGraph = g >>> brother = Class(exNs.Brother) >>> sister = Class(exNs.Sister) @@ -1217,11 +1306,11 @@ def isPrimitive(self): # noqa: N802 return False # sc = list(self.subClassOf) ec = list(self.equivalentClass) - for boolclass, p, rdf_list in self.graph.triples_choices( + for _boolclass, p, rdf_list in self.graph.triples_choices( (self.identifier, [OWL.intersectionOf, OWL.unionOf], None) ): ec.append(manchesterSyntax(rdf_list, self.graph, boolean=p)) - for e in ec: + for _e in ec: return False if self.complementOf: return False @@ -1243,7 +1332,7 @@ def __repr__(self, full=False, normalization=True): exprs = [] sc = list(self.subClassOf) ec = list(self.equivalentClass) - for boolclass, p, rdf_list in self.graph.triples_choices( + for _boolclass, p, rdf_list in self.graph.triples_choices( (self.identifier, [OWL.intersectionOf, OWL.unionOf], None) ): ec.append(manchesterSyntax(rdf_list, self.graph, boolean=p)) @@ -1320,7 +1409,7 @@ def __repr__(self, full=False, normalization=True): ) + klassdescr -class OWLRDFListProxy(object): +class OWLRDFListProxy: def __init__(self, rdf_list, members=None, graph=None): if graph: self.graph = graph @@ -1400,25 +1489,21 @@ class EnumeratedClass(OWLRDFListProxy, Class): axiom ::= 'EnumeratedClass(' classID ['Deprecated'] { annotation } { individualID } ')' - - >>> exNs = Namespace('http://example.com/') - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', exNs, override=False) - >>> namespace_manager.bind('owl', OWL, override=False) + >>> exNs = Namespace("http://example.com/") >>> g = Graph() - >>> g.namespace_manager = namespace_manager + >>> g.bind("ex", exNs, override=False) 
>>> Individual.factoryGraph = g >>> ogbujiBros = EnumeratedClass(exNs.ogbujicBros, ... members=[exNs.chime, ... exNs.uche, ... exNs.ejike]) - >>> ogbujiBros #doctest: +SKIP + >>> ogbujiBros # doctest: +SKIP { ex:chime ex:uche ex:ejike } >>> col = Collection(g, first( ... g.objects(predicate=OWL.oneOf, subject=ogbujiBros.identifier))) >>> sorted([g.qname(item) for item in col]) ['ex:chime', 'ex:ejike', 'ex:uche'] - >>> print(g.serialize(format='n3')) #doctest: +SKIP + >>> print(g.serialize(format='n3')) # doctest: +SKIP @prefix ex: . @prefix owl: . @prefix rdf: . @@ -1469,16 +1554,14 @@ class BooleanClassExtentHelper: >>> testGraph = Graph() >>> Individual.factoryGraph = testGraph >>> EX = Namespace("http://example.com/") - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', EX, override=False) - >>> testGraph.namespace_manager = namespace_manager + >>> testGraph.bind("ex", EX, override=False) >>> fire = Class(EX.Fire) >>> water = Class(EX.Water) >>> testClass = BooleanClass(members=[fire, water]) >>> testClass2 = BooleanClass( ... operator=OWL.unionOf, members=[fire, water]) >>> for c in BooleanClass.getIntersections(): - ... print(c) #doctest: +SKIP + ... print(c) # doctest: +SKIP ( ex:Fire AND ex:Water ) >>> for c in BooleanClass.getUnions(): ... 
print(c) #doctest: +SKIP @@ -1498,7 +1581,10 @@ def _getExtent(): # noqa: N802 class Callable: def __init__(self, anycallable): - self.__call__ = anycallable + self._callfn = anycallable + + def __call__(self, *args, **kwargs): + return self._callfn(*args, **kwargs) class BooleanClass(OWLRDFListProxy, Class): @@ -1528,7 +1614,7 @@ def __init__( ): if operator is None: props = [] - for s, p, o in graph.triples_choices( + for _s, p, _o in graph.triples_choices( (identifier, [OWL.intersectionOf, OWL.unionOf], None) ): props.append(p) @@ -1540,9 +1626,7 @@ def __init__( rdf_list = list(self.graph.objects(predicate=operator, subject=self.identifier)) assert ( not members or not rdf_list - ), "This is a previous boolean class description!" + repr( - Collection(self.graph, rdf_list[0]).n3() - ) + ), "This is a previous boolean class description." OWLRDFListProxy.__init__(self, rdf_list, members) def copy(self): @@ -1575,23 +1659,22 @@ def changeOperator(self, newOperator): # noqa: N802, N803 Converts a unionOf / intersectionOf class expression into one that instead uses the given operator - >>> testGraph = Graph() >>> Individual.factoryGraph = testGraph >>> EX = Namespace("http://example.com/") - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', EX, override=False) - >>> testGraph.namespace_manager = namespace_manager + >>> testGraph.bind("ex", EX, override=False) >>> fire = Class(EX.Fire) >>> water = Class(EX.Water) >>> testClass = BooleanClass(members=[fire,water]) - >>> testClass #doctest: +SKIP + >>> testClass ( ex:Fire AND ex:Water ) >>> testClass.changeOperator(OWL.unionOf) - >>> testClass #doctest: +SKIP + >>> testClass ( ex:Fire OR ex:Water ) - >>> try: testClass.changeOperator(OWL.unionOf) - ... except Exception as e: print(e) + >>> try: + ... testClass.changeOperator(OWL.unionOf) + ... except Exception as e: + ... print(e) # doctest: +SKIP The new operator is already being used! 
""" @@ -1604,7 +1687,11 @@ def __repr__(self): """ Returns the Manchester Syntax equivalent for this class """ - return manchesterSyntax(self._rdfList.uri, self.graph, boolean=self._operator) + return manchesterSyntax( + self._rdfList.uri if isinstance(self._rdfList, Collection) else BNode(), + self.graph, + boolean=self._operator, + ) def __or__(self, other): """ @@ -1617,6 +1704,8 @@ def __or__(self, other): def AllDifferent(members): # noqa: N802 """ + TODO: implement this function + DisjointClasses(' description description { description } ')' """ @@ -1638,6 +1727,7 @@ class Restriction(Class): OWL.allValuesFrom, OWL.someValuesFrom, OWL.hasValue, + OWL.cardinality, OWL.maxCardinality, OWL.minCardinality, ] @@ -1645,7 +1735,7 @@ class Restriction(Class): def __init__( self, onProperty, # noqa: N803 - graph=Graph(), + graph=None, allValuesFrom=None, someValuesFrom=None, value=None, @@ -1654,6 +1744,7 @@ def __init__( minCardinality=None, identifier=None, ): + graph = Graph() if graph is None else graph super(Restriction, self).__init__( identifier, graph=graph, skipOWLClassMembership=True ) @@ -1675,7 +1766,12 @@ def __init__( (minCardinality, OWL.minCardinality), ] valid_restr_props = [(i, oterm) for (i, oterm) in restr_types if i is not None] - assert len(valid_restr_props) + if not len(valid_restr_props): + raise ValueError( + "Missing value. One of: allValuesFrom, someValuesFrom," + "value, cardinality, maxCardinality or minCardinality" + "must have a value." 
+ ) restriction_range, restriction_type = valid_restr_props.pop() self.restrictionType = restriction_type if isinstance(restriction_range, Identifier): @@ -1702,16 +1798,14 @@ def serialize(self, graph): >>> g1 = Graph() >>> g2 = Graph() >>> EX = Namespace("http://example.com/") - >>> namespace_manager = NamespaceManager(g1) - >>> namespace_manager.bind('ex', EX, override=False) - >>> namespace_manager = NamespaceManager(g2) - >>> namespace_manager.bind('ex', EX, override=False) + >>> g1.bind("ex", EX, override=False) + >>> g2.bind("ex", EX, override=False) >>> Individual.factoryGraph = g1 >>> prop = Property(EX.someProp, baseType=OWL.DatatypeProperty) >>> restr1 = (Property( ... EX.someProp, - ... baseType=OWL.DatatypeProperty)) << some >> (Class(EX.Foo)) - >>> restr1 #doctest: +SKIP + ... baseType=OWL.DatatypeProperty)) @ some @ (Class(EX.Foo)) + >>> restr1 # doctest: +SKIP ( ex:someProp SOME ex:Foo ) >>> restr1.serialize(g2) >>> Individual.factoryGraph = g2 @@ -1753,10 +1847,10 @@ def _get_onproperty(self): )[0] def _set_onproperty(self, prop): - triple = (self.identifier, OWL.onProperty, propertyOrIdentifier(prop)) if not prop: return - elif triple in self.graph: + triple = (self.identifier, OWL.onProperty, propertyOrIdentifier(prop)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1777,10 +1871,10 @@ def _get_allvaluesfrom(self): return None def _set_allvaluesfrom(self, other): - triple = (self.identifier, OWL.allValuesFrom, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.allValuesFrom, classOrIdentifier(other)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1801,10 +1895,10 @@ def _get_somevaluesfrom(self): return None def _set_somevaluesfrom(self, other): - triple = (self.identifier, OWL.someValuesFrom, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.someValuesFrom, classOrIdentifier(other)) + if 
triple in self.graph: return else: self.graph.set(triple) @@ -1823,10 +1917,10 @@ def _get_hasvalue(self): return None def _set_hasvalue(self, other): - triple = (self.identifier, OWL.hasValue, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.hasValue, classOrIdentifier(other)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1843,10 +1937,10 @@ def _get_cardinality(self): return None def _set_cardinality(self, other): - triple = (self.identifier, OWL.cardinality, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.cardinality, classOrTerm(other)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1865,10 +1959,10 @@ def _get_maxcardinality(self): return None def _set_maxcardinality(self, other): - triple = (self.identifier, OWL.maxCardinality, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.maxCardinality, classOrTerm(other)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1889,10 +1983,10 @@ def _get_mincardinality(self): return None def _set_mincardinality(self, other): - triple = (self.identifier, OWL.minCardinality, classOrIdentifier(other)) if not other: return - elif triple in self.graph: + triple = (self.identifier, OWL.minCardinality, classOrIdentifier(other)) + if triple in self.graph: return else: self.graph.set(triple) @@ -1906,11 +2000,11 @@ def _del_mincardinality(self): ) def restrictionKind(self): # noqa: N802 - for p in self.graph.triple_choices( + for s, p, o in self.graph.triples_choices( (self.identifier, self.restrictionKinds, None) ): - return p.split(OWL)[-1] - raise + return p.split(str(OWL))[-1] + return None def __repr__(self): """ @@ -1939,6 +2033,7 @@ def __repr__(self): ) value = Infix(lambda prop, _class: Restriction(prop, graph=prop.graph, value=_class)) +# Unused PropertyAbstractSyntax = """ %s( %s { %s } 
%s @@ -1964,6 +2059,23 @@ class Property(AnnotatableTerms): """ def setupVerbAnnotations(self, verb_annotations): # noqa: N802 + """ + + OWL properties map to ACE transitive verbs (TV) + + There are 6 morphological categories that determine the surface form + of an IRI: + + singular form of a transitive verb (e.g. mans) + plural form of a transitive verb (e.g. man) + past participle form a transitive verb (e.g. manned) + + http://attempto.ifi.uzh.ch/ace_lexicon#TV_sg + http://attempto.ifi.uzh.ch/ace_lexicon#TV_pl + http://attempto.ifi.uzh.ch/ace_lexicon#TV_vbg + + """ + if isinstance(verb_annotations, tuple): tv_sgprop, tv_plprop, tv_vbg = verb_annotations else: @@ -2057,7 +2169,7 @@ def __repr__(self): OWL.SymmetricProperty in self.type and " Symmetric" or "", ) ) - for s, p, roletype in self.graph.triples_choices( + for _s, _p, roletype in self.graph.triples_choices( ( self.identifier, RDF.type, @@ -2068,13 +2180,13 @@ def __repr__(self): ], ) ): - rt.append(str(roletype.split(OWL)[-1])) + rt.append(str(roletype.split(str(OWL))[-1])) else: rt.append( "DatatypeProperty( %s %s" % (self.qname, first(self.comment) and first(self.comment) or "") ) - for s, p, roletype in self.graph.triples( + for _s, _p, roletype in self.graph.triples( (self.identifier, RDF.type, OWL.FunctionalProperty) ): rt.append(" Functional") @@ -2199,19 +2311,20 @@ def _del_range(self): def replace(self, other): # extension = [] - for s, p, o in self.extent: + for s, _p, o in self.extent: self.graph.add((s, propertyOrIdentifier(other), o)) self.graph.remove((None, self.identifier, None)) -def CommonNSBindings(graph, additionalNS={}): # noqa: N802, N803 +def CommonNSBindings(graph, additionalNS=None): # noqa: N802, N803 """ Takes a graph and binds the common namespaces (rdf,rdfs, & owl) """ + additional_ns = {} if additionalNS is None else additionalNS namespace_manager = NamespaceManager(graph) namespace_manager.bind("rdfs", RDFS) namespace_manager.bind("rdf", RDF) namespace_manager.bind("owl", 
OWL) - for prefix, uri in list(additionalNS.items()): + for prefix, uri in list(additional_ns.items()): namespace_manager.bind(prefix, uri, override=False) graph.namespace_manager = namespace_manager diff --git a/dependencies/rdflib/graph.py b/dependencies/rdflib/graph.py index 6933b736a..4d8645b2f 100644 --- a/dependencies/rdflib/graph.py +++ b/dependencies/rdflib/graph.py @@ -1,18 +1,23 @@ +from __future__ import annotations + import logging -import os import pathlib import random -import shutil -import tempfile from io import BytesIO from typing import ( IO, TYPE_CHECKING, Any, BinaryIO, + Callable, + Dict, Generator, Iterable, + List, + Mapping, + NoReturn, Optional, + Set, TextIO, Tuple, Type, @@ -37,33 +42,78 @@ from rdflib.resource import Resource from rdflib.serializer import Serializer from rdflib.store import Store -from rdflib.term import BNode, Genid, IdentifiedNode, Literal, Node, RDFLibGenid, URIRef +from rdflib.term import ( + BNode, + Genid, + IdentifiedNode, + Identifier, + Literal, + Node, + RDFLibGenid, + URIRef, +) + +if TYPE_CHECKING: + import typing_extensions as te + + import rdflib.query + from rdflib.plugins.sparql.sparql import Query, Update _SubjectType = Node _PredicateType = Node _ObjectType = Node +_ContextIdentifierType = IdentifiedNode _TripleType = Tuple["_SubjectType", "_PredicateType", "_ObjectType"] -_QuadType = Tuple["_SubjectType", "_PredicateType", "_ObjectType", "Graph"] +_QuadType = Tuple["_SubjectType", "_PredicateType", "_ObjectType", "_ContextType"] _OptionalQuadType = Tuple[ - "_SubjectType", "_PredicateType", "_ObjectType", Optional["Graph"] + "_SubjectType", "_PredicateType", "_ObjectType", Optional["_ContextType"] ] +_TripleOrOptionalQuadType = Union["_TripleType", "_OptionalQuadType"] _OptionalIdentifiedQuadType = Tuple[ - "_SubjectType", "_PredicateType", "_ObjectType", Optional["Node"] + "_SubjectType", "_PredicateType", "_ObjectType", Optional["_ContextIdentifierType"] ] _TriplePatternType = Tuple[ 
Optional["_SubjectType"], Optional["_PredicateType"], Optional["_ObjectType"] ] +_TriplePathPatternType = Tuple[Optional["_SubjectType"], Path, Optional["_ObjectType"]] _QuadPatternType = Tuple[ Optional["_SubjectType"], Optional["_PredicateType"], Optional["_ObjectType"], - Optional["Graph"], + Optional["_ContextType"], +] +_QuadPathPatternType = Tuple[ + Optional["_SubjectType"], + Path, + Optional["_ObjectType"], + Optional["_ContextType"], ] +_TripleOrQuadPatternType = Union["_TriplePatternType", "_QuadPatternType"] +_TripleOrQuadPathPatternType = Union["_TriplePathPatternType", "_QuadPathPatternType"] +_TripleSelectorType = Tuple[ + Optional["_SubjectType"], + Optional[Union["Path", "_PredicateType"]], + Optional["_ObjectType"], +] +_QuadSelectorType = Tuple[ + Optional["_SubjectType"], + Optional[Union["Path", "_PredicateType"]], + Optional["_ObjectType"], + Optional["_ContextType"], +] +_TripleOrQuadSelectorType = Union["_TripleSelectorType", "_QuadSelectorType"] +_TriplePathType = Tuple["_SubjectType", Path, "_ObjectType"] +_TripleOrTriplePathType = Union["_TripleType", "_TriplePathType"] + _GraphT = TypeVar("_GraphT", bound="Graph") +_ConjunctiveGraphT = TypeVar("_ConjunctiveGraphT", bound="ConjunctiveGraph") +_DatasetT = TypeVar("_DatasetT", bound="Dataset") -assert Literal # avoid warning -assert Namespace # avoid warning +# type error: Function "Type[Literal]" could always be true in boolean contex +assert Literal # type: ignore[truthy-function] # avoid warning +# type error: Function "Type[Namespace]" could always be true in boolean context +assert Namespace # type: ignore[truthy-function] # avoid warning if TYPE_CHECKING: from rdflib._type_checking import _NamespaceSetString @@ -95,10 +145,10 @@ considered to be the boundary for closed world assumptions. This boundary is equivalent to that of the store instance (which is itself uniquely identified and distinct from other instances of -:class:`Store` that signify other Conjunctive Graphs). 
It is +:class:`~rdflib.store.Store` that signify other Conjunctive Graphs). It is equivalent to all the named graphs within it and associated with a -``_default_`` graph which is automatically assigned a :class:`BNode` -for an identifier - if one isn't given. +``_default_`` graph which is automatically assigned a +:class:`~rdflib.term.BNode` for an identifier - if one isn't given. see :class:`~rdflib.graph.ConjunctiveGraph` @@ -327,8 +377,34 @@ "UnSupportedAggregateOperation", "ReadOnlyGraphAggregate", "BatchAddGraph", + "_ConjunctiveGraphT", + "_ContextIdentifierType", + "_DatasetT", + "_GraphT", + "_ObjectType", + "_OptionalIdentifiedQuadType", + "_OptionalQuadType", + "_PredicateType", + "_QuadPathPatternType", + "_QuadPatternType", + "_QuadSelectorType", + "_QuadType", + "_SubjectType", + "_TripleOrOptionalQuadType", + "_TripleOrTriplePathType", + "_TripleOrQuadPathPatternType", + "_TripleOrQuadPatternType", + "_TripleOrQuadSelectorType", + "_TriplePathPatternType", + "_TriplePathType", + "_TriplePatternType", + "_TripleSelectorType", + "_TripleType", ] +# : Transitive closure arg type. 
+_TCArgT = TypeVar("_TCArgT") + class Graph(Node): """An RDF Graph @@ -356,16 +432,16 @@ class Graph(Node): def __init__( self, store: Union[Store, str] = "default", - identifier: Optional[Union[IdentifiedNode, str]] = None, + identifier: Optional[Union[_ContextIdentifierType, str]] = None, namespace_manager: Optional[NamespaceManager] = None, base: Optional[str] = None, - bind_namespaces: "_NamespaceSetString" = "core", + bind_namespaces: "_NamespaceSetString" = "rdflib", ): super(Graph, self).__init__() self.base = base - self.__identifier: Node + self.__identifier: _ContextIdentifierType self.__identifier = identifier or BNode() # type: ignore[assignment] - if not isinstance(self.__identifier, Node): + if not isinstance(self.__identifier, IdentifiedNode): self.__identifier = URIRef(self.__identifier) # type: ignore[unreachable] self.__store: Store if not isinstance(store, Store): @@ -384,7 +460,7 @@ def store(self) -> Store: return self.__store @property - def identifier(self) -> Node: + def identifier(self) -> "_ContextIdentifierType": return self.__identifier @property @@ -397,13 +473,13 @@ def namespace_manager(self) -> NamespaceManager: return self.__namespace_manager @namespace_manager.setter - def namespace_manager(self, nm: NamespaceManager): + def namespace_manager(self, nm: NamespaceManager) -> None: self.__namespace_manager = nm - def __repr__(self): + def __repr__(self) -> str: return "" % (self.identifier, type(self)) - def __str__(self): + def __str__(self) -> str: if isinstance(self.identifier, URIRef): return ( "%s a rdfg:Graph;rdflib:storage " + "[a rdflib:Store;rdfs:label '%s']." @@ -413,26 +489,26 @@ def __str__(self): "[a rdfg:Graph;rdflib:storage " + "[a rdflib:Store;rdfs:label '%s']]." 
) % self.store.__class__.__name__ - def toPython(self): # noqa: N802 + def toPython(self: _GraphT) -> _GraphT: # noqa: N802 return self - def destroy(self, configuration): - """Destroy the store identified by `configuration` if supported""" + def destroy(self: _GraphT, configuration: str) -> _GraphT: + """Destroy the store identified by ``configuration`` if supported""" self.__store.destroy(configuration) return self # Transactional interfaces (optional) - def commit(self): + def commit(self: _GraphT) -> _GraphT: """Commits active transactions""" self.__store.commit() return self - def rollback(self): + def rollback(self: _GraphT) -> _GraphT: """Rollback active transactions""" self.__store.rollback() return self - def open(self, configuration, create=False): + def open(self, configuration: str, create: bool = False) -> Optional[int]: """Open the graph store Might be necessary for stores that require opening a connection to a @@ -440,7 +516,7 @@ def open(self, configuration, create=False): """ return self.__store.open(configuration, create) - def close(self, commit_pending_transaction=False): + def close(self, commit_pending_transaction: bool = False) -> None: """Close the graph store Might be necessary for stores that require closing a connection to a @@ -448,7 +524,7 @@ def close(self, commit_pending_transaction=False): """ return self.__store.close(commit_pending_transaction=commit_pending_transaction) - def add(self, triple: "_TripleType"): + def add(self: _GraphT, triple: "_TripleType") -> _GraphT: """Add a triple with self as context""" s, p, o = triple assert isinstance(s, Node), "Subject %s must be an rdflib term" % (s,) @@ -457,7 +533,7 @@ def add(self, triple: "_TripleType"): self.__store.add((s, p, o), self, quoted=False) return self - def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 + def addN(self: _GraphT, quads: Iterable["_QuadType"]) -> _GraphT: # noqa: N802 """Add a sequence of triple with context""" self.__store.addN( @@ -469,7 +545,7 @@ 
def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 ) return self - def remove(self, triple): + def remove(self: _GraphT, triple: "_TriplePatternType") -> _GraphT: """Remove a triple from the graph If the triple does not provide a context attribute, removes the triple @@ -488,33 +564,21 @@ def triples( @overload def triples( self, - triple: Tuple[Optional["_SubjectType"], Path, Optional["_ObjectType"]], - ) -> Generator[Tuple["_SubjectType", Path, "_ObjectType"], None, None]: + triple: "_TriplePathPatternType", + ) -> Generator["_TriplePathType", None, None]: ... @overload def triples( self, - triple: Tuple[ - Optional["_SubjectType"], - Union[None, Path, "_PredicateType"], - Optional["_ObjectType"], - ], - ) -> Generator[ - Tuple["_SubjectType", Union["_PredicateType", Path], "_ObjectType"], None, None - ]: + triple: "_TripleSelectorType", + ) -> Generator["_TripleOrTriplePathType", None, None]: ... def triples( self, - triple: Tuple[ - Optional["_SubjectType"], - Union[None, Path, "_PredicateType"], - Optional["_ObjectType"], - ], - ) -> Generator[ - Tuple["_SubjectType", Union["_PredicateType", Path], "_ObjectType"], None, None - ]: + triple: "_TripleSelectorType", + ) -> Generator["_TripleOrTriplePathType", None, None]: """Generator over the triple store Returns triples that match the given triple pattern. If triple pattern @@ -525,9 +589,7 @@ def triples( for _s, _o in p.eval(self, s, o): yield _s, p, _o else: - # type error: Argument 1 to "triples" of "Store" has incompatible type "Tuple[Optional[Node], Optional[Node], Optional[Node]]"; expected "Tuple[Optional[IdentifiedNode], Optional[IdentifiedNode], Optional[Node]]" - # NOTE on type error: This is because the store typing is too narrow, willbe fixed in subsequent PR. 
- for (_s, _p, _o), cg in self.__store.triples((s, p, o), context=self): # type: ignore [arg-type] + for (_s, _p, _o), cg in self.__store.triples((s, p, o), context=self): yield _s, _p, _o def __getitem__(self, item): @@ -573,7 +635,6 @@ def __getitem__(self, item): """ if isinstance(item, slice): - s, p, o = item.start, item.stop, item.step if s is None and p is None and o is None: return self.triples((s, p, o)) @@ -594,7 +655,6 @@ def __getitem__(self, item): return (s, p, o) in self elif isinstance(item, (Path, Node)): - return self.predicate_objects(item) else: @@ -602,28 +662,29 @@ def __getitem__(self, item): "You can only index a graph by a single rdflib term or path, or a slice of rdflib terms." ) - def __len__(self): + def __len__(self) -> int: """Returns the number of triples in the graph If context is specified then the number of triples in the context is returned instead. """ - return self.__store.__len__(context=self) + # type error: Unexpected keyword argument "context" for "__len__" of "Store" + return self.__store.__len__(context=self) # type: ignore[call-arg] def __iter__(self) -> Generator["_TripleType", None, None]: """Iterates over all triples in the store""" return self.triples((None, None, None)) - def __contains__(self, triple): + def __contains__(self, triple: _TripleSelectorType) -> bool: """Support for 'triple in graph' syntax""" for triple in self.triples(triple): return True return False - def __hash__(self): + def __hash__(self) -> int: return hash(self.identifier) - def __cmp__(self, other): + def __cmp__(self, other) -> int: if other is None: return -1 elif isinstance(other, Graph): @@ -636,23 +697,23 @@ def __cmp__(self, other): # equivalent to None (if compared to it)? 
return 1 - def __eq__(self, other): + def __eq__(self, other) -> bool: return isinstance(other, Graph) and self.identifier == other.identifier - def __lt__(self, other): + def __lt__(self, other) -> bool: return (other is None) or ( isinstance(other, Graph) and self.identifier < other.identifier ) - def __le__(self, other): + def __le__(self, other: Graph) -> bool: return self < other or self == other - def __gt__(self, other): + def __gt__(self, other) -> bool: return (isinstance(other, Graph) and self.identifier > other.identifier) or ( other is not None ) - def __ge__(self, other): + def __ge__(self, other: Graph) -> bool: return self > other or self == other def __iadd__(self: "_GraphT", other: Iterable["_TripleType"]) -> "_GraphT": @@ -675,7 +736,7 @@ def __add__(self, other: "Graph") -> "Graph": retval = type(self)() except TypeError: retval = Graph() - for (prefix, uri) in set(list(self.namespaces()) + list(other.namespaces())): + for prefix, uri in set(list(self.namespaces()) + list(other.namespaces())): retval.bind(prefix, uri) for x in self: retval.add(x) @@ -707,7 +768,7 @@ def __sub__(self, other: "Graph") -> "Graph": retval.add(x) return retval - def __xor__(self, other): + def __xor__(self, other: "Graph") -> "Graph": """Set-theoretic XOR. BNode IDs are not changed.""" return (self - other) + (other - self) @@ -717,7 +778,9 @@ def __xor__(self, other): # Conv. 
methods - def set(self, triple): + def set( + self: _GraphT, triple: Tuple[_SubjectType, _PredicateType, _ObjectType] + ) -> _GraphT: """Convenience method to update the value of object Remove any existing triples for subject and predicate before adding @@ -871,16 +934,75 @@ def predicate_objects( ) raise - def triples_choices(self, triple, context=None): + def triples_choices( + self, + triple: Union[ + Tuple[List["_SubjectType"], "_PredicateType", "_ObjectType"], + Tuple["_SubjectType", List["_PredicateType"], "_ObjectType"], + Tuple["_SubjectType", "_PredicateType", List["_ObjectType"]], + ], + context: Optional["_ContextType"] = None, + ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple + # type error: Argument 1 to "triples_choices" of "Store" has incompatible type "Tuple[Union[List[Node], Node], Union[Node, List[Node]], Union[Node, List[Node]]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]" + # type error note: unpacking discards type info for (s, p, o), cg in self.store.triples_choices( - (subject, predicate, object_), context=self + (subject, predicate, object_), context=self # type: ignore[arg-type] ): yield s, p, o + @overload def value( - self, subject=None, predicate=RDF.value, object=None, default=None, any=True - ): + self, + subject: None = ..., + predicate: None = ..., + object: Optional[_ObjectType] = ..., + default: Optional[Node] = ..., + any: bool = ..., + ) -> None: + ... + + @overload + def value( + self, + subject: Optional[_SubjectType] = ..., + predicate: None = ..., + object: None = ..., + default: Optional[Node] = ..., + any: bool = ..., + ) -> None: + ... + + @overload + def value( + self, + subject: None = ..., + predicate: Optional[_PredicateType] = ..., + object: None = ..., + default: Optional[Node] = ..., + any: bool = ..., + ) -> None: + ... 
+ + @overload + def value( + self, + subject: Optional[_SubjectType] = ..., + predicate: Optional[_PredicateType] = ..., + object: Optional[_ObjectType] = ..., + default: Optional[Node] = ..., + any: bool = ..., + ) -> Optional[Node]: + ... + + def value( + self, + subject: Optional[_SubjectType] = None, + predicate: Optional[_PredicateType] = RDF.value, + object: Optional[_ObjectType] = None, + default: Optional[Node] = None, + any: bool = True, + ) -> Optional[Node]: """Get a value for a pair of two criteria Exactly one of subject, predicate, object must be None. Useful if one @@ -937,7 +1059,7 @@ def value( pass return retval - def items(self, list): + def items(self, list: Node) -> Generator[Node, None, None]: """Generator over all items in the resource specified by list list is an RDF collection. @@ -947,12 +1069,18 @@ def items(self, list): item = self.value(list, RDF.first) if item is not None: yield item - list = self.value(list, RDF.rest) + # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Node") + list = self.value(list, RDF.rest) # type: ignore[assignment] if list in chain: raise ValueError("List contains a recursive rdf:rest reference") chain.add(list) - def transitiveClosure(self, func, arg, seen=None): # noqa: N802 + def transitiveClosure( # noqa: N802 + self, + func: Callable[[_TCArgT, "Graph"], Iterable[_TCArgT]], + arg: _TCArgT, + seen: Optional[Dict[_TCArgT, int]] = None, + ): # noqa: N802 """ Generates transitive closure of a user-defined function against the graph @@ -1009,7 +1137,12 @@ def transitiveClosure(self, func, arg, seen=None): # noqa: N802 for rt_2 in self.transitiveClosure(func, rt, seen): yield rt_2 - def transitive_objects(self, subject, predicate, remember=None): + def transitive_objects( + self, + subject: Optional[_SubjectType], + predicate: Optional[_PredicateType], + remember: Optional[Dict[Optional[_SubjectType], int]] = None, + ) -> Generator[Optional[_SubjectType], None, 
None]: """Transitively generate objects for the ``predicate`` relationship Generated objects belong to the depth first transitive closure of the @@ -1025,7 +1158,12 @@ def transitive_objects(self, subject, predicate, remember=None): for o in self.transitive_objects(object, predicate, remember): yield o - def transitive_subjects(self, predicate, object, remember=None): + def transitive_subjects( + self, + predicate: Optional[_PredicateType], + object: Optional[_ObjectType], + remember: Optional[Dict[Optional[_ObjectType], int]] = None, + ) -> Generator[Optional[_ObjectType], None, None]: """Transitively generate subjects for the ``predicate`` relationship Generated subjects belong to the depth first transitive closure of the @@ -1041,13 +1179,19 @@ def transitive_subjects(self, predicate, object, remember=None): for s in self.transitive_subjects(predicate, subject, remember): yield s - def qname(self, uri): + def qname(self, uri: str) -> str: return self.namespace_manager.qname(uri) - def compute_qname(self, uri, generate=True): + def compute_qname(self, uri: str, generate: bool = True) -> Tuple[str, URIRef, str]: return self.namespace_manager.compute_qname(uri, generate) - def bind(self, prefix, namespace, override=True, replace=False) -> None: + def bind( + self, + prefix: Optional[str], + namespace: Any, # noqa: F811 + override: bool = True, + replace: bool = False, + ) -> None: """Bind prefix to namespace If override is True will bind namespace to given prefix even @@ -1069,19 +1213,24 @@ def bind(self, prefix, namespace, override=True, replace=False) -> None: prefix, namespace, override=override, replace=replace ) - def namespaces(self): + def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]: """Generator over all the prefix, namespace tuples""" - for prefix, namespace in self.namespace_manager.namespaces(): + for prefix, namespace in self.namespace_manager.namespaces(): # noqa: F402 yield prefix, namespace - def absolutize(self, uri, defrag=1): + 
def absolutize(self, uri: str, defrag: int = 1) -> URIRef: """Turn uri into an absolute URI if it's not one already""" return self.namespace_manager.absolutize(uri, defrag) # no destination and non-None positional encoding @overload def serialize( - self, destination: None, format: str, base: Optional[str], encoding: str, **args + self, + destination: None, + format: str, + base: Optional[str], + encoding: str, + **args: Any, ) -> bytes: ... @@ -1094,7 +1243,7 @@ def serialize( base: Optional[str] = ..., *, encoding: str, - **args, + **args: Any, ) -> bytes: ... @@ -1106,7 +1255,7 @@ def serialize( format: str = ..., base: Optional[str] = ..., encoding: None = ..., - **args, + **args: Any, ) -> str: ... @@ -1118,7 +1267,7 @@ def serialize( format: str = ..., base: Optional[str] = ..., encoding: Optional[str] = ..., - **args, + **args: Any, ) -> "Graph": ... @@ -1130,18 +1279,18 @@ def serialize( format: str = ..., base: Optional[str] = ..., encoding: Optional[str] = ..., - **args, + **args: Any, ) -> Union[bytes, str, "Graph"]: ... def serialize( - self, + self: _GraphT, destination: Optional[Union[str, pathlib.PurePath, IO[bytes]]] = None, format: str = "turtle", base: Optional[str] = None, encoding: Optional[str] = None, **args: Any, - ) -> Union[bytes, str, "Graph"]: + ) -> Union[bytes, str, _GraphT]: """ Serialize the graph. 
@@ -1193,27 +1342,28 @@ def serialize( serializer.serialize(stream, base=base, encoding=encoding, **args) else: if isinstance(destination, pathlib.PurePath): - location = str(destination) + os_path = str(destination) else: location = cast(str, destination) - scheme, netloc, path, params, _query, fragment = urlparse(location) - if netloc != "": - raise ValueError( - f"destination {destination} is not a local file reference" - ) - fd, name = tempfile.mkstemp() - stream = os.fdopen(fd, "wb") - serializer.serialize(stream, base=base, encoding=encoding, **args) - stream.close() - dest = url2pathname(path) if scheme == "file" else location - if hasattr(shutil, "move"): - shutil.move(name, dest) - else: - shutil.copy(name, dest) - os.remove(name) + scheme, netloc, path, params, _query, fragment = urlparse(location) + if scheme == "file": + if netloc != "": + raise ValueError( + f"the file URI {location!r} has an authority component which is not supported" + ) + os_path = url2pathname(path) + else: + os_path = location + with open(os_path, "wb") as stream: + serializer.serialize(stream, encoding=encoding, **args) return self - def print(self, format="turtle", encoding="utf-8", out=None): + def print( + self, + format: str = "turtle", + encoding: str = "utf-8", + out: Optional[TextIO] = None, + ) -> None: print( self.serialize(None, format=format, encoding=encoding).decode(encoding), file=out, @@ -1230,34 +1380,46 @@ def parse( location: Optional[str] = None, file: Optional[Union[BinaryIO, TextIO]] = None, data: Optional[Union[str, bytes]] = None, - **args, - ): + **args: Any, + ) -> "Graph": """ Parse an RDF source adding the resulting triples to the Graph. - The source is specified using one of source, location, file or - data. + The source is specified using one of source, location, file or data. - :Parameters: + .. caution:: - - `source`: An InputSource, file-like object, or string. In the case - of a string the string is the location of the source. 
- - `location`: A string indicating the relative or absolute URL of the - source. Graph's absolutize method is used if a relative location - is specified. - - `file`: A file-like object. - - `data`: A string containing the data to be parsed. - - `format`: Used if format can not be determined from source, e.g. file - extension or Media Type. Defaults to text/turtle. Format support can - be extended with plugins, but "xml", "n3" (use for turtle), "nt" & - "trix" are built in. - - `publicID`: the logical URI to use as the document base. If None - specified the document location is used (at least in the case where - there is a document location). + This method can access directly or indirectly requested network or + file resources, for example, when parsing JSON-LD documents with + ``@context`` directives that point to a network location. - :Returns: + When processing untrusted or potentially malicious documents, + measures should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. - - self, the graph instance. + :param source: An `InputSource`, file-like object, `Path` like object, + or string. In the case of a string the string is the location of the + source. + :param location: A string indicating the relative or absolute URL of the + source. `Graph`'s absolutize method is used if a relative location + is specified. + :param file: A file-like object. + :param data: A string containing the data to be parsed. + :param format: Used if format can not be determined from source, e.g. + file extension or Media Type. Defaults to text/turtle. Format + support can be extended with plugins, but "xml", "n3" (use for + turtle), "nt" & "trix" are built in. + :param publicID: the logical URI to use as the document base. If None + specified the document location is used (at least in the case where + there is a document location). 
This is used as the base URI when + resolving relative URIs in the source document, as defined in `IETF + RFC 3986 + `_, + given the source document does not define a base URI. + :return: ``self``, i.e. the :class:`~rdflib.graph.Graph` instance. Examples: @@ -1272,7 +1434,7 @@ def parse( ... ... ... ''' - >>> import tempfile + >>> import os, tempfile >>> fd, file_name = tempfile.mkstemp() >>> f = os.fdopen(fd, "w") >>> dummy = f.write(my_data) # Returns num bytes written @@ -1317,10 +1479,10 @@ def parse( if format is None: if ( hasattr(source, "file") - and getattr(source.file, "name", None) # type: ignore[attr-defined] - and isinstance(source.file.name, str) # type: ignore[attr-defined] + and getattr(source.file, "name", None) + and isinstance(source.file.name, str) ): - format = rdflib.util.guess_format(source.file.name) # type: ignore[attr-defined] + format = rdflib.util.guess_format(source.file.name) if format is None: format = "turtle" could_not_guess_format = True @@ -1344,23 +1506,36 @@ def parse( def query( self, - query_object, + query_object: Union[str, Query], processor: Union[str, query.Processor] = "sparql", result: Union[str, Type[query.Result]] = "sparql", - initNs=None, # noqa: N803 - initBindings=None, + initNs: Optional[Mapping[str, Any]] = None, # noqa: N803 + initBindings: Optional[Mapping[str, Identifier]] = None, use_store_provided: bool = True, - **kwargs, + **kwargs: Any, ) -> query.Result: """ Query this graph. - A type of 'prepared queries' can be realised by providing - initial variable bindings with initBindings + A type of 'prepared queries' can be realised by providing initial + variable bindings with initBindings + + Initial namespaces are used to resolve prefixes used in the query, if + none are given, the namespaces from the graph's namespace manager are + used. + + .. 
caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. - Initial namespaces are used to resolve prefixes used in the query, - if none are given, the namespaces from the graph's namespace manager - are used. + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. :returntype: :class:`~rdflib.query.Result` @@ -1386,18 +1561,34 @@ def query( if not isinstance(processor, query.Processor): processor = plugin.get(processor, query.Processor)(self) - return result(processor.query(query_object, initBindings, initNs, **kwargs)) + # type error: Argument 1 to "Result" has incompatible type "Mapping[str, Any]"; expected "str" + return result(processor.query(query_object, initBindings, initNs, **kwargs)) # type: ignore[arg-type] def update( self, - update_object, - processor="sparql", - initNs=None, # noqa: N803 - initBindings=None, - use_store_provided=True, - **kwargs, - ): - """Update this graph with the given update query.""" + update_object: Union[Update, str], + processor: Union[str, rdflib.query.UpdateProcessor] = "sparql", + initNs: Optional[Mapping[str, Any]] = None, # noqa: N803 + initBindings: Optional[Mapping[str, Identifier]] = None, + use_store_provided: bool = True, + **kwargs: Any, + ) -> None: + """ + Update this graph with the given update query. + + .. caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. + + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. 
+ + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. + """ initBindings = initBindings or {} # noqa: N806 initNs = initNs or dict(self.namespaces()) # noqa: N806 @@ -1418,11 +1609,12 @@ def update( return processor.update(update_object, initBindings, initNs, **kwargs) - def n3(self): + def n3(self) -> str: """Return an n3 identifier for the Graph""" - return "[%s]" % self.identifier.n3() + # type error: "IdentifiedNode" has no attribute "n3" + return "[%s]" % self.identifier.n3() # type: ignore[attr-defined] - def __reduce__(self): + def __reduce__(self) -> Tuple[Type[Graph], Tuple[Store, _ContextIdentifierType]]: return ( Graph, ( @@ -1431,7 +1623,7 @@ def __reduce__(self): ), ) - def isomorphic(self, other): + def isomorphic(self, other: Graph) -> bool: """ does a very basic check if these graphs are the same If no BNodes are involved, this is accurate. @@ -1452,7 +1644,7 @@ def isomorphic(self, other): # TODO: very well could be a false positive at this point yet. return True - def connected(self): + def connected(self) -> bool: """Check if the Graph is connected The Graph is considered undirectional. @@ -1489,12 +1681,12 @@ def connected(self): else: return False - def all_nodes(self): + def all_nodes(self) -> Set[Node]: res = set(self.objects()) res.update(self.subjects()) return res - def collection(self, identifier): + def collection(self, identifier: _SubjectType) -> Collection: """Create a new ``Collection`` instance. Parameters: @@ -1514,7 +1706,7 @@ def collection(self, identifier): return Collection(self, identifier) - def resource(self, identifier): + def resource(self, identifier: Union[Node, str]) -> Resource: """Create a new ``Resource`` instance. 
Parameters: @@ -1535,20 +1727,32 @@ def resource(self, identifier): identifier = URIRef(identifier) return Resource(self, identifier) - def _process_skolem_tuples(self, target, func): + def _process_skolem_tuples( + self, target: Graph, func: Callable[[_TripleType], _TripleType] + ) -> None: for t in self.triples((None, None, None)): target.add(func(t)) - def skolemize(self, new_graph=None, bnode=None, authority=None, basepath=None): - def do_skolemize(bnode, t): + def skolemize( + self, + new_graph: Optional[Graph] = None, + bnode: Optional[BNode] = None, + authority: Optional[str] = None, + basepath: Optional[str] = None, + ) -> Graph: + def do_skolemize(bnode: BNode, t: _TripleType) -> _TripleType: (s, p, o) = t if s == bnode: + if TYPE_CHECKING: + assert isinstance(s, BNode) s = s.skolemize(authority=authority, basepath=basepath) if o == bnode: + if TYPE_CHECKING: + assert isinstance(o, BNode) o = o.skolemize(authority=authority, basepath=basepath) return s, p, o - def do_skolemize2(t): + def do_skolemize2(t: _TripleType) -> _TripleType: (s, p, o) = t if isinstance(s, BNode): s = s.skolemize(authority=authority, basepath=basepath) @@ -1561,31 +1765,42 @@ def do_skolemize2(t): if bnode is None: self._process_skolem_tuples(retval, do_skolemize2) elif isinstance(bnode, BNode): - self._process_skolem_tuples(retval, lambda t: do_skolemize(bnode, t)) + # type error: Argument 1 to "do_skolemize" has incompatible type "Optional[BNode]"; expected "BNode" + self._process_skolem_tuples(retval, lambda t: do_skolemize(bnode, t)) # type: ignore[arg-type] return retval - def de_skolemize(self, new_graph=None, uriref=None): - def do_de_skolemize(uriref, t): + def de_skolemize( + self, new_graph: Optional[Graph] = None, uriref: Optional[URIRef] = None + ) -> Graph: + def do_de_skolemize(uriref: URIRef, t: _TripleType) -> _TripleType: (s, p, o) = t if s == uriref: + if TYPE_CHECKING: + assert isinstance(s, URIRef) s = s.de_skolemize() if o == uriref: + if TYPE_CHECKING: + assert 
isinstance(o, URIRef) o = o.de_skolemize() return s, p, o - def do_de_skolemize2(t): + def do_de_skolemize2(t: _TripleType) -> _TripleType: (s, p, o) = t if RDFLibGenid._is_rdflib_skolem(s): - s = RDFLibGenid(s).de_skolemize() + # type error: Argument 1 to "RDFLibGenid" has incompatible type "Node"; expected "str" + s = RDFLibGenid(s).de_skolemize() # type: ignore[arg-type] elif Genid._is_external_skolem(s): - s = Genid(s).de_skolemize() + # type error: Argument 1 to "Genid" has incompatible type "Node"; expected "str" + s = Genid(s).de_skolemize() # type: ignore[arg-type] if RDFLibGenid._is_rdflib_skolem(o): - o = RDFLibGenid(o).de_skolemize() + # type error: Argument 1 to "RDFLibGenid" has incompatible type "Node"; expected "str" + o = RDFLibGenid(o).de_skolemize() # type: ignore[arg-type] elif Genid._is_external_skolem(o): - o = Genid(o).de_skolemize() + # type error: Argument 1 to "Genid" has incompatible type "Node"; expected "str" + o = Genid(o).de_skolemize() # type: ignore[arg-type] return s, p, o @@ -1594,11 +1809,14 @@ def do_de_skolemize2(t): if uriref is None: self._process_skolem_tuples(retval, do_de_skolemize2) elif isinstance(uriref, Genid): - self._process_skolem_tuples(retval, lambda t: do_de_skolemize(uriref, t)) + # type error: Argument 1 to "do_de_skolemize" has incompatible type "Optional[URIRef]"; expected "URIRef" + self._process_skolem_tuples(retval, lambda t: do_de_skolemize(uriref, t)) # type: ignore[arg-type] return retval - def cbd(self, resource): + def cbd( + self, resource: _SubjectType, *, target_graph: Optional[Graph] = None + ) -> Graph: """Retrieves the Concise Bounded Description of a Resource from a Graph Concise Bounded Description (CBD) is defined in [1] as: @@ -1624,12 +1842,16 @@ def cbd(self, resource): [1] https://www.w3.org/Submission/CBD/ :param resource: a URIRef object, of the Resource for queried for - :return: a Graph, subgraph of self + :param target_graph: Optionally, a graph to add the CBD to; otherwise, a new 
graph is created for the CBD + :return: a Graph, subgraph of self if no graph was provided otherwise the provided graph """ - subgraph = Graph() + if target_graph is None: + subgraph = Graph() + else: + subgraph = target_graph - def add_to_cbd(uri): + def add_to_cbd(uri: _SubjectType) -> None: for s, p, o in self.triples((uri, None, None)): subgraph.add((s, p, o)) # recurse 'down' through ll Blank Nodes @@ -1652,6 +1874,9 @@ def add_to_cbd(uri): return subgraph +_ContextType = Graph + + class ConjunctiveGraph(Graph): """A ConjunctiveGraph is an (unnamed) aggregation of all the named graphs in a store. @@ -1678,11 +1903,11 @@ def __init__( ) self.context_aware = True self.default_union = True # Conjunctive! - self.default_context = Graph( + self.default_context: _ContextType = Graph( store=self.store, identifier=identifier or BNode(), base=default_graph_base ) - def __str__(self): + def __str__(self) -> str: pattern = ( "[a rdflib:ConjunctiveGraph;rdflib:storage " "[a rdflib:Store;rdfs:label '%s']]" @@ -1692,22 +1917,17 @@ def __str__(self): @overload def _spoc( self, - triple_or_quad: Union[ - Tuple[ - Optional["_SubjectType"], - Optional["_PredicateType"], - Optional["_ObjectType"], - Optional[Any], - ], - "_TriplePatternType", - ], + triple_or_quad: "_QuadType", default: bool = False, - ) -> Tuple[ - Optional["_SubjectType"], - Optional["_PredicateType"], - Optional["_ObjectType"], - Optional[Graph], - ]: + ) -> "_QuadType": + ... + + @overload + def _spoc( + self, + triple_or_quad: Union["_TripleType", "_OptionalQuadType"], + default: bool = False, + ) -> "_OptionalQuadType": ... @overload @@ -1718,26 +1938,35 @@ def _spoc( ) -> Tuple[None, None, None, Optional[Graph]]: ... 
+ @overload def _spoc( self, - triple_or_quad: Optional[ - Union[ - Tuple[ - Optional["_SubjectType"], - Optional["_PredicateType"], - Optional["_ObjectType"], - Optional[Any], - ], - "_TriplePatternType", - ] - ], + triple_or_quad: Optional[_TripleOrQuadPatternType], default: bool = False, - ) -> Tuple[ - Optional["_SubjectType"], - Optional["_PredicateType"], - Optional["_ObjectType"], - Optional[Graph], - ]: + ) -> "_QuadPatternType": + ... + + @overload + def _spoc( + self, + triple_or_quad: _TripleOrQuadSelectorType, + default: bool = False, + ) -> _QuadSelectorType: + ... + + @overload + def _spoc( + self, + triple_or_quad: Optional[_TripleOrQuadSelectorType], + default: bool = False, + ) -> _QuadSelectorType: + ... + + def _spoc( + self, + triple_or_quad: Optional[_TripleOrQuadSelectorType], + default: bool = False, + ) -> _QuadSelectorType: """ helper method for having methods that support either triples or quads @@ -1746,13 +1975,15 @@ def _spoc( return (None, None, None, self.default_context if default else None) if len(triple_or_quad) == 3: c = self.default_context if default else None + # type error: Too many values to unpack (3 expected, 4 provided) (s, p, o) = triple_or_quad # type: ignore[misc] elif len(triple_or_quad) == 4: + # type error: Need more than 3 values to unpack (4 expected) (s, p, o, c) = triple_or_quad # type: ignore[misc] c = self._graph(c) return s, p, o, c - def __contains__(self, triple_or_quad): + def __contains__(self, triple_or_quad: _TripleOrQuadSelectorType) -> bool: """Support for 'triple/quad in graph' syntax""" s, p, o, c = self._spoc(triple_or_quad) for t in self.triples((s, p, o), context=c): @@ -1760,12 +1991,9 @@ def __contains__(self, triple_or_quad): return False def add( - self, - triple_or_quad: Union[ - Tuple["_SubjectType", "_PredicateType", "_ObjectType", Optional[Any]], - "_TripleType", - ], - ) -> "ConjunctiveGraph": + self: _ConjunctiveGraphT, + triple_or_quad: _TripleOrOptionalQuadType, + ) -> 
_ConjunctiveGraphT: """ Add a triple or quad to the store. @@ -1781,14 +2009,16 @@ def add( return self @overload - def _graph(self, c: Union[Graph, Node, str]) -> Graph: + def _graph(self, c: Union[Graph, _ContextIdentifierType, str]) -> Graph: ... @overload def _graph(self, c: None) -> None: ... - def _graph(self, c: Optional[Union[Graph, Node, str]]) -> Optional[Graph]: + def _graph( + self, c: Optional[Union[Graph, _ContextIdentifierType, str]] + ) -> Optional[Graph]: if c is None: return None if not isinstance(c, Graph): @@ -1796,7 +2026,9 @@ def _graph(self, c: Optional[Union[Graph, Node, str]]) -> Optional[Graph]: else: return c - def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 + def addN( # noqa: N802 + self: _ConjunctiveGraphT, quads: Iterable["_QuadType"] + ) -> _ConjunctiveGraphT: """Add a sequence of triples with context""" self.store.addN( @@ -1804,7 +2036,8 @@ def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 ) return self - def remove(self, triple_or_quad): + # type error: Argument 1 of "remove" is incompatible with supertype "Graph"; supertype defines the argument type as "Tuple[Optional[Node], Optional[Node], Optional[Node]]" + def remove(self: _ConjunctiveGraphT, triple_or_quad: _TripleOrOptionalQuadType) -> _ConjunctiveGraphT: # type: ignore[override] """ Removes a triple or quads @@ -1818,7 +2051,35 @@ def remove(self, triple_or_quad): self.store.remove((s, p, o), context=c) return self - def triples(self, triple_or_quad, context=None): + @overload + def triples( + self, + triple_or_quad: "_TripleOrQuadPatternType", + context: Optional[_ContextType] = ..., + ) -> Generator["_TripleType", None, None]: + ... + + @overload + def triples( + self, + triple_or_quad: "_TripleOrQuadPathPatternType", + context: Optional[_ContextType] = ..., + ) -> Generator["_TriplePathType", None, None]: + ... 
+ + @overload + def triples( + self, + triple_or_quad: _TripleOrQuadSelectorType, + context: Optional[_ContextType] = ..., + ) -> Generator["_TripleOrTriplePathType", None, None]: + ... + + def triples( + self, + triple_or_quad: _TripleOrQuadSelectorType, + context: Optional[_ContextType] = None, + ) -> Generator["_TripleOrTriplePathType", None, None]: """ Iterate over all the triples in the entire conjunctive graph @@ -1848,25 +2109,25 @@ def triples(self, triple_or_quad, context=None): yield s, p, o def quads( - self, - triple_or_quad: Union[ - "_TriplePatternType", - "_QuadPatternType", - None, - ] = None, + self, triple_or_quad: Optional[_TripleOrQuadPatternType] = None ) -> Generator[_OptionalQuadType, None, None]: """Iterate over all the quads in the entire conjunctive graph""" s, p, o, c = self._spoc(triple_or_quad) - # type error: Argument 1 to "triples" of "Store" has incompatible type "Tuple[Optional[Node], Optional[Node], Optional[Node]]"; expected "Tuple[Optional[IdentifiedNode], Optional[IdentifiedNode], Optional[Node]]" - # NOTE on type error: This is because the store typing is too narrow, willbe fixed in subsequent PR. 
- for (s, p, o), cg in self.store.triples((s, p, o), context=c): # type: ignore[arg-type] + for (s, p, o), cg in self.store.triples((s, p, o), context=c): for ctx in cg: - # type error: Incompatible types in "yield" (actual type "Tuple[Optional[Node], Optional[Node], Optional[Node], Any]", expected type "Tuple[Node, Node, Node, Optional[Graph]]") - yield s, p, o, ctx # type: ignore[misc] + yield s, p, o, ctx - def triples_choices(self, triple, context=None): + def triples_choices( + self, + triple: Union[ + Tuple[List["_SubjectType"], "_PredicateType", "_ObjectType"], + Tuple["_SubjectType", List["_PredicateType"], "_ObjectType"], + Tuple["_SubjectType", "_PredicateType", List["_ObjectType"]], + ], + context: Optional["_ContextType"] = None, + ) -> Generator[_TripleType, None, None]: """Iterate over all the triples in the entire conjunctive graph""" s, p, o = triple if context is None: @@ -1874,17 +2135,18 @@ def triples_choices(self, triple, context=None): context = self.default_context else: context = self._graph(context) - - for (s1, p1, o1), cg in self.store.triples_choices((s, p, o), context=context): + # type error: Argument 1 to "triples_choices" of "Store" has incompatible type "Tuple[Union[List[Node], Node], Union[Node, List[Node]], Union[Node, List[Node]]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]" + # type error note: unpacking discards type info + for (s1, p1, o1), cg in self.store.triples_choices((s, p, o), context=context): # type: ignore[arg-type] yield s1, p1, o1 - def __len__(self): + def __len__(self) -> int: """Number of triples in the entire conjunctive graph""" return self.store.__len__() def contexts( self, triple: Optional["_TripleType"] = None - ) -> Generator[Graph, None, None]: + ) -> Generator["_ContextType", None, None]: """Iterate over all contexts in the graph If triple is specified, iterate over all contexts the triple is in. 
@@ -1896,15 +2158,16 @@ def contexts( # the weirdness - see #225 yield context else: - yield self.get_context(context) + # type error: Statement is unreachable + yield self.get_context(context) # type: ignore[unreachable] - def get_graph(self, identifier: Union[URIRef, BNode]) -> Union[Graph, None]: + def get_graph(self, identifier: "_ContextIdentifierType") -> Union[Graph, None]: """Returns the graph identified by given identifier""" return [x for x in self.contexts() if x.identifier == identifier][0] def get_context( self, - identifier: Optional[Union[Node, str]], + identifier: Optional[Union["_ContextIdentifierType", str]], quoted: bool = False, base: Optional[str] = None, ) -> Graph: @@ -1912,12 +2175,14 @@ def get_context( identifier must be a URIRef or BNode. """ - # TODO: FIXME - why is ConjunctiveGraph passed as namespace_manager? return Graph( - store=self.store, identifier=identifier, namespace_manager=self, base=base # type: ignore[arg-type] + store=self.store, + identifier=identifier, + namespace_manager=self.namespace_manager, + base=base, ) - def remove_context(self, context): + def remove_context(self, context: "_ContextType") -> None: """Removes the given context from the graph""" self.store.remove((None, None, None), context) @@ -1938,18 +2203,42 @@ def parse( location: Optional[str] = None, file: Optional[Union[BinaryIO, TextIO]] = None, data: Optional[Union[str, bytes]] = None, - **args, - ): + **args: Any, + ) -> "Graph": """ - Parse source adding the resulting triples to its own context - (sub graph of this graph). + Parse source adding the resulting triples to its own context (sub graph + of this graph). See :meth:`rdflib.graph.Graph.parse` for documentation on arguments. + If the source is in a format that does not support named graphs its triples + will be added to the default graph (i.e. `Dataset.default_context`). + :Returns: - The graph into which the source was parsed. In the case of n3 - it returns the root context. 
+ The graph into which the source was parsed. In the case of n3 it returns + the root context. + + .. caution:: + + This method can access directly or indirectly requested network or + file resources, for example, when parsing JSON-LD documents with + ``@context`` directives that point to a network location. + + When processing untrusted or potentially malicious documents, + measures should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. + + *Changed in 7.0*: The ``publicID`` argument is no longer used as the + identifier (i.e. name) of the default graph as was the case before + version 7.0. In the case of sources that do not support named graphs, + the ``publicID`` parameter will also not be used as the name for the + graph that the data is loaded into, and instead the triples from sources + that do not support named graphs will be loaded into the default graph + (i.e. `ConjunctiveGraph.default_context`). """ source = create_input_source( @@ -1968,17 +2257,13 @@ def parse( # create_input_source will ensure that publicId is not None, though it # would be good if this guarantee was made more explicit i.e. by type # hint on InputSource (TODO/FIXME). - g_id: str = publicID and publicID or source.getPublicId() - if not isinstance(g_id, Node): - g_id = URIRef(g_id) - context = Graph(store=self.store, identifier=g_id) - context.remove((None, None, None)) # hmm ? + context = self.default_context context.parse(source, publicID=publicID, format=format, **args) # TODO: FIXME: This should not return context, but self. return context - def __reduce__(self): + def __reduce__(self) -> Tuple[Type[Graph], Tuple[Store, _ContextIdentifierType]]: return ConjunctiveGraph, (self.store, self.identifier) @@ -2108,7 +2393,12 @@ class Dataset(ConjunctiveGraph): .. 
versionadded:: 4.0 """ - def __init__(self, store="default", default_union=False, default_graph_base=None): + def __init__( + self, + store: Union[Store, str] = "default", + default_union: bool = False, + default_graph_base: Optional[str] = None, + ): super(Dataset, self).__init__(store=store, identifier=None) if not self.store.graph_aware: @@ -2121,22 +2411,32 @@ def __init__(self, store="default", default_union=False, default_graph_base=None self.default_union = default_union - def __str__(self): + def __str__(self) -> str: pattern = ( "[a rdflib:Dataset;rdflib:storage " "[a rdflib:Store;rdfs:label '%s']]" ) return pattern % self.store.__class__.__name__ - def __reduce__(self): + # type error: Return type "Tuple[Type[Dataset], Tuple[Store, bool]]" of "__reduce__" incompatible with return type "Tuple[Type[Graph], Tuple[Store, IdentifiedNode]]" in supertype "ConjunctiveGraph" + # type error: Return type "Tuple[Type[Dataset], Tuple[Store, bool]]" of "__reduce__" incompatible with return type "Tuple[Type[Graph], Tuple[Store, IdentifiedNode]]" in supertype "Graph" + def __reduce__(self) -> Tuple[Type[Dataset], Tuple[Store, bool]]: # type: ignore[override] return (type(self), (self.store, self.default_union)) - def __getstate__(self): + def __getstate__(self) -> Tuple[Store, _ContextIdentifierType, _ContextType, bool]: return self.store, self.identifier, self.default_context, self.default_union - def __setstate__(self, state): - self.store, self.identifier, self.default_context, self.default_union = state + def __setstate__( + self, state: Tuple[Store, _ContextIdentifierType, _ContextType, bool] + ) -> None: + # type error: Property "store" defined in "Graph" is read-only + # type error: Property "identifier" defined in "Graph" is read-only + self.store, self.identifier, self.default_context, self.default_union = state # type: ignore[misc] - def graph(self, identifier=None, base=None): + def graph( + self, + identifier: Optional[Union[_ContextIdentifierType, 
_ContextType, str]] = None, + base: Optional[str] = None, + ) -> Graph: if identifier is None: from rdflib.term import _SKOLEM_DEFAULT_AUTHORITY, rdflib_skolem_genid @@ -2155,25 +2455,63 @@ def parse( self, - source=None, - publicID=None, # noqa: N803 - format=None, - location=None, - file=None, - data=None, - **args, - ): + source: Optional[ + Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath] + ] = None, + publicID: Optional[str] = None, # noqa: N803 + format: Optional[str] = None, + location: Optional[str] = None, + file: Optional[Union[BinaryIO, TextIO]] = None, + data: Optional[Union[str, bytes]] = None, + **args: Any, + ) -> "Graph": + """ + Parse an RDF source adding the resulting triples to the Graph. + + See :meth:`rdflib.graph.Graph.parse` for documentation on arguments. + + The source is specified using one of source, location, file or data. + + If the source is in a format that does not support named graphs its triples + will be added to the default graph (i.e. `Dataset.default_context`). + + .. caution:: + + This method can access directly or indirectly requested network or + file resources, for example, when parsing JSON-LD documents with + ``@context`` directives that point to a network location. + + When processing untrusted or potentially malicious documents, + measures should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. + + *Changed in 7.0*: The ``publicID`` argument is no longer used as the + identifier (i.e. name) of the default graph as was the case before + version 7.0. In the case of sources that do not support named graphs, + the ``publicID`` parameter will also not be used as the name for the + graph that the data is loaded into, and instead the triples from sources + that do not support named graphs will be loaded into the default graph + (i.e. 
`ConjunctiveGraph.default_context`). + """ + c = ConjunctiveGraph.parse( self, source, publicID, format, location, file, data, **args ) self.graph(c) return c - def add_graph(self, g): + def add_graph( + self, g: Optional[Union[_ContextIdentifierType, _ContextType, str]] + ) -> Graph: """alias of graph for consistency""" return self.graph(g) - def remove_graph(self, g): + def remove_graph( + self: _DatasetT, g: Optional[Union[_ContextIdentifierType, _ContextType, str]] + ) -> _DatasetT: if not isinstance(g, Graph): g = self.get_context(g) @@ -2184,7 +2522,9 @@ def remove_graph(self, g): self.store.add_graph(self.default_context) return self - def contexts(self, triple=None): + def contexts( + self, triple: Optional["_TripleType"] = None + ) -> Generator["_ContextType", None, None]: default = False for c in super(Dataset, self).contexts(triple): default |= c.identifier == DATASET_DEFAULT_GRAPH_ID @@ -2196,12 +2536,7 @@ def contexts(self, triple=None): # type error: Return type "Generator[Tuple[Node, Node, Node, Optional[Node]], None, None]" of "quads" incompatible with return type "Generator[Tuple[Node, Node, Node, Optional[Graph]], None, None]" in supertype "ConjunctiveGraph" def quads( # type: ignore[override] - self, - quad: Union[ - "_TriplePatternType", - "_QuadPatternType", - None, - ] = None, + self, quad: Optional["_TripleOrQuadPatternType"] = None ) -> Generator[_OptionalIdentifiedQuadType, None, None]: for s, p, o, c in super(Dataset, self).quads(quad): # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" @@ -2227,10 +2562,14 @@ class QuotedGraph(Graph): such as implication and other such processing. 
""" - def __init__(self, store, identifier): + def __init__( + self, + store: Union[Store, str], + identifier: Optional[Union[_ContextIdentifierType, str]], + ): super(QuotedGraph, self).__init__(store, identifier) - def add(self, triple: "_TripleType"): + def add(self: _GraphT, triple: "_TripleType") -> _GraphT: """Add a triple with self as context""" s, p, o = triple assert isinstance(s, Node), "Subject %s must be an rdflib term" % (s,) @@ -2240,7 +2579,7 @@ def add(self, triple: "_TripleType"): self.store.add((s, p, o), self, quoted=True) return self - def addN(self, quads: Iterable["_QuadType"]) -> "QuotedGraph": # noqa: N802 + def addN(self: _GraphT, quads: Iterable["_QuadType"]) -> _GraphT: # noqa: N802 """Add a sequence of triple with context""" self.store.addN( @@ -2252,12 +2591,14 @@ def addN(self, quads: Iterable["_QuadType"]) -> "QuotedGraph": # noqa: N802 ) return self - def n3(self): + def n3(self) -> str: """Return an n3 identifier for the Graph""" - return "{%s}" % self.identifier.n3() + # type error: "IdentifiedNode" has no attribute "n3" + return "{%s}" % self.identifier.n3() # type: ignore[attr-defined] - def __str__(self): - identifier = self.identifier.n3() + def __str__(self) -> str: + # type error: "IdentifiedNode" has no attribute "n3" + identifier = self.identifier.n3() # type: ignore[attr-defined] label = self.store.__class__.__name__ pattern = ( "{this rdflib.identifier %s;rdflib:storage " @@ -2265,7 +2606,7 @@ def __str__(self): ) return pattern % (identifier, label) - def __reduce__(self): + def __reduce__(self) -> Tuple[Type[Graph], Tuple[Store, _ContextIdentifierType]]: return QuotedGraph, (self.store, self.identifier) @@ -2276,7 +2617,7 @@ def __reduce__(self): rdflib.term._ORDERING[QuotedGraph] = 11 -class Seq(object): +class Seq: """Wrapper around an RDF Seq resource It implements a container type in Python with the order of the items @@ -2285,7 +2626,7 @@ class Seq(object): 'implementation' of a sequence in RDF terms. 
""" - def __init__(self, graph, subject): + def __init__(self, graph: Graph, subject: _SubjectType): """Parameters: - graph: @@ -2297,40 +2638,43 @@ def __init__(self, graph, subject): creates this instance! """ + self._list: List[Tuple[int, _ObjectType]] _list = self._list = list() LI_INDEX = URIRef(str(RDF) + "_") # noqa: N806 - for (p, o) in graph.predicate_objects(subject): - if p.startswith(LI_INDEX): # != RDF.Seq: # - i = int(p.replace(LI_INDEX, "")) + for p, o in graph.predicate_objects(subject): + # type error: "Node" has no attribute "startswith" + if p.startswith(LI_INDEX): # type: ignore[attr-defined] # != RDF.Seq: + # type error: "Node" has no attribute "replace" + i = int(p.replace(LI_INDEX, "")) # type: ignore[attr-defined] _list.append((i, o)) # here is the trick: the predicates are _1, _2, _3, etc. Ie, # by sorting the keys (by integer) we have what we want! _list.sort() - def toPython(self): # noqa: N802 + def toPython(self) -> "Seq": # noqa: N802 return self - def __iter__(self): + def __iter__(self) -> Generator[_ObjectType, None, None]: """Generator over the items in the Seq""" for _, item in self._list: yield item - def __len__(self): + def __len__(self) -> int: """Length of the Seq""" return len(self._list) - def __getitem__(self, index): + def __getitem__(self, index) -> _ObjectType: """Item given by index from the Seq""" index, item = self._list.__getitem__(index) return item class ModificationException(Exception): - def __init__(self): + def __init__(self) -> None: pass - def __str__(self): + def __str__(self) -> str: return ( "Modifications and transactional operations not allowed on " "ReadOnlyGraphAggregate instances" @@ -2338,10 +2682,10 @@ def __str__(self): class UnSupportedAggregateOperation(Exception): - def __init__(self): + def __init__(self) -> None: pass - def __str__(self): + def __str__(self) -> str: return "This operation is not supported by ReadOnlyGraphAggregate " "instances" @@ -2352,7 +2696,7 @@ class 
ReadOnlyGraphAggregate(ConjunctiveGraph): ConjunctiveGraph over an explicit subset of the entire store. """ - def __init__(self, graphs, store="default"): + def __init__(self, graphs: List[Graph], store: Union[str, Store] = "default"): if store is not None: super(ReadOnlyGraphAggregate, self).__init__(store) Graph.__init__(self, store) @@ -2365,38 +2709,68 @@ def __init__(self, graphs, store="default"): ), "graphs argument must be a list of Graphs!!" self.graphs = graphs - def __repr__(self): + def __repr__(self) -> str: return "" % len(self.graphs) - def destroy(self, configuration): + def destroy(self, configuration: str) -> NoReturn: raise ModificationException() # Transactional interfaces (optional) - def commit(self): + def commit(self) -> NoReturn: raise ModificationException() - def rollback(self): + def rollback(self) -> NoReturn: raise ModificationException() - def open(self, configuration, create=False): + def open(self, configuration: str, create: bool = False) -> None: # TODO: is there a use case for this method? 
for graph in self.graphs: - graph.open(self, configuration, create) + # type error: Too many arguments for "open" of "Graph" + # type error: Argument 1 to "open" of "Graph" has incompatible type "ReadOnlyGraphAggregate"; expected "str" [arg-type] + # type error: Argument 2 to "open" of "Graph" has incompatible type "str"; expected "bool" [arg-type] + graph.open(self, configuration, create) # type: ignore[call-arg, arg-type] - def close(self): + # type error: Signature of "close" incompatible with supertype "Graph" + def close(self) -> None: # type: ignore[override] for graph in self.graphs: graph.close() - def add(self, triple): + def add(self, triple: _TripleOrOptionalQuadType) -> NoReturn: raise ModificationException() - def addN(self, quads): # noqa: N802 + def addN(self, quads: Iterable["_QuadType"]) -> NoReturn: # noqa: N802 raise ModificationException() - def remove(self, triple): + # type error: Argument 1 of "remove" is incompatible with supertype "Graph"; supertype defines the argument type as "Tuple[Optional[Node], Optional[Node], Optional[Node]]" + def remove(self, triple: _TripleOrOptionalQuadType) -> NoReturn: # type: ignore[override] raise ModificationException() - def triples(self, triple): + # type error: Signature of "triples" incompatible with supertype "ConjunctiveGraph" + @overload # type: ignore[override] + def triples( + self, + triple: "_TriplePatternType", + ) -> Generator["_TripleType", None, None]: + ... + + @overload + def triples( + self, + triple: "_TriplePathPatternType", + ) -> Generator["_TriplePathType", None, None]: + ... + + @overload + def triples( + self, + triple: "_TripleSelectorType", + ) -> Generator["_TripleOrTriplePathType", None, None]: + ... 
+ + def triples( + self, + triple: "_TripleSelectorType", + ) -> Generator["_TripleOrTriplePathType", None, None]: s, p, o = triple for graph in self.graphs: if isinstance(p, Path): @@ -2406,23 +2780,35 @@ def triples(self, triple): for s1, p1, o1 in graph.triples((s, p, o)): yield s1, p1, o1 - def __contains__(self, triple_or_quad): + def __contains__(self, triple_or_quad: _TripleOrQuadSelectorType) -> bool: context = None if len(triple_or_quad) == 4: - context = triple_or_quad[3] + # type error: Tuple index out of range + context = triple_or_quad[3] # type: ignore [misc] for graph in self.graphs: if context is None or graph.identifier == context.identifier: if triple_or_quad[:3] in graph: return True return False - def quads(self, triple_or_quad): + # type error: Signature of "quads" incompatible with supertype "ConjunctiveGraph" + def quads( # type: ignore[override] + self, triple_or_quad: _TripleOrQuadSelectorType + ) -> Generator[ + Tuple[ + "_SubjectType", Union[Path, "_PredicateType"], "_ObjectType", "_ContextType" + ], + None, + None, + ]: """Iterate over all the quads in the entire aggregate graph""" c = None if len(triple_or_quad) == 4: - s, p, o, c = triple_or_quad + # type error: Need more than 3 values to unpack (4 expected) + s, p, o, c = triple_or_quad # type: ignore[misc] else: - s, p, o = triple_or_quad + # type error: Too many values to unpack (3 expected, 4 provided) + s, p, o = triple_or_quad # type: ignore[misc] if c is not None: for graph in [g for g in self.graphs if g == c]: @@ -2433,13 +2819,13 @@ def quads(self, triple_or_quad): for s1, p1, o1 in graph.triples((s, p, o)): yield s1, p1, o1, graph - def __len__(self): + def __len__(self) -> int: return sum(len(g) for g in self.graphs) - def __hash__(self): + def __hash__(self) -> NoReturn: raise UnSupportedAggregateOperation() - def __cmp__(self, other): + def __cmp__(self, other) -> int: if other is None: return -1 elif isinstance(other, Graph): @@ -2449,63 +2835,95 @@ def __cmp__(self, 
other): else: return -1 - def __iadd__(self: "_GraphT", other: Iterable["_TripleType"]) -> "_GraphT": + def __iadd__(self: "_GraphT", other: Iterable["_TripleType"]) -> NoReturn: raise ModificationException() - def __isub__(self: "_GraphT", other: Iterable["_TripleType"]) -> "_GraphT": + def __isub__(self: "_GraphT", other: Iterable["_TripleType"]) -> NoReturn: raise ModificationException() # Conv. methods - def triples_choices(self, triple, context=None): + def triples_choices( + self, + triple: Union[ + Tuple[List["_SubjectType"], "_PredicateType", "_ObjectType"], + Tuple["_SubjectType", List["_PredicateType"], "_ObjectType"], + Tuple["_SubjectType", "_PredicateType", List["_ObjectType"]], + ], + context: Optional["_ContextType"] = None, + ) -> Generator[_TripleType, None, None]: subject, predicate, object_ = triple for graph in self.graphs: - choices = graph.triples_choices((subject, predicate, object_)) - for (s, p, o) in choices: + # type error: Argument 1 to "triples_choices" of "Graph" has incompatible type "Tuple[Union[List[Node], Node], Union[Node, List[Node]], Union[Node, List[Node]]]"; expected "Union[Tuple[List[Node], Node, Node], Tuple[Node, List[Node], Node], Tuple[Node, Node, List[Node]]]" + # type error note: unpacking discards type info + choices = graph.triples_choices((subject, predicate, object_)) # type: ignore[arg-type] + for s, p, o in choices: yield s, p, o - def qname(self, uri): + def qname(self, uri: str) -> str: if hasattr(self, "namespace_manager") and self.namespace_manager: return self.namespace_manager.qname(uri) raise UnSupportedAggregateOperation() - def compute_qname(self, uri, generate=True): + def compute_qname(self, uri: str, generate: bool = True) -> Tuple[str, URIRef, str]: if hasattr(self, "namespace_manager") and self.namespace_manager: return self.namespace_manager.compute_qname(uri, generate) raise UnSupportedAggregateOperation() - def bind(self, prefix, namespace, override=True): + # type error: Signature of "bind" 
incompatible with supertype "Graph" + def bind( # type: ignore[override] + self, prefix: Optional[str], namespace: Any, override: bool = True # noqa: F811 + ) -> NoReturn: raise UnSupportedAggregateOperation() - def namespaces(self): + def namespaces(self) -> Generator[Tuple[str, URIRef], None, None]: if hasattr(self, "namespace_manager"): - for prefix, namespace in self.namespace_manager.namespaces(): + for prefix, namespace in self.namespace_manager.namespaces(): # noqa: F402 yield prefix, namespace else: for graph in self.graphs: for prefix, namespace in graph.namespaces(): yield prefix, namespace - def absolutize(self, uri, defrag=1): + def absolutize(self, uri: str, defrag: int = 1) -> NoReturn: raise UnSupportedAggregateOperation() - def parse(self, source, publicID=None, format=None, **args): # noqa: N803 + # type error: Signature of "parse" incompatible with supertype "ConjunctiveGraph" + def parse( # type: ignore[override] + self, + source: Optional[ + Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath] + ], + publicID: Optional[str] = None, + format: Optional[str] = None, + **args: Any, + ) -> NoReturn: # noqa: N803 raise ModificationException() - def n3(self): + def n3(self) -> NoReturn: raise UnSupportedAggregateOperation() - def __reduce__(self): + def __reduce__(self) -> NoReturn: raise UnSupportedAggregateOperation() -def _assertnode(*terms): +@overload +def _assertnode(*terms: Node) -> "te.Literal[True]": + ... + + +@overload +def _assertnode(*terms: Any) -> bool: + ... + + +def _assertnode(*terms: Any) -> bool: for t in terms: assert isinstance(t, Node), "Term %s must be an rdflib term" % (t,) return True -class BatchAddGraph(object): +class BatchAddGraph: """ Wrapper around graph that turns batches of calls to Graph's add (and optionally, addN) into calls to batched calls to addN`. 
@@ -2533,11 +2951,11 @@ def __init__(self, graph: Graph, batch_size: int = 1000, batch_addn: bool = Fals self.__batch_addn = batch_addn self.reset() - def reset(self): + def reset(self) -> BatchAddGraph: """ Manually clear the buffered triples and reset the count to zero """ - self.batch = [] + self.batch: List[_QuadType] = [] self.count = 0 return self @@ -2558,12 +2976,14 @@ def add( self.batch = [] self.count += 1 if len(triple_or_quad) == 3: - self.batch.append(triple_or_quad + self.__graph_tuple) + # type error: Argument 1 to "append" of "list" has incompatible type "Tuple[Node, ...]"; expected "Tuple[Node, Node, Node, Graph]" + self.batch.append(triple_or_quad + self.__graph_tuple) # type: ignore[arg-type] else: - self.batch.append(triple_or_quad) + # type error: Argument 1 to "append" of "list" has incompatible type "Union[Tuple[Node, Node, Node], Tuple[Node, Node, Node, Graph]]"; expected "Tuple[Node, Node, Node, Graph]" + self.batch.append(triple_or_quad) # type: ignore[arg-type] return self - def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 + def addN(self, quads: Iterable["_QuadType"]) -> BatchAddGraph: # noqa: N802 if self.__batch_addn: for q in quads: self.add(q) @@ -2571,10 +2991,10 @@ def addN(self, quads: Iterable["_QuadType"]): # noqa: N802 self.graph.addN(quads) return self - def __enter__(self): + def __enter__(self) -> BatchAddGraph: self.reset() return self - def __exit__(self, *exc): + def __exit__(self, *exc) -> None: if exc[0] is None: self.graph.addN(self.batch) diff --git a/dependencies/rdflib/namespace/_GEO.py b/dependencies/rdflib/namespace/_GEO.py index 7f316fcbc..c890973ca 100644 --- a/dependencies/rdflib/namespace/_GEO.py +++ b/dependencies/rdflib/namespace/_GEO.py @@ -9,18 +9,20 @@ class GEO(DefinedNamespace): Generated from: http://schemas.opengis.net/geosparql/1.0/geosparql_vocab_all.rdf Date: 2021-12-27 17:38:15.101187 - dc:creator "Open Geospatial Consortium"^^xsd:string - dc:date "2012-04-30"^^xsd:date - dc:source - 
"OGC GeoSPARQL – A Geographic Query Language for RDF Data OGC 11-052r5"^^xsd:string - rdfs:seeAlso - - - owl:imports dc: - - - - owl:versionInfo "OGC GeoSPARQL 1.0"^^xsd:string + .. code-block:: Turtle + + dc:creator "Open Geospatial Consortium"^^xsd:string + dc:date "2012-04-30"^^xsd:date + dc:source + "OGC GeoSPARQL – A Geographic Query Language for RDF Data OGC 11-052r5"^^xsd:string + rdfs:seeAlso + + + owl:imports dc: + + + + owl:versionInfo "OGC GeoSPARQL 1.0"^^xsd:string """ # http://www.w3.org/2000/01/rdf-schema#Datatype diff --git a/dependencies/rdflib/namespace/__init__.py b/dependencies/rdflib/namespace/__init__.py index 087d4a7e1..3e591fcf7 100644 --- a/dependencies/rdflib/namespace/__init__.py +++ b/dependencies/rdflib/namespace/__init__.py @@ -1,6 +1,4 @@ -import json import logging -import sys import warnings from functools import lru_cache from pathlib import Path @@ -93,6 +91,34 @@ "ClosedNamespace", "DefinedNamespace", "NamespaceManager", + "BRICK", + "CSVW", + "DC", + "DCAM", + "DCAT", + "DCMITYPE", + "DCTERMS", + "DOAP", + "FOAF", + "GEO", + "ODRL2", + "ORG", + "OWL", + "PROF", + "PROV", + "QB", + "RDF", + "RDFS", + "SDO", + "SH", + "SKOS", + "SOSA", + "SSN", + "TIME", + "VANN", + "VOID", + "WGS", + "XSD", ] logger = logging.getLogger(__name__) @@ -122,8 +148,9 @@ def __new__(cls, value: Union[str, bytes]) -> "Namespace": rt = str.__new__(cls, value, "utf-8") # type: ignore[arg-type] return rt + # type error: Signature of "title" incompatible with supertype "str" @property - def title(self) -> URIRef: + def title(self) -> URIRef: # type: ignore[override] # Override for DCTERMS.title to return a URIRef instead of str.title method return URIRef(self + "title") @@ -349,7 +376,7 @@ def _ipython_key_completions_(self) -> List[str]: _with_bind_override_fix = True -class NamespaceManager(object): +class NamespaceManager: """Class for managing prefix => namespace mappings This class requires an RDFlib Graph as an input parameter and may optionally have 
@@ -359,13 +386,13 @@ class NamespaceManager(object): * core: * binds several core RDF prefixes only * owl, rdf, rdfs, xsd, xml from the NAMESPACE_PREFIXES_CORE object - * this is default * rdflib: * binds all the namespaces shipped with RDFLib as DefinedNamespace instances * all the core namespaces and all the following: brick, csvw, dc, dcat - * dcmitype, cdterms, dcam, doap, foaf, geo, odrl, org, prof, prov, qb, sdo + * dcmitype, dcterms, dcam, doap, foaf, geo, odrl, org, prof, prov, qb, schema * sh, skos, sosa, ssn, time, vann, void * see the NAMESPACE_PREFIXES_RDFLIB object for the up-to-date list + * this is default * none: * binds no namespaces to prefixes * note this is NOT default behaviour @@ -373,6 +400,14 @@ class NamespaceManager(object): * using prefix bindings from prefix.cc which is a online prefixes database * not implemented yet - this is aspirational + .. attention:: + + The namespaces bound for specific values of ``bind_namespaces`` + constitute part of RDFLib's public interface, so changes to them should + only be additive within the same minor version. Removing values, or + removing namespaces that are bound by default, constitutes a breaking + change. + See the Sample usage @@ -389,10 +424,11 @@ class NamespaceManager(object): >>> all_ns = [n for n in g.namespace_manager.namespaces()] >>> assert ('ex', rdflib.term.URIRef('http://example.com/')) in all_ns >>> - """ - def __init__(self, graph: "Graph", bind_namespaces: "_NamespaceSetString" = "core"): + def __init__( + self, graph: "Graph", bind_namespaces: "_NamespaceSetString" = "rdflib" + ): self.graph = graph self.__cache: Dict[str, Tuple[str, URIRef, str]] = {} self.__cache_strict: Dict[str, Tuple[str, URIRef, str]] = {} @@ -454,6 +490,35 @@ def qname(self, uri: str) -> str: else: return ":".join((prefix, name)) + def curie(self, uri: str, generate: bool = True) -> str: + """ + From a URI, generate a valid CURIE. 
+ + Result is guaranteed to contain a colon separating the prefix from the + name, even if the prefix is an empty string. + + .. warning:: + + When ``generate`` is `True` (which is the default) and there is no + matching namespace for the URI in the namespace manager then a new + namespace will be added with prefix ``ns{index}``. + + Thus, when ``generate`` is `True`, this function is not a pure + function because of this side-effect. + + This default behaviour is chosen so that this function operates + similarly to `NamespaceManager.qname`. + + :param uri: URI to generate CURIE for. + :param generate: Whether to add a prefix for the namespace if one doesn't + already exist. Default: `True`. + :return: CURIE for the URI. + :raises KeyError: If generate is `False` and the namespace doesn't already have + a prefix. + """ + prefix, namespace, name = self.compute_qname(uri, generate=generate) + return ":".join((prefix, name)) + def qname_strict(self, uri: str) -> str: prefix, namespace, name = self.compute_qname_strict(uri) if prefix == "": @@ -472,7 +537,7 @@ def normalizeUri(self, rdfTerm: str) -> str: if namespace not in self.__strie: insert_strie(self.__strie, self.__trie, str(namespace)) namespace = URIRef(str(namespace)) - except: + except Exception: if isinstance(rdfTerm, Variable): return "?%s" % rdfTerm else: @@ -487,10 +552,8 @@ def normalizeUri(self, rdfTerm: str) -> str: return ":".join([qNameParts[0], qNameParts[-1]]) def compute_qname(self, uri: str, generate: bool = True) -> Tuple[str, URIRef, str]: - prefix: Optional[str] if uri not in self.__cache: - if not _is_valid_uri(uri): raise ValueError( '"{}" does not look like a valid URI, cannot serialize this. 
Did you want to urlencode it?'.format( @@ -503,6 +566,7 @@ def compute_qname(self, uri: str, generate: bool = True) -> Tuple[str, URIRef, s except ValueError as e: namespace = URIRef(uri) prefix = self.store.prefix(namespace) + name = "" # empty prefix case, safe since not prefix is error if not prefix: raise e if namespace not in self.__strie: @@ -592,7 +656,7 @@ def compute_qname_strict( return self.__cache_strict[uri] - def expand_curie(self, curie: str) -> Union[URIRef, None]: + def expand_curie(self, curie: str) -> URIRef: """ Expand a CURIE of the form , e.g. "rdf:type" into its full expression: @@ -608,7 +672,7 @@ def expand_curie(self, curie: str) -> Union[URIRef, None]: if not type(curie) is str: raise TypeError(f"Argument must be a string, not {type(curie).__name__}.") parts = curie.split(":", 1) - if len(parts) != 2 or len(parts[0]) < 1: + if len(parts) != 2: raise ValueError( "Malformed curie argument, format should be e.g. “foaf:name”." ) @@ -668,7 +732,6 @@ def bind( if bound_namespace: bound_namespace = URIRef(bound_namespace) if bound_namespace and bound_namespace != namespace: - if replace: self._store_bind(prefix, namespace, override=override) insert_trie(self.__trie, str(namespace)) @@ -897,7 +960,7 @@ def get_longest_namespace(trie: Dict[str, Any], value: str) -> Optional[str]: "dc": DC, "dcat": DCAT, "dcmitype": DCMITYPE, - "cdterms": DCTERMS, + "dcterms": DCTERMS, "dcam": DCAM, "doap": DOAP, "foaf": FOAF, @@ -907,7 +970,7 @@ def get_longest_namespace(trie: Dict[str, Any], value: str) -> Optional[str]: "prof": PROF, "prov": PROV, "qb": QB, - "sdo": SDO, + "schema": SDO, "sh": SH, "skos": SKOS, "sosa": SOSA, @@ -915,4 +978,5 @@ def get_longest_namespace(trie: Dict[str, Any], value: str) -> Optional[str]: "time": TIME, "vann": VANN, "void": VOID, + "wgs": WGS, } diff --git a/dependencies/rdflib/parser.py b/dependencies/rdflib/parser.py index 7837fdeb6..a35c1d825 100644 --- a/dependencies/rdflib/parser.py +++ b/dependencies/rdflib/parser.py @@ 
-9,6 +9,7 @@ want to do so through the Graph class parse method. """ +from __future__ import annotations import codecs import os @@ -26,20 +27,21 @@ Tuple, Union, ) -from urllib.error import HTTPError from urllib.parse import urljoin -from urllib.request import Request, url2pathname, urlopen +from urllib.request import Request, url2pathname from xml.sax import xmlreader import rdflib.util from rdflib import __version__ +from rdflib._networking import _urlopen from rdflib.namespace import Namespace from rdflib.term import URIRef if TYPE_CHECKING: - from http.client import HTTPMessage, HTTPResponse + from email.message import Message + from urllib.response import addinfourl - from rdflib import Graph + from rdflib.graph import Graph __all__ = [ "Parser", @@ -51,13 +53,13 @@ ] -class Parser(object): +class Parser: __slots__ = () def __init__(self): pass - def parse(self, source: "InputSource", sink: "Graph"): + def parse(self, source: "InputSource", sink: "Graph") -> None: pass @@ -92,7 +94,7 @@ def write(self, *args, **kwargs): raise NotImplementedError() -class InputSource(xmlreader.InputSource, object): +class InputSource(xmlreader.InputSource): """ TODO: """ @@ -102,7 +104,7 @@ def __init__(self, system_id: Optional[str] = None): self.content_type: Optional[str] = None self.auto_close = False # see Graph.parse(), true if opened by us - def close(self): + def close(self) -> None: c = self.getCharacterStream() if c and hasattr(c, "close"): try: @@ -133,26 +135,26 @@ class PythonInputSource(InputSource): True """ - def __init__(self, data, system_id=None): + def __init__(self, data: Any, system_id: Optional[str] = None): self.content_type = None self.auto_close = False # see Graph.parse(), true if opened by us - self.public_id = None - self.system_id = system_id + self.public_id: Optional[str] = None + self.system_id: Optional[str] = system_id self.data = data - def getPublicId(self): # noqa: N802 + def getPublicId(self) -> Optional[str]: # noqa: N802 return 
self.public_id - def setPublicId(self, public_id): # noqa: N802 + def setPublicId(self, public_id: Optional[str]) -> None: # noqa: N802 self.public_id = public_id - def getSystemId(self): # noqa: N802 + def getSystemId(self) -> Optional[str]: # noqa: N802 return self.system_id - def setSystemId(self, system_id): # noqa: N802 + def setSystemId(self, system_id: Optional[str]) -> None: # noqa: N802 self.system_id = system_id - def close(self): + def close(self) -> None: self.data = None @@ -197,16 +199,16 @@ class URLInputSource(InputSource): links: List[str] @classmethod - def getallmatchingheaders(cls, message: "HTTPMessage", name): + def getallmatchingheaders(cls, message: "Message", name) -> List[str]: # This is reimplemented here, because the method # getallmatchingheaders from HTTPMessage is broken since Python 3.0 name = name.lower() return [val for key, val in message.items() if key.lower() == name] @classmethod - def get_links(cls, response: "HTTPResponse"): + def get_links(cls, response: addinfourl) -> List[str]: linkslines = cls.getallmatchingheaders(response.headers, "Link") - retarray = [] + retarray: List[str] = [] for linksline in linkslines: links = [linkstr.strip() for linkstr in linksline.split(",")] for link in links: @@ -265,21 +267,7 @@ def __init__(self, system_id: Optional[str] = None, format: Optional[str] = None req = Request(system_id, None, myheaders) # type: ignore[arg-type] - def _urlopen(req: Request) -> Any: - try: - return urlopen(req) - except HTTPError as ex: - # 308 (Permanent Redirect) is not supported by current python version(s) - # See https://bugs.python.org/issue40321 - # This custom error handling should be removed once all - # supported versions of python support 308. 
- if ex.code == 308: - req.full_url = ex.headers.get("Location") - return _urlopen(req) - else: - raise - - response: HTTPResponse = _urlopen(req) + response: addinfourl = _urlopen(req) self.url = response.geturl() # in case redirections took place self.links = self.get_links(response) if format in ("json-ld", "application/ld+json"): @@ -300,8 +288,9 @@ def _urlopen(req: Request) -> Any: # TODO: self.setEncoding(encoding) self.response_info = response.info() # a mimetools.Message instance - def __repr__(self): - return self.url + def __repr__(self) -> str: + # type error: Incompatible return value type (got "Optional[str]", expected "str") + return self.url # type: ignore[return-value] class FileInputSource(InputSource): @@ -325,7 +314,7 @@ def __init__( # We cannot set characterStream here because # we do not know the Raw Bytes File encoding. - def __repr__(self): + def __repr__(self) -> str: return repr(self.file) @@ -336,8 +325,8 @@ def create_input_source( publicID: Optional[str] = None, # noqa: N803 location: Optional[str] = None, file: Optional[Union[BinaryIO, TextIO]] = None, - data: Union[str, bytes, dict] = None, - format: str = None, + data: Optional[Union[str, bytes, dict]] = None, + format: Optional[str] = None, ) -> InputSource: """ Return an appropriate InputSource instance for the given @@ -360,6 +349,10 @@ def create_input_source( input_source = None if source is not None: + if TYPE_CHECKING: + assert file is None + assert data is None + assert location is None if isinstance(source, InputSource): input_source = source else: @@ -369,14 +362,14 @@ def create_input_source( location = str(source) elif isinstance(source, bytes): data = source - elif hasattr(source, "read") and not isinstance(source, Namespace): # type: ignore[unreachable] + elif hasattr(source, "read") and not isinstance(source, Namespace): f = source input_source = InputSource() if hasattr(source, "encoding"): input_source.setCharacterStream(source) - 
input_source.setEncoding(source.encoding) # type: ignore[union-attr] + input_source.setEncoding(source.encoding) try: - b = file.buffer # type: ignore[union-attr] + b = source.buffer # type: ignore[union-attr] input_source.setByteStream(b) except (AttributeError, LookupError): input_source.setByteStream(source) @@ -396,6 +389,10 @@ def create_input_source( auto_close = False # make sure we close all file handles we open if location is not None: + if TYPE_CHECKING: + assert file is None + assert data is None + assert source is None ( absolute_location, auto_close, @@ -409,9 +406,17 @@ def create_input_source( ) if file is not None: + if TYPE_CHECKING: + assert location is None + assert data is None + assert source is None input_source = FileInputSource(file) if data is not None: + if TYPE_CHECKING: + assert location is None + assert file is None + assert source is None if isinstance(data, dict): input_source = PythonInputSource(data) auto_close = True diff --git a/dependencies/rdflib/paths.py b/dependencies/rdflib/paths.py index 97adf5b98..9f9538784 100644 --- a/dependencies/rdflib/paths.py +++ b/dependencies/rdflib/paths.py @@ -1,3 +1,5 @@ +from __future__ import annotations + __doc__ = r""" This module implements the SPARQL 1.1 Property path operators, as @@ -103,74 +105,74 @@ A more complete set of tests: ->>> list(evalPath(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)] +>>> list(eval_path(g, (None, e.p1/e.p2, None)))==[(e.a, e.e)] True ->>> list(evalPath(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)] +>>> list(eval_path(g, (e.a, e.p1|e.p2, None)))==[(e.a,e.c), (e.a,e.f)] True ->>> list(evalPath(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ] +>>> list(eval_path(g, (e.c, ~e.p1, None))) == [ (e.c, e.a) ] True ->>> list(evalPath(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)] +>>> list(eval_path(g, (e.a, e.p1*ZeroOrOne, None))) == [(e.a, e.a), (e.a, e.c)] True ->>> list(evalPath(g, (e.c, e.p3*OneOrMore, None))) == [ +>>> list(eval_path(g, (e.c, 
e.p3*OneOrMore, None))) == [ ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] True ->>> list(evalPath(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c), +>>> list(eval_path(g, (e.c, e.p3*ZeroOrMore, None))) == [(e.c, e.c), ... (e.c, e.g), (e.c, e.h), (e.c, e.a)] True ->>> list(evalPath(g, (e.a, -e.p1, None))) == [(e.a, e.f)] +>>> list(eval_path(g, (e.a, -e.p1, None))) == [(e.a, e.f)] True ->>> list(evalPath(g, (e.a, -(e.p1|e.p2), None))) == [] +>>> list(eval_path(g, (e.a, -(e.p1|e.p2), None))) == [] True ->>> list(evalPath(g, (e.g, -~e.p2, None))) == [(e.g, e.j)] +>>> list(eval_path(g, (e.g, -~e.p2, None))) == [(e.g, e.j)] True ->>> list(evalPath(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)] +>>> list(eval_path(g, (e.e, ~(e.p1/e.p2), None))) == [(e.e, e.a)] True ->>> list(evalPath(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)] +>>> list(eval_path(g, (e.a, e.p1/e.p3/e.p3, None))) == [(e.a, e.h)] True ->>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) +>>> list(eval_path(g, (e.q, e.px*OneOrMore, None))) [(rdflib.term.URIRef('ex:q'), rdflib.term.URIRef('ex:q'))] ->>> list(evalPath(g, (None, e.p1|e.p2, e.c))) +>>> list(eval_path(g, (None, e.p1|e.p2, e.c))) [(rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:c'))] ->>> list(evalPath(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ] +>>> list(eval_path(g, (None, ~e.p1, e.a))) == [ (e.c, e.a) ] True ->>> list(evalPath(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE +>>> list(eval_path(g, (None, e.p1*ZeroOrOne, e.c))) # doctest: +NORMALIZE_WHITESPACE [(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:c')), (rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:c'))] ->>> list(evalPath(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE +>>> list(eval_path(g, (None, e.p3*OneOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE [(rdflib.term.URIRef('ex:h'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a'))] ->>> 
list(evalPath(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE +>>> list(eval_path(g, (None, e.p3*ZeroOrMore, e.a))) # doctest: +NORMALIZE_WHITESPACE [(rdflib.term.URIRef('ex:a'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:h'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:g'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a'))] ->>> list(evalPath(g, (None, -e.p1, e.f))) == [(e.a, e.f)] +>>> list(eval_path(g, (None, -e.p1, e.f))) == [(e.a, e.f)] True ->>> list(evalPath(g, (None, -(e.p1|e.p2), e.c))) == [] +>>> list(eval_path(g, (None, -(e.p1|e.p2), e.c))) == [] True ->>> list(evalPath(g, (None, -~e.p2, e.j))) == [(e.g, e.j)] +>>> list(eval_path(g, (None, -~e.p2, e.j))) == [(e.g, e.j)] True ->>> list(evalPath(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)] +>>> list(eval_path(g, (None, ~(e.p1/e.p2), e.a))) == [(e.e, e.a)] True ->>> list(evalPath(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)] +>>> list(eval_path(g, (None, e.p1/e.p3/e.p3, e.h))) == [(e.a, e.h)] True ->>> list(evalPath(g, (e.q, e.px*OneOrMore, None))) +>>> list(eval_path(g, (e.q, e.px*OneOrMore, None))) [(rdflib.term.URIRef('ex:q'), rdflib.term.URIRef('ex:q'))] ->>> list(evalPath(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j))) +>>> list(eval_path(g, (e.c, (e.p2|e.p3)*ZeroOrMore, e.j))) [(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:j'))] No vars specified: ->>> sorted(list(evalPath(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE +>>> sorted(list(eval_path(g, (None, e.p3*OneOrMore, None)))) #doctest: +NORMALIZE_WHITESPACE [(rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:a')), (rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:g')), (rdflib.term.URIRef('ex:c'), rdflib.term.URIRef('ex:h')), @@ -181,13 +183,27 @@ """ +import warnings from functools import total_ordering -from typing import TYPE_CHECKING, Callable, Iterator, Optional, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + 
Generator, + Iterator, + List, + Optional, + Set, + Tuple, + Union, +) from rdflib.term import Node, URIRef if TYPE_CHECKING: - from rdflib.graph import Graph, _ObjectType, _SubjectType + from rdflib._type_checking import _MulPathMod + from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType + from rdflib.namespace import NamespaceManager # property paths @@ -197,9 +213,17 @@ ZeroOrOne = "?" -@total_ordering -class Path(object): +def _n3( + arg: Union["URIRef", "Path"], namespace_manager: Optional["NamespaceManager"] = None +) -> str: + # type error: Item "Path" of "Union[Path, URIRef]" has no attribute "n3" [union-attr] + if isinstance(arg, (SequencePath, AlternativePath)) and len(arg.args) > 1: + return "(%s)" % arg.n3(namespace_manager) + return arg.n3(namespace_manager) # type: ignore[union-attr] + +@total_ordering +class Path: __or__: Callable[["Path", Union["URIRef", "Path"]], "AlternativePath"] __invert__: Callable[["Path"], "InvPath"] __neg__: Callable[["Path"], "NegatedPath"] @@ -214,7 +238,13 @@ def eval( ) -> Iterator[Tuple["_SubjectType", "_ObjectType"]]: raise NotImplementedError() - def __lt__(self, other): + def __hash__(self): + return hash(repr(self)) + + def __eq__(self, other): + return repr(self) == repr(other) + + def __lt__(self, other: Any) -> bool: if not isinstance(other, (Path, Node)): raise TypeError( "unorderable types: %s() < %s()" % (repr(self), repr(other)) @@ -223,48 +253,66 @@ def __lt__(self, other): class InvPath(Path): - def __init__(self, arg): + def __init__(self, arg: Union[Path, URIRef]): self.arg = arg - def eval(self, graph, subj=None, obj=None): - for s, o in evalPath(graph, (obj, self.arg, subj)): + def eval( + self, + graph: "Graph", + subj: Optional["_SubjectType"] = None, + obj: Optional["_ObjectType"] = None, + ) -> Generator[Tuple[_ObjectType, _SubjectType], None, None]: + for s, o in eval_path(graph, (obj, self.arg, subj)): yield o, s - def __repr__(self): + def __repr__(self) -> str: return 
"Path(~%s)" % (self.arg,) - def n3(self): - return "^%s" % self.arg.n3() + def n3(self, namespace_manager: Optional["NamespaceManager"] = None) -> str: + return "^%s" % _n3(self.arg, namespace_manager) class SequencePath(Path): - def __init__(self, *args): - self.args = [] + def __init__(self, *args: Union[Path, URIRef]): + self.args: List[Union[Path, URIRef]] = [] for a in args: if isinstance(a, SequencePath): self.args += a.args else: self.args.append(a) - def eval(self, graph, subj=None, obj=None): - def _eval_seq(paths, subj, obj): + def eval( + self, + graph: "Graph", + subj: Optional["_SubjectType"] = None, + obj: Optional["_ObjectType"] = None, + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: + def _eval_seq( + paths: List[Union[Path, URIRef]], + subj: Optional[_SubjectType], + obj: Optional[_ObjectType], + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: if paths[1:]: - for s, o in evalPath(graph, (subj, paths[0], None)): + for s, o in eval_path(graph, (subj, paths[0], None)): for r in _eval_seq(paths[1:], o, obj): yield s, r[1] else: - for s, o in evalPath(graph, (subj, paths[0], obj)): + for s, o in eval_path(graph, (subj, paths[0], obj)): yield s, o - def _eval_seq_bw(paths, subj, obj): + def _eval_seq_bw( + paths: List[Union[Path, URIRef]], + subj: Optional[_SubjectType], + obj: _ObjectType, + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: if paths[:-1]: - for s, o in evalPath(graph, (None, paths[-1], obj)): + for s, o in eval_path(graph, (None, paths[-1], obj)): for r in _eval_seq(paths[:-1], subj, s): yield r[0], o else: - for s, o in evalPath(graph, (subj, paths[0], obj)): + for s, o in eval_path(graph, (subj, paths[0], obj)): yield s, o if subj: @@ -274,36 +322,41 @@ def _eval_seq_bw(paths, subj, obj): else: # no vars bound, we can start anywhere return _eval_seq(self.args, subj, obj) - def __repr__(self): + def __repr__(self) -> str: return "Path(%s)" % " / ".join(str(x) for x in self.args) - def n3(self): 
- return "/".join(a.n3() for a in self.args) + def n3(self, namespace_manager: Optional["NamespaceManager"] = None) -> str: + return "/".join(_n3(a, namespace_manager) for a in self.args) class AlternativePath(Path): - def __init__(self, *args): - self.args = [] + def __init__(self, *args: Union[Path, URIRef]): + self.args: List[Union[Path, URIRef]] = [] for a in args: if isinstance(a, AlternativePath): self.args += a.args else: self.args.append(a) - def eval(self, graph, subj=None, obj=None): + def eval( + self, + graph: "Graph", + subj: Optional["_SubjectType"] = None, + obj: Optional["_ObjectType"] = None, + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: for x in self.args: - for y in evalPath(graph, (subj, x, obj)): + for y in eval_path(graph, (subj, x, obj)): yield y - def __repr__(self): + def __repr__(self) -> str: return "Path(%s)" % " | ".join(str(x) for x in self.args) - def n3(self): - return "|".join(a.n3() for a in self.args) + def n3(self, namespace_manager: Optional["NamespaceManager"] = None) -> str: + return "|".join(_n3(a, namespace_manager) for a in self.args) class MulPath(Path): - def __init__(self, path, mod): + def __init__(self, path: Union[Path, URIRef], mod: _MulPathMod): self.path = path self.mod = mod @@ -319,7 +372,13 @@ def __init__(self, path, mod): else: raise Exception("Unknown modifier %s" % mod) - def eval(self, graph, subj=None, obj=None, first=True): + def eval( + self, + graph: "Graph", + subj: Optional["_SubjectType"] = None, + obj: Optional["_ObjectType"] = None, + first: bool = True, + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: if self.zero and first: if subj and obj: if subj == obj: @@ -329,32 +388,46 @@ def eval(self, graph, subj=None, obj=None, first=True): elif obj: yield obj, obj - def _fwd(subj=None, obj=None, seen=None): - seen.add(subj) + def _fwd( + subj: Optional[_SubjectType] = None, + obj: Optional[_ObjectType] = None, + seen: Optional[Set[_SubjectType]] = None, + ) -> 
Generator[Tuple[_SubjectType, _ObjectType], None, None]: + # type error: Item "None" of "Optional[Set[Node]]" has no attribute "add" + # type error: Argument 1 to "add" of "set" has incompatible type "Optional[Node]"; expected "Node" + seen.add(subj) # type: ignore[union-attr, arg-type] - for s, o in evalPath(graph, (subj, self.path, None)): + for s, o in eval_path(graph, (subj, self.path, None)): if not obj or o == obj: yield s, o if self.more: - if o in seen: + # type error: Unsupported right operand type for in ("Optional[Set[Node]]") + if o in seen: # type: ignore[operator] continue for s2, o2 in _fwd(o, obj, seen): yield s, o2 - def _bwd(subj=None, obj=None, seen=None): - seen.add(obj) + def _bwd( + subj: Optional[_SubjectType] = None, + obj: Optional[_ObjectType] = None, + seen: Optional[Set[_ObjectType]] = None, + ) -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: + # type error: Item "None" of "Optional[Set[Node]]" has no attribute "add" + # type error: Argument 1 to "add" of "set" has incompatible type "Optional[Node]"; expected "Node" + seen.add(obj) # type: ignore[union-attr, arg-type] - for s, o in evalPath(graph, (None, self.path, obj)): + for s, o in eval_path(graph, (None, self.path, obj)): if not subj or subj == s: yield s, o if self.more: - if s in seen: + # type error: Unsupported right operand type for in ("Optional[Set[Node]]") + if s in seen: # type: ignore[operator] continue for s2, o2 in _bwd(None, s, seen): yield s2, o - def _all_fwd_paths(): + def _all_fwd_paths() -> Generator[Tuple[_SubjectType, _ObjectType], None, None]: if self.zero: seen1 = set() # According to the spec, ALL nodes are possible solutions @@ -371,7 +444,7 @@ def _all_fwd_paths(): yield o, o seen = set() - for s, o in evalPath(graph, (None, self.path, None)): + for s, o in eval_path(graph, (None, self.path, None)): if not self.more: yield s, o else: @@ -399,15 +472,16 @@ def _all_fwd_paths(): done.add(x) yield x - def __repr__(self): + def __repr__(self) -> str: 
return "Path(%s%s)" % (self.path, self.mod) - def n3(self): - return "%s%s" % (self.path.n3(), self.mod) + def n3(self, namespace_manager: Optional["NamespaceManager"] = None) -> str: + return "%s%s" % (_n3(self.path, namespace_manager), self.mod) class NegatedPath(Path): - def __init__(self, arg): + def __init__(self, arg: Union[AlternativePath, InvPath, URIRef]): + self.args: List[Union[URIRef, Path]] if isinstance(arg, (URIRef, InvPath)): self.args = [arg] elif isinstance(arg, AlternativePath): @@ -432,18 +506,18 @@ def eval(self, graph, subj=None, obj=None): else: yield s, o - def __repr__(self): + def __repr__(self) -> str: return "Path(! %s)" % ",".join(str(x) for x in self.args) - def n3(self): - return "!(%s)" % ("|".join(self.args)) + def n3(self, namespace_manager: Optional["NamespaceManager"] = None) -> str: + return "!(%s)" % ("|".join(_n3(arg, namespace_manager) for arg in self.args)) class PathList(list): pass -def path_alternative(self, other): +def path_alternative(self: Union[URIRef, Path], other: Union[URIRef, Path]): """ alternative path """ @@ -452,7 +526,7 @@ def path_alternative(self, other): return AlternativePath(self, other) -def path_sequence(self, other): +def path_sequence(self: Union[URIRef, Path], other: Union[URIRef, Path]): """ sequence path """ @@ -461,25 +535,50 @@ def path_sequence(self, other): return SequencePath(self, other) -def evalPath(graph, t): +def evalPath( # noqa: N802 + graph: Graph, + t: Tuple[ + Optional["_SubjectType"], + Union[None, Path, _PredicateType], + Optional["_ObjectType"], + ], +) -> Iterator[Tuple[_SubjectType, _ObjectType]]: + warnings.warn( + DeprecationWarning( + "rdflib.path.evalPath() is deprecated, use the (snake-cased) eval_path(). " + "The mixed-case evalPath() function name is incompatible with PEP8 " + "recommendations and will be replaced by eval_path() in rdflib 7.0.0." 
+ ) + ) + return eval_path(graph, t) + + +def eval_path( + graph: Graph, + t: Tuple[ + Optional["_SubjectType"], + Union[None, Path, _PredicateType], + Optional["_ObjectType"], + ], +) -> Iterator[Tuple[_SubjectType, _ObjectType]]: return ((s, o) for s, p, o in graph.triples(t)) -def mul_path(p, mul): +def mul_path(p: Union[URIRef, Path], mul: _MulPathMod) -> MulPath: """ cardinality path """ return MulPath(p, mul) -def inv_path(p): +def inv_path(p: Union[URIRef, Path]) -> InvPath: """ inverse path """ return InvPath(p) -def neg_path(p): +def neg_path(p: Union[URIRef, AlternativePath, InvPath]) -> NegatedPath: """ negated path """ @@ -502,7 +601,9 @@ def neg_path(p): URIRef.__truediv__ = path_sequence Path.__invert__ = inv_path - Path.__neg__ = neg_path - Path.__mul__ = mul_path + # type error: Incompatible types in assignment (expression has type "Callable[[Union[URIRef, AlternativePath, InvPath]], NegatedPath]", variable has type "Callable[[Path], NegatedPath]") + Path.__neg__ = neg_path # type: ignore[assignment] + # type error: Incompatible types in assignment (expression has type "Callable[[Union[URIRef, Path], Literal['*', '+', '?']], MulPath]", variable has type "Callable[[Path, str], MulPath]") + Path.__mul__ = mul_path # type: ignore[assignment] Path.__or__ = path_alternative Path.__truediv__ = path_sequence diff --git a/dependencies/rdflib/plugin.py b/dependencies/rdflib/plugin.py index ec654229c..676ffbaa8 100644 --- a/dependencies/rdflib/plugin.py +++ b/dependencies/rdflib/plugin.py @@ -25,7 +25,7 @@ """ -import sys +from importlib.metadata import EntryPoint, entry_points from typing import ( TYPE_CHECKING, Any, @@ -39,6 +39,7 @@ overload, ) +import rdflib.plugins.stores.berkeleydb from rdflib.exceptions import Error from rdflib.parser import Parser from rdflib.query import ( @@ -51,12 +52,15 @@ from rdflib.serializer import Serializer from rdflib.store import Store -if sys.version_info < (3, 8): - from importlib_metadata import EntryPoint, entry_points 
-else: - from importlib.metadata import EntryPoint, entry_points - -__all__ = ["register", "get", "plugins", "PluginException", "Plugin", "PKGPlugin"] +__all__ = [ + "register", + "get", + "plugins", + "PluginException", + "Plugin", + "PluginT", + "PKGPlugin", +] rdflib_entry_points = { "rdf.plugins.store": Store, @@ -76,6 +80,7 @@ class PluginException(Error): pass +#: A generic type variable for plugins PluginT = TypeVar("PluginT") @@ -89,7 +94,7 @@ def __init__( self.class_name = class_name self._class: Optional[Type[PluginT]] = None - def getClass(self) -> Type[PluginT]: + def getClass(self) -> Type[PluginT]: # noqa: N802 if self._class is None: module = __import__(self.module_path, globals(), locals(), [""]) self._class = getattr(module, self.class_name) @@ -103,7 +108,7 @@ def __init__(self, name: str, kind: Type[PluginT], ep: "EntryPoint"): self.ep = ep self._class: Optional[Type[PluginT]] = None - def getClass(self) -> Type[PluginT]: + def getClass(self) -> Type[PluginT]: # noqa: N802 if self._class is None: self._class = self.ep.load() return self._class @@ -158,7 +163,7 @@ def plugins(name: Optional[str] = ..., kind: None = ...) -> Iterator[Plugin]: def plugins( name: Optional[str] = None, kind: Optional[Type[PluginT]] = None -) -> Iterator[Plugin]: +) -> Iterator[Plugin[PluginT]]: """ A generator of the plugins. 
@@ -170,6 +175,15 @@ def plugins( # Register Stores + +if rdflib.plugins.stores.berkeleydb.has_bsddb: + # Checks for BerkeleyDB before registering it + register( + "BerkeleyDB", + Store, + "rdflib.plugins.stores.berkeleydb", + "BerkeleyDB", + ) register( "default", Store, @@ -200,12 +214,7 @@ def plugins( "rdflib.plugins.stores.concurrent", "ConcurrentStore", ) -register( - "BerkeleyDB", - Store, - "rdflib.plugins.stores.berkeleydb", - "BerkeleyDB", -) + register( "SPARQLStore", Store, diff --git a/dependencies/rdflib/plugins/parsers/hext.py b/dependencies/rdflib/plugins/parsers/hext.py index ae60cca4d..47d436f29 100644 --- a/dependencies/rdflib/plugins/parsers/hext.py +++ b/dependencies/rdflib/plugins/parsers/hext.py @@ -3,12 +3,16 @@ (ndjson) files, into Conjunctive. The store that backs the graph *must* be able to handle contexts, i.e. multiple graphs. """ +from __future__ import annotations + import json import warnings -from typing import List, Union +from io import TextIOWrapper +from typing import Any, BinaryIO, List, Optional, TextIO, Union -from rdflib import BNode, ConjunctiveGraph, Literal, URIRef -from rdflib.parser import Parser +from rdflib.graph import ConjunctiveGraph, Graph +from rdflib.parser import InputSource, Parser +from rdflib.term import BNode, Literal, URIRef __all__ = ["HextuplesParser"] @@ -22,7 +26,7 @@ class HextuplesParser(Parser): def __init__(self): pass - def _load_json_line(self, line: str): + def _load_json_line(self, line: str) -> List[Optional[Any]]: # this complex handing is because the 'value' component is # allowed to be "" but not None # all other "" values are treated as None @@ -32,7 +36,9 @@ def _load_json_line(self, line: str): ret2[2] = "" return ret2 - def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): + def _parse_hextuple( + self, cg: ConjunctiveGraph, tup: List[Union[str, None]] + ) -> None: # all values check # subject, predicate, value, datatype cannot be None # language and graph may be 
None @@ -66,11 +72,13 @@ def _parse_hextuple(self, cg: ConjunctiveGraph, tup: List[Union[str, None]]): # 6 - context if tup[5] is not None: c = URIRef(tup[5]) - cg.add((s, p, o, c)) + # type error: Argument 1 to "add" of "ConjunctiveGraph" has incompatible type "Tuple[Union[URIRef, BNode], URIRef, Union[URIRef, BNode, Literal], URIRef]"; expected "Union[Tuple[Node, Node, Node], Tuple[Node, Node, Node, Optional[Graph]]]" + cg.add((s, p, o, c)) # type: ignore[arg-type] else: cg.add((s, p, o)) - def parse(self, source, graph, **kwargs): + # type error: Signature of "parse" incompatible with supertype "Parser" + def parse(self, source: InputSource, graph: Graph, **kwargs: Any) -> None: # type: ignore[override] if kwargs.get("encoding") not in [None, "utf-8"]: warnings.warn( f"Hextuples files are always utf-8 encoded, " @@ -85,12 +93,19 @@ def parse(self, source, graph, **kwargs): cg = ConjunctiveGraph(store=graph.store, identifier=graph.identifier) cg.default_context = graph - # handle different source types - only file and string (data) for now - if hasattr(source, "file"): - with open(source.file.name) as fp: - for l in fp: - self._parse_hextuple(cg, self._load_json_line(l)) - elif hasattr(source, "_InputSource__bytefile"): - if hasattr(source._InputSource__bytefile, "wrapped"): - for l in source._InputSource__bytefile.wrapped.strip().splitlines(): - self._parse_hextuple(cg, self._load_json_line(l)) + text_stream: Optional[TextIO] = source.getCharacterStream() + if text_stream is None: + binary_stream: Optional[BinaryIO] = source.getByteStream() + if binary_stream is None: + raise ValueError( + f"Source does not have a character stream or a byte stream and cannot be used {type(source)}" + ) + text_stream = TextIOWrapper(binary_stream, encoding="utf-8") + + for line in text_stream: + if len(line) == 0 or line.isspace(): + # Skipping empty lines because this is what was being done before for the first and last lines, albeit in an rather indirect way. 
+ # The result is that we accept input that would otherwise be invalid. + # Possibly we should just let this result in an error. + continue + self._parse_hextuple(cg, self._load_json_line(line)) diff --git a/dependencies/rdflib/plugins/parsers/jsonld.py b/dependencies/rdflib/plugins/parsers/jsonld.py index 86fdf8002..4eb05fcee 100644 --- a/dependencies/rdflib/plugins/parsers/jsonld.py +++ b/dependencies/rdflib/plugins/parsers/jsonld.py @@ -32,15 +32,16 @@ # NOTE: This code reads the entire JSON object into memory before parsing, but # we should consider streaming the input to deal with arbitrarily large graphs. +from __future__ import annotations import warnings -from typing import Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import rdflib.parser -from rdflib.graph import ConjunctiveGraph +from rdflib.graph import ConjunctiveGraph, Graph from rdflib.namespace import RDF, XSD -from rdflib.parser import URLInputSource -from rdflib.term import BNode, Literal, URIRef +from rdflib.parser import InputSource, URLInputSource +from rdflib.term import BNode, IdentifiedNode, Literal, Node, URIRef from ..shared.jsonld.context import UNDEF, Context, Term from ..shared.jsonld.keys import ( @@ -78,7 +79,7 @@ class JsonLDParser(rdflib.parser.Parser): def __init__(self): super(JsonLDParser, self).__init__() - def parse(self, source, sink, **kwargs): + def parse(self, source: InputSource, sink: Graph, **kwargs: Any) -> None: # TODO: docstring w. 
args and return value encoding = kwargs.get("encoding") or "utf-8" if encoding not in ("utf-8", "utf-16"): @@ -93,6 +94,8 @@ def parse(self, source, sink, **kwargs): context_data = kwargs.get("context") if not context_data and hasattr(source, "url") and hasattr(source, "links"): + if TYPE_CHECKING: + assert isinstance(source, URLInputSource) context_data = context_from_urlinputsource(source) try: @@ -107,6 +110,7 @@ def parse(self, source, sink, **kwargs): # NOTE: A ConjunctiveGraph parses into a Graph sink, so no sink will be # context_aware. Keeping this check in case RDFLib is changed, or # someone passes something context_aware to this parser directly. + conj_sink: Graph if not sink.context_aware: conj_sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier) else: @@ -116,13 +120,13 @@ def parse(self, source, sink, **kwargs): def to_rdf( - data, - dataset, - base=None, - context_data=None, + data: Any, + dataset: Graph, + base: Optional[str] = None, + context_data: Optional[bool] = None, version: Optional[float] = None, - generalized_rdf=False, - allow_lists_of_lists=None, + generalized_rdf: bool = False, + allow_lists_of_lists: Optional[bool] = None, ): # TODO: docstring w. 
args and return value context = Context(base=base, version=version) @@ -134,8 +138,10 @@ def to_rdf( return parser.parse(data, context, dataset) -class Parser(object): - def __init__(self, generalized_rdf=False, allow_lists_of_lists=None): +class Parser: + def __init__( + self, generalized_rdf: bool = False, allow_lists_of_lists: Optional[bool] = None + ): self.generalized_rdf = generalized_rdf self.allow_lists_of_lists = ( allow_lists_of_lists @@ -143,9 +149,9 @@ def __init__(self, generalized_rdf=False, allow_lists_of_lists=None): else ALLOW_LISTS_OF_LISTS ) - def parse(self, data, context, dataset): + def parse(self, data: Any, context: Context, dataset: Graph) -> Graph: topcontext = False - + resources: Union[Dict[str, Any], List[Any]] if isinstance(data, list): resources = data elif isinstance(data, dict): @@ -154,7 +160,8 @@ def parse(self, data, context, dataset): context.load(local_context, context.base) topcontext = True resources = data - if not isinstance(resources, list): + # type error: Subclass of "Dict[str, Any]" and "List[Any]" cannot exist: would have incompatible method signatures + if not isinstance(resources, list): # type: ignore[unreachable] resources = [resources] if context.vocab: @@ -163,16 +170,25 @@ def parse(self, data, context, dataset): if term.id and term.id.endswith(VOCAB_DELIMS): dataset.bind(name, term.id) - graph = dataset.default_context if dataset.context_aware else dataset + # type error: "Graph" has no attribute "default_context" + graph = dataset.default_context if dataset.context_aware else dataset # type: ignore[attr-defined] for node in resources: self._add_to_graph(dataset, graph, context, node, topcontext) return graph - def _add_to_graph(self, dataset, graph, context, node, topcontext=False): + def _add_to_graph( + self, + dataset: Graph, + graph: Graph, + context: Context, + node: Any, + topcontext: bool = False, + ) -> Optional[Node]: if not isinstance(node, dict) or context.get_value(node): - return + # type error: 
Return value expected + return # type: ignore[return-value] if CONTEXT in node and not topcontext: local_context = node[CONTEXT] @@ -181,7 +197,8 @@ def _add_to_graph(self, dataset, graph, context, node, topcontext=False): else: context = Context(base=context.doc_base) - context = context.get_context_for_type(node) + # type error: Incompatible types in assignment (expression has type "Optional[Context]", variable has type "Context") + context = context.get_context_for_type(node) # type: ignore[assignment] id_val = context.get_id(node) @@ -222,7 +239,8 @@ def _add_to_graph(self, dataset, graph, context, node, topcontext=False): return subj - def _get_nested_id(self, context, node): + # type error: Missing return statement + def _get_nested_id(self, context: Context, node: Dict[str, Any]) -> Optional[str]: # type: ignore[return] for key, obj in node.items(): if context.version >= 1.1 and key in context.get_keys(NEST): term = context.terms.get(key) @@ -242,9 +260,16 @@ def _get_nested_id(self, context, node): return id_val def _key_to_graph( - self, dataset, graph, context, subj, key, obj, reverse=False, no_id=False - ): - + self, + dataset: Graph, + graph: Graph, + context: Context, + subj: Node, + key: str, + obj: Any, + reverse: bool = False, + no_id: bool = False, + ) -> None: if isinstance(obj, list): obj_nodes = obj else: @@ -267,7 +292,10 @@ def _key_to_graph( if GRAPH in (key, term_id): if dataset.context_aware and not no_id: - subgraph = dataset.get_context(subj) + if TYPE_CHECKING: + assert isinstance(dataset, ConjunctiveGraph) + # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Node"; expected "Union[IdentifiedNode, str, None]" + subgraph = dataset.get_context(subj) # type: ignore[arg-type] else: subgraph = graph for onode in obj_nodes: @@ -297,7 +325,8 @@ def _key_to_graph( if nkey in context.get_keys(ID): continue subcontext = context.get_context_for_type(obj) - self._key_to_graph(dataset, graph, subcontext, subj, 
nkey, nobj) + # type error: Argument 3 to "_key_to_graph" of "Parser" has incompatible type "Optional[Context]"; expected "Context" + self._key_to_graph(dataset, graph, subcontext, subj, nkey, nobj) # type: ignore[arg-type] return pred_uri = term.id if term else context.expand(key) @@ -322,6 +351,7 @@ def _key_to_graph( if term and term.reverse: reverse = not reverse + pred: IdentifiedNode bid = self._get_bnodeid(pred_uri) if bid: if not self.generalized_rdf: @@ -339,7 +369,9 @@ def _key_to_graph( else: graph.add((subj, pred, obj)) - def _parse_container(self, context, term, obj): + def _parse_container( + self, context: Context, term: Term, obj: Dict[str, Any] + ) -> List[Any]: if LANG in term.container: obj_nodes = [] for lang, values in obj.items(): @@ -412,7 +444,7 @@ def _parse_container(self, context, term, obj): return [obj] @staticmethod - def _add_type(context, o, k): + def _add_type(context: Context, o: Dict[str, Any], k: str) -> Dict[str, Any]: otype = context.get_type(o) or [] if otype and not isinstance(otype, list): otype = [otype] @@ -420,20 +452,31 @@ def _add_type(context, o, k): o[TYPE] = otype return o - def _to_object(self, dataset, graph, context, term, node, inlist=False): + def _to_object( + self, + dataset: Graph, + graph: Graph, + context: Context, + term: Optional[Term], + node: Any, + inlist: bool = False, + ) -> Optional[Node]: if isinstance(node, tuple): value, lang = node if value is None: - return + # type error: Return value expected + return # type: ignore[return-value] if lang and " " in lang: - return + # type error: Return value expected + return # type: ignore[return-value] return Literal(value, lang=lang) if isinstance(node, dict): node_list = context.get_list(node) if node_list is not None: if inlist and not self.allow_lists_of_lists: - return + # type error: Return value expected + return # type: ignore[return-value] listref = self._add_list(dataset, graph, context, term, node_list) if listref: return listref @@ -443,7 +486,8 
@@ def _to_object(self, dataset, graph, context, term, node, inlist=False): if term.type == JSON: node = self._to_typed_json_value(node) elif node is None: - return + # type error: Return value expected + return # type: ignore[return-value] elif term.type == ID and isinstance(node, str): node = {ID: context.resolve(node)} elif term.type == VOCAB and isinstance(node, str): @@ -452,7 +496,8 @@ def _to_object(self, dataset, graph, context, term, node, inlist=False): node = {TYPE: term.type, VALUE: node} else: if node is None: - return + # type error: Return value expected + return # type: ignore[return-value] if isinstance(node, float): return Literal(node, datatype=XSD.double) @@ -465,7 +510,8 @@ def _to_object(self, dataset, graph, context, term, node, inlist=False): lang = context.get_language(node) datatype = not lang and context.get_type(node) or None value = context.get_value(node) - if datatype in context.get_keys(JSON): + # type error: Unsupported operand types for in ("Optional[Any]" and "Generator[str, None, None]") + if datatype in context.get_keys(JSON): # type: ignore[operator] node = self._to_typed_json_value(value) datatype = context.get_type(node) value = context.get_value(node) @@ -475,7 +521,8 @@ def _to_object(self, dataset, graph, context, term, node, inlist=False): return None if lang: if " " in lang: - return + # type error: Return value expected + return # type: ignore[return-value] return Literal(value, lang=lang) elif datatype: return Literal(value, datatype=context.expand(datatype)) @@ -484,7 +531,7 @@ def _to_object(self, dataset, graph, context, term, node, inlist=False): else: return self._add_to_graph(dataset, graph, context, node) - def _to_rdf_id(self, context, id_val): + def _to_rdf_id(self, context: Context, id_val: str) -> Optional[IdentifiedNode]: bid = self._get_bnodeid(id_val) if bid: return BNode(bid) @@ -494,13 +541,21 @@ def _to_rdf_id(self, context, id_val): return None return URIRef(uri) - def _get_bnodeid(self, ref): + def 
_get_bnodeid(self, ref: str) -> Optional[str]: if not ref.startswith("_:"): - return + # type error: Return value expected + return # type: ignore[return-value] bid = ref.split("_:", 1)[-1] return bid or None - def _add_list(self, dataset, graph, context, term, node_list): + def _add_list( + self, + dataset: Graph, + graph: Graph, + context: Context, + term: Optional[Term], + node_list: Any, + ) -> IdentifiedNode: if not isinstance(node_list, list): node_list = [node_list] @@ -512,7 +567,8 @@ def _add_list(self, dataset, graph, context, term, node_list): continue if rest: - graph.add((subj, RDF.rest, rest)) + # type error: Statement is unreachable + graph.add((subj, RDF.rest, rest)) # type: ignore[unreachable] subj = rest obj = self._to_object(dataset, graph, context, term, node, inlist=True) @@ -530,7 +586,7 @@ def _add_list(self, dataset, graph, context, term, node_list): return RDF.nil @staticmethod - def _to_typed_json_value(value): + def _to_typed_json_value(value: Any) -> Dict[str, str]: return { TYPE: URIRef("%sJSON" % str(RDF)), VALUE: json.dumps( diff --git a/dependencies/rdflib/plugins/parsers/notation3.py b/dependencies/rdflib/plugins/parsers/notation3.py index 8b6a43dd5..290e7d04b 100644 --- a/dependencies/rdflib/plugins/parsers/notation3.py +++ b/dependencies/rdflib/plugins/parsers/notation3.py @@ -27,6 +27,8 @@ Copyright 2010, Gunnar A. 
Grimnes """ +from __future__ import annotations + import codecs import os import re @@ -35,7 +37,22 @@ # importing typing for `typing.List` because `List`` is used for something else import typing from decimal import Decimal -from typing import IO, TYPE_CHECKING, Any, Callable, Dict, Optional, TypeVar, Union +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Dict, + Match, + MutableSequence, + NoReturn, + Optional, + Pattern, + Set, + Tuple, + TypeVar, + Union, +) from uuid import uuid4 from rdflib.compat import long_type @@ -44,6 +61,7 @@ from rdflib.term import ( _XSD_PFX, BNode, + IdentifiedNode, Identifier, Literal, Node, @@ -62,6 +80,9 @@ "runNamespace", "uniqueURI", "hexify", + "Formula", + "RDFSink", + "SinkParser", ] from rdflib.parser import Parser @@ -69,10 +90,10 @@ if TYPE_CHECKING: from rdflib.parser import InputSource -AnyT = TypeVar("AnyT") +_AnyT = TypeVar("_AnyT") -def splitFragP(uriref, punct=0): +def splitFragP(uriref: str, punc: int = 0) -> Tuple[str, str]: """split a URI reference before the fragment Punctuation is kept. 
@@ -94,7 +115,10 @@ def splitFragP(uriref, punct=0): return uriref, "" -def join(here, there): +_StrT = TypeVar("_StrT", bound=str) + + +def join(here: str, there: str) -> str: """join an absolute URI and URI reference (non-ascii characters are supported/doctested; haven't checked the details of the IRI spec though) @@ -192,7 +216,7 @@ def join(here, there): return here[: slashr + 1] + path + frag -def base(): +def base() -> str: """The base URI for this process - the Web equiv of cwd Relative or absolute unix-standard filenames parsed relative to @@ -205,7 +229,7 @@ def base(): return "file://" + _fixslash(os.getcwd()) + "/" -def _fixslash(s): +def _fixslash(s: str) -> str: """Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" s = s.replace("\\", "/") if s[0] != "/" and s[1] == ":": @@ -252,10 +276,10 @@ def _fixslash(s): N3_Empty = (SYMBOL, List_NS + "Empty") -runNamespaceValue = None +runNamespaceValue: Optional[str] = None -def runNamespace(): +def runNamespace() -> str: """Returns a URI suitable as a namespace for run-local objects""" # @@@ include hostname (privacy?) (hash it?) 
global runNamespaceValue @@ -267,7 +291,7 @@ def runNamespace(): nextu = 0 -def uniqueURI(): +def uniqueURI() -> str: """A unique URI""" global nextu nextu += 1 @@ -280,12 +304,12 @@ def uniqueURI(): # from why import BecauseOfData, becauseSubexpression -def BecauseOfData(*args, **kargs): +def BecauseOfData(*args: Any, **kargs: Any) -> None: # print args, kargs pass -def becauseSubexpression(*args, **kargs): +def becauseSubexpression(*args: Any, **kargs: Any) -> None: # print args, kargs pass @@ -326,10 +350,10 @@ def becauseSubexpression(*args, **kargs): numberCharsPlus = numberChars | {"+", "."} -def unicodeExpand(m): +def unicodeExpand(m: Match) -> str: try: return chr(int(m.group(1), 16)) - except: + except Exception: raise Exception("Invalid unicode code point: " + m.group(1)) @@ -386,10 +410,10 @@ def __init__( self._genPrefix = genPrefix self.keywords = ["a", "this", "bind", "has", "is", "of", "true", "false"] self.keywordsSet = 0 # Then only can others be considered qnames - self._anonymousNodes: Dict[str, Node] = {} + self._anonymousNodes: Dict[str, BNode] = {} # Dict of anon nodes already declared ln: Term - self._variables: Dict[Identifier, Identifier] = {} - self._parentVariables: Dict[Identifier, Identifier] = {} + self._variables: Dict[str, Variable] = {} + self._parentVariables: Dict[str, Variable] = {} self._reason = why # Why the parser was asked to parse this self.turtle = turtle # raise exception when encountering N3 extensions @@ -397,10 +421,11 @@ def __init__( # only allows double quotes. 
self.string_delimiters = ('"', "'") if turtle else ('"',) - self._reason2 = None # Why these triples + self._reason2: Optional[Callable[..., None]] = None # Why these triples # was: diag.tracking if tracking: - self._reason2 = BecauseOfData( + # type error: "BecauseOfData" does not return a value + self._reason2 = BecauseOfData( # type: ignore[func-returns-value] store.newSymbol(thisDoc), because=self._reason ) @@ -421,7 +446,7 @@ def __init__( else: self._genPrefix = uniqueURI() - self._formula: Formula + self._formula: Optional[Formula] if openFormula is None and not turtle: if self._thisDoc: # TODO FIXME: store.newFormula does not take any arguments @@ -429,9 +454,9 @@ def __init__( else: self._formula = store.newFormula() else: - self._formula = openFormula # type: ignore[assignment] + self._formula = openFormula - self._context = self._formula + self._context: Optional[Formula] = self._formula self._parentContext: Optional[Formula] = None def here(self, i: int) -> str: @@ -447,20 +472,20 @@ def here(self, i: int) -> str: return "%s_L%iC%i" % (self._genPrefix, self.lines, i - self.startOfLine + 1) - def formula(self): + def formula(self) -> Optional[Formula]: return self._formula def loadStream(self, stream: Union[IO[str], IO[bytes]]) -> Optional["Formula"]: return self.loadBuf(stream.read()) # Not ideal - def loadBuf(self, buf: Union[str, bytes]): + def loadBuf(self, buf: Union[str, bytes]) -> Optional[Formula]: """Parses a buffer and returns its top level formula""" self.startDoc() self.feed(buf) return self.endDoc() # self._formula - def feed(self, octets: Union[str, bytes]): + def feed(self, octets: Union[str, bytes]) -> None: """Feed an octet stream to the parser if BadSyntax is raised, the string @@ -489,7 +514,6 @@ def feed(self, octets: Union[str, bytes]): self.BadSyntax(s, j, "expected directive or statement") def directiveOrStatement(self, argstr: str, h: int) -> int: - i = self.skipSpace(argstr, h) if i < 0: return i # EOF @@ -512,7 +536,7 @@ def 
directiveOrStatement(self, argstr: str, h: int) -> int: # @@I18N # _namechars = string.lowercase + string.uppercase + string.digits + '_-' - def tok(self, tok: str, argstr: str, i: int, colon: bool = False): + def tok(self, tok: str, argstr: str, i: int, colon: bool = False) -> int: """Check for keyword. Space must have been stripped on entry and we must not be at end of file. @@ -558,7 +582,7 @@ def directive(self, argstr: str, i: int) -> int: j = self.skipSpace(argstr, i) if j < 0: return j # eof - res: typing.List[Any] = [] + res: typing.List[str] = [] j = self.tok("bind", argstr, i) # implied "#". Obsolete. if j > 0: @@ -588,7 +612,8 @@ def directive(self, argstr: str, i: int) -> int: for x in res: # self._context.declareUniversal(x) if x not in self._variables or x in self._parentVariables: - self._variables[x] = self._context.newUniversal(x) + # type error: Item "None" of "Optional[Formula]" has no attribute "newUniversal" + self._variables[x] = self._context.newUniversal(x) # type: ignore[union-attr] return i j = self.tok("forSome", argstr, i) @@ -600,19 +625,20 @@ def directive(self, argstr: str, i: int) -> int: if i < 0: self.BadSyntax(argstr, i, "Bad variable list after @forSome") for x in res: - self._context.declareExistential(x) + # type error: Item "None" of "Optional[Formula]" has no attribute "declareExistential" + self._context.declareExistential(x) # type: ignore[union-attr] return i j = self.tok("prefix", argstr, i, colon=True) # no implied "#" if j >= 0: - t: typing.List[Any] = [] + t: typing.List[Union[Identifier, Tuple[str, str]]] = [] i = self.qname(argstr, j, t) if i < 0: self.BadSyntax(argstr, j, "expected qname after @prefix") j = self.uri_ref2(argstr, i, t) if j < 0: self.BadSyntax(argstr, i, "expected after @prefix _qname_") - ns = self.uriOf(t[1]) + ns: str = self.uriOf(t[1]) if self._baseURI: ns = join(self._baseURI, ns) @@ -652,8 +678,7 @@ def directive(self, argstr: str, i: int) -> int: return -1 # Not a directive, could be something 
else. - def sparqlDirective(self, argstr: str, i: int): - + def sparqlDirective(self, argstr: str, i: int) -> int: """ turtle and trig support BASE/PREFIX without @ and without terminating . @@ -722,7 +747,7 @@ def bind(self, qn: str, uri: bytes) -> None: else: self._store.bind(qn, uri) - def setKeywords(self, k: Optional[typing.List[str]]): + def setKeywords(self, k: Optional[typing.List[str]]) -> None: """Takes a list of strings""" if k is None: self.keywordsSet = 0 @@ -739,7 +764,7 @@ def endDoc(self) -> Optional["Formula"]: self._store.endDoc(self._formula) # don't canonicalize yet return self._formula - def makeStatement(self, quadruple): + def makeStatement(self, quadruple) -> None: # $$$$$$$$$$$$$$$$$$$$$ # print "# Parser output: ", `quadruple` self._store.makeStatement(quadruple, why=self._reason2) @@ -756,10 +781,10 @@ def statement(self, argstr: str, i: int) -> int: self.BadSyntax(argstr, i, "expected propertylist") return j - def subject(self, argstr: str, i: int, res: typing.List[Any]) -> int: + def subject(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: return self.item(argstr, i, res) - def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: + def verb(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: """has _prop_ is _prop_ of a @@ -845,16 +870,16 @@ def verb(self, argstr: str, i: int, res: typing.List[Any]) -> int: return -1 - def prop(self, argstr: str, i: int, res): + def prop(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: return self.item(argstr, i, res) - def item(self, argstr: str, i, res): + def item(self, argstr: str, i, res: MutableSequence[Any]) -> int: return self.path(argstr, i, res) - def blankNode(self, uri=None): + def blankNode(self, uri: Optional[str] = None) -> BNode: return self._store.newBlankNode(self._context, uri, why=self._reason2) - def path(self, argstr: str, i: int, res): + def path(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: """Parse the path 
production.""" j = self.nodeOrLiteral(argstr, i, res) if j < 0: @@ -875,7 +900,7 @@ def path(self, argstr: str, i: int, res): res.append(obj) return j - def anonymousNode(self, ln: str): + def anonymousNode(self, ln: str) -> BNode: """Remember or generate a term for one of these _: anonymous nodes""" term = self._anonymousNodes.get(ln, None) if term is not None: @@ -884,12 +909,18 @@ def anonymousNode(self, ln: str): self._anonymousNodes[ln] = term return term - def node(self, argstr: str, i: int, res, subjectAlready=None): + def node( + self, + argstr: str, + i: int, + res: MutableSequence[Any], + subjectAlready: Optional[Node] = None, + ) -> int: """Parse the production. Space is now skipped once at the beginning instead of in multiple calls to self.skipSpace(). """ - subj = subjectAlready + subj: Optional[Node] = subjectAlready j = self.skipSpace(argstr, i) if j < 0: @@ -909,7 +940,7 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): argstr, j, "Found '[=' or '[ =' when in turtle mode." 
) i = j + 1 - objs: typing.List[Any] = [] + objs: typing.List[Node] = [] j = self.objectList(argstr, i, objs) if j >= 0: subj = objs[0] @@ -990,8 +1021,10 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): reason2 = self._reason2 self._reason2 = becauseSubexpression if subj is None: - subj = self._store.newFormula() - self._context = subj + # type error: Incompatible types in assignment (expression has type "Formula", variable has type "Optional[Node]") + subj = self._store.newFormula() # type: ignore[assignment] + # type error: Incompatible types in assignment (expression has type "Optional[Node]", variable has type "Optional[Formula]") + self._context = subj # type: ignore[assignment] while 1: i = self.skipSpace(argstr, j) @@ -1012,10 +1045,16 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): self._context = self._parentContext self._reason2 = reason2 self._parentContext = oldParentContext - res.append(subj.close()) # No use until closed + # type error: Item "Node" of "Optional[Node]" has no attribute "close" + res.append( + subj.close() # type: ignore[union-attr] + ) # No use until closed return j if ch == "(": + thing_type: Callable[ + [typing.List[Any], Optional[Formula]], Union[Set[Any], IdentifiedNode] + ] thing_type = self._store.newList ch2 = argstr[i + 1] if ch2 == "$": @@ -1066,7 +1105,7 @@ def node(self, argstr: str, i: int, res, subjectAlready=None): return -1 - def property_list(self, argstr: str, i: int, subj): + def property_list(self, argstr: str, i: int, subj: Node) -> int: """Parse property list Leaves the terminating punctuation in the buffer """ @@ -1115,7 +1154,13 @@ def property_list(self, argstr: str, i: int, subj): return i i += 1 # skip semicolon and continue - def commaSeparatedList(self, argstr: str, j, res, what): + def commaSeparatedList( + self, + argstr: str, + j: int, + res: MutableSequence[Any], + what: Callable[[str, int, MutableSequence[Any]], int], + ) -> int: """return value: -1 bad syntax; >1 new 
position in argstr res has things found appended """ @@ -1141,7 +1186,7 @@ def commaSeparatedList(self, argstr: str, j, res, what): if i < 0: self.BadSyntax(argstr, i, "bad list content") - def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: + def objectList(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: i = self.object(argstr, i, res) if i < 0: return -1 @@ -1155,7 +1200,7 @@ def objectList(self, argstr: str, i: int, res: typing.List[Any]) -> int: if i < 0: return i - def checkDot(self, argstr: str, i: int): + def checkDot(self, argstr: str, i: int) -> int: j = self.skipSpace(argstr, i) if j < 0: return j # eof @@ -1168,7 +1213,7 @@ def checkDot(self, argstr: str, i: int): return j self.BadSyntax(argstr, j, "expected '.' or '}' or ']' at end of statement") - def uri_ref2(self, argstr: str, i: int, res): + def uri_ref2(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: """Generate uri from n3 representation. Note that the RDF convention of directly concatenating @@ -1244,7 +1289,7 @@ def uri_ref2(self, argstr: str, i: int, res): else: return -1 - def skipSpace(self, argstr: str, i: int): + def skipSpace(self, argstr: str, i: int) -> int: """Skip white space, newlines and comments. 
return -1 if EOF, else position of first non-ws character""" @@ -1273,7 +1318,7 @@ def skipSpace(self, argstr: str, i: int): m = eof.match(argstr, i) return i if m is None else -1 - def variable(self, argstr: str, i: int, res): + def variable(self, argstr: str, i: int, res) -> int: """?abc -> variable(:abc)""" j = self.skipSpace(argstr, i) @@ -1292,7 +1337,8 @@ def variable(self, argstr: str, i: int, res): if self._parentContext is None: varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) # type: ignore[operator] if varURI not in self._variables: - self._variables[varURI] = self._context.newUniversal( + # type error: Item "None" of "Optional[Formula]" has no attribute "newUniversal" + self._variables[varURI] = self._context.newUniversal( # type: ignore[union-attr] varURI, why=self._reason2 ) res.append(self._variables[varURI]) @@ -1309,7 +1355,7 @@ def variable(self, argstr: str, i: int, res): res.append(self._parentVariables[varURI]) return i - def bareWord(self, argstr: str, i: int, res): + def bareWord(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: """abc -> :abc""" j = self.skipSpace(argstr, i) if j < 0: @@ -1324,7 +1370,12 @@ def bareWord(self, argstr: str, i: int, res): res.append(argstr[j:i]) return i - def qname(self, argstr: str, i: int, res): + def qname( + self, + argstr: str, + i: int, + res: MutableSequence[Union[Identifier, Tuple[str, str]]], + ) -> int: """ xyz:def -> ('xyz', 'def') If not in keywords and keywordsSet: def -> ('', 'def') @@ -1427,7 +1478,12 @@ def qname(self, argstr: str, i: int, res): return i return -1 - def object(self, argstr: str, i: int, res): + def object( + self, + argstr: str, + i: int, + res: MutableSequence[Any], + ) -> int: j = self.subject(argstr, i, res) if j >= 0: return j @@ -1455,7 +1511,7 @@ def object(self, argstr: str, i: int, res): else: return -1 - def nodeOrLiteral(self, argstr: str, i: int, res): + def nodeOrLiteral(self, argstr: str, i: int, res: MutableSequence[Any]) -> int: j = 
self.node(argstr, i, res) startline = self.lines # Remember where for error messages if j >= 0: @@ -1523,13 +1579,13 @@ def nodeOrLiteral(self, argstr: str, i: int, res): else: return -1 - def uriOf(self, sym): + def uriOf(self, sym: Union[Identifier, Tuple[str, str]]) -> str: if isinstance(sym, tuple): return sym[1] # old system for --pipe # return sym.uriref() # cwm api return sym - def strconst(self, argstr: str, i: int, delim): + def strconst(self, argstr: str, i: int, delim: str) -> Tuple[int, str]: """parse an N3 string constant delimited by delim. return index, val """ @@ -1640,14 +1696,22 @@ def strconst(self, argstr: str, i: int, delim): self.BadSyntax(argstr, i, "unterminated string literal") - def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): + def _unicodeEscape( + self, + argstr: str, + i: int, + startline: int, + reg: Pattern[str], + n: int, + prefix: str, + ) -> Tuple[int, str]: if len(argstr) < i + n: raise BadSyntax( self._thisDoc, startline, argstr, i, "unterminated string literal(3)" ) try: return i + n, reg.sub(unicodeExpand, "\\" + prefix + argstr[i : i + n]) - except: + except Exception: raise BadSyntax( self._thisDoc, startline, @@ -1656,13 +1720,13 @@ def _unicodeEscape(self, argstr: str, i, startline, reg, n, prefix): "bad string literal hex escape: " + argstr[i : i + n], ) - def uEscape(self, argstr: str, i, startline): + def uEscape(self, argstr: str, i: int, startline: int) -> Tuple[int, str]: return self._unicodeEscape(argstr, i, startline, unicodeEscape4, 4, "u") - def UEscape(self, argstr: str, i, startline): + def UEscape(self, argstr: str, i: int, startline: int) -> Tuple[int, str]: return self._unicodeEscape(argstr, i, startline, unicodeEscape8, 8, "U") - def BadSyntax(self, argstr: str, i, msg): + def BadSyntax(self, argstr: str, i: int, msg: str) -> NoReturn: raise BadSyntax(self._thisDoc, self.lines, argstr, i, msg) @@ -1671,14 +1735,14 @@ def BadSyntax(self, argstr: str, i, msg): class BadSyntax(SyntaxError): 
- def __init__(self, uri, lines, argstr, i, why): + def __init__(self, uri: str, lines: int, argstr: str, i: int, why: str): self._str = argstr.encode("utf-8") # Better go back to strings for errors self._i = i self._why = why self.lines = lines self._uri = uri - def __str__(self): + def __str__(self) -> str: argstr = self._str i = self._i st = 0 @@ -1692,8 +1756,9 @@ def __str__(self): else: post = "" + # type error: On Python 3 formatting "b'abc'" with "%s" produces "b'abc'", not "abc"; use "%r" if this is desired behavior return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' % ( - self.lines + 1, + self.lines + 1, # type: ignore[str-bytes-safe] self._uri, self._why, pre, @@ -1703,31 +1768,33 @@ def __str__(self): ) @property - def message(self): + def message(self) -> str: return str(self) ############################################################################### -class Formula(object): +class Formula: number = 0 - def __init__(self, parent): + def __init__(self, parent: Graph): self.uuid = uuid4().hex self.counter = 0 Formula.number += 1 self.number = Formula.number - self.existentials = {} - self.universals = {} + self.existentials: Dict[str, BNode] = {} + self.universals: Dict[str, BNode] = {} self.quotedgraph = QuotedGraph(store=parent.store, identifier=self.id()) - def __str__(self): + def __str__(self) -> str: return "_:Formula%s" % self.number - def id(self): + def id(self) -> BNode: return BNode("_:Formula%s" % self.number) - def newBlankNode(self, uri=None, why=None): + def newBlankNode( + self, uri: Optional[str] = None, why: Optional[Any] = None + ) -> BNode: if uri is None: self.counter += 1 bn = BNode("f%sb%s" % (self.uuid, self.counter)) @@ -1735,21 +1802,20 @@ def newBlankNode(self, uri=None, why=None): bn = BNode(uri.split("#").pop().replace("_", "b")) return bn - def newUniversal(self, uri, why=None): + def newUniversal(self, uri: str, why: Optional[Any] = None) -> Variable: return Variable(uri.split("#").pop()) - def 
declareExistential(self, x): + def declareExistential(self, x: str) -> None: self.existentials[x] = self.newBlankNode() - def close(self): - + def close(self) -> QuotedGraph: return self.quotedgraph r_hibyte = re.compile(r"([\x80-\xff])") -class RDFSink(object): +class RDFSink: def __init__(self, graph: Graph): self.rootFormula: Optional[Formula] = None self.uuid = uuid4().hex @@ -1768,7 +1834,7 @@ def newFormula(self) -> Formula: def newGraph(self, identifier: Identifier) -> Graph: return Graph(self.graph.store, identifier) - def newSymbol(self, *args: str): + def newSymbol(self, *args: str) -> URIRef: return URIRef(args[0]) def newBlankNode( @@ -1792,7 +1858,7 @@ def newLiteral(self, s: str, dt: Optional[URIRef], lang: Optional[str]) -> Liter else: return Literal(s, lang=lang) - def newList(self, n: typing.List[Any], f: Optional[Formula]): + def newList(self, n: typing.List[Any], f: Optional[Formula]) -> IdentifiedNode: nil = self.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil") if not n: return nil @@ -1810,21 +1876,26 @@ def newList(self, n: typing.List[Any], f: Optional[Formula]): self.makeStatement((f, rest, a, nil)) return af - def newSet(self, *args): + def newSet(self, *args: _AnyT) -> Set[_AnyT]: return set(args) - def setDefaultNamespace(self, *args) -> str: + def setDefaultNamespace(self, *args: bytes) -> str: return ":".join(repr(n) for n in args) - def makeStatement(self, quadruple, why=None) -> None: + def makeStatement( + self, + quadruple: Tuple[Optional[Union[Formula, Graph]], Node, Node, Node], + why: Optional[Any] = None, + ) -> None: f, p, s, o = quadruple if hasattr(p, "formula"): raise ParserError("Formula used as predicate") - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) + # type error: Argument 1 to "normalise" of "RDFSink" has incompatible type "Union[Formula, Graph, None]"; expected "Optional[Formula]" + s = self.normalise(f, s) # type: ignore[arg-type] + p = self.normalise(f, p) # type: 
ignore[arg-type] + o = self.normalise(f, o) # type: ignore[arg-type] if f == self.rootFormula: # print s, p, o, '.' @@ -1832,11 +1903,16 @@ def makeStatement(self, quadruple, why=None) -> None: elif isinstance(f, Formula): f.quotedgraph.add((s, p, o)) else: - f.add((s, p, o)) + # type error: Item "None" of "Optional[Graph]" has no attribute "add" + f.add((s, p, o)) # type: ignore[union-attr] # return str(quadruple) - def normalise(self, f: Optional[Formula], n): + def normalise( + self, + f: Optional[Formula], + n: Union[Tuple[int, str], bool, int, Decimal, float, _AnyT], + ) -> Union[URIRef, Literal, BNode, _AnyT]: if isinstance(n, tuple): return URIRef(str(n[1])) @@ -1861,6 +1937,8 @@ def normalise(self, f: Optional[Formula], n): if isinstance(f, Formula): if n in f.existentials: + if TYPE_CHECKING: + assert isinstance(n, URIRef) return f.existentials[n] # if isinstance(n, Var): @@ -1868,16 +1946,16 @@ def normalise(self, f: Optional[Formula], n): # return f.universals[n] # f.universals[n] = f.newBlankNode() # return f.universals[n] + # type error: Incompatible return value type (got "Union[int, _AnyT]", expected "Union[URIRef, Literal, BNode, _AnyT]") [return-value] + return n # type: ignore[return-value] - return n - - def intern(self, something: AnyT) -> AnyT: + def intern(self, something: _AnyT) -> _AnyT: return something - def bind(self, pfx, uri): + def bind(self, pfx, uri) -> None: pass # print pfx, ':', uri - def startDoc(self, formula: Optional[Formula]): + def startDoc(self, formula: Optional[Formula]) -> None: self.rootFormula = formula def endDoc(self, formula: Optional[Formula]) -> None: @@ -1890,7 +1968,7 @@ def endDoc(self, formula: Optional[Formula]) -> None: # -def hexify(ustr): +def hexify(ustr: str) -> bytes: """Use URL encoding to return an ASCII string corresponding to the given UTF8 string @@ -1926,7 +2004,7 @@ def parse( graph: Graph, encoding: Optional[str] = "utf-8", turtle: bool = True, - ): + ) -> None: if encoding not in [None, 
"utf-8"]: raise ParserError( "N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding @@ -1958,7 +2036,10 @@ class N3Parser(TurtleParser): def __init__(self): pass - def parse(self, source, graph, encoding="utf-8"): + # type error: Signature of "parse" incompatible with supertype "TurtleParser" + def parse( # type: ignore[override] + self, source: InputSource, graph: Graph, encoding: Optional[str] = "utf-8" + ) -> None: # we're currently being handed a Graph, not a ConjunctiveGraph # context-aware is this implied by formula_aware ca = getattr(graph.store, "context_aware", False) diff --git a/dependencies/rdflib/plugins/parsers/nquads.py b/dependencies/rdflib/plugins/parsers/nquads.py index d5d982502..eb24aa64b 100644 --- a/dependencies/rdflib/plugins/parsers/nquads.py +++ b/dependencies/rdflib/plugins/parsers/nquads.py @@ -22,24 +22,33 @@ >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") >>> assert(g.value(s, FOAF.name).eq("Arco Publications")) """ +from __future__ import annotations from codecs import getreader +from typing import Any, MutableMapping, Optional -from rdflib import ConjunctiveGraph +from rdflib.exceptions import ParserError as ParseError +from rdflib.graph import ConjunctiveGraph +from rdflib.parser import InputSource # Build up from the NTriples parser: -from rdflib.plugins.parsers.ntriples import ( - ParseError, - W3CNTriplesParser, - r_tail, - r_wspace, -) +from rdflib.plugins.parsers.ntriples import W3CNTriplesParser, r_tail, r_wspace +from rdflib.term import BNode __all__ = ["NQuadsParser"] +_BNodeContextType = MutableMapping[str, BNode] + class NQuadsParser(W3CNTriplesParser): - def parse(self, inputsource, sink, bnode_context=None, **kwargs): + # type error: Signature of "parse" incompatible with supertype "W3CNTriplesParser" + def parse( # type: ignore[override] + self, + inputsource: InputSource, + sink: ConjunctiveGraph, + bnode_context: Optional[_BNodeContextType] = None, + **kwargs: Any, + ) -> ConjunctiveGraph: """ Parse 
inputsource as an N-Quads file. @@ -49,12 +58,15 @@ def parse(self, inputsource, sink, bnode_context=None, **kwargs): :param sink: where to send parsed triples :type bnode_context: `dict`, optional :param bnode_context: a dict mapping blank node identifiers to `~rdflib.term.BNode` instances. - See `.NTriplesParser.parse` + See `.W3CNTriplesParser.parse` """ assert sink.store.context_aware, ( "NQuadsParser must be given" " a context aware store." ) - self.sink = ConjunctiveGraph(store=sink.store, identifier=sink.identifier) + # type error: Incompatible types in assignment (expression has type "ConjunctiveGraph", base class "W3CNTriplesParser" defined the type as "Union[DummySink, NTGraphSink]") + self.sink: ConjunctiveGraph = ConjunctiveGraph( # type: ignore[assignment] + store=sink.store, identifier=sink.identifier + ) source = inputsource.getCharacterStream() if not source: @@ -77,7 +89,7 @@ def parse(self, inputsource, sink, bnode_context=None, **kwargs): return self.sink - def parseline(self, bnode_context=None): + def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None: self.eat(r_wspace) if (not self.line) or self.line.startswith(("#")): return # The line is empty or a comment diff --git a/dependencies/rdflib/plugins/parsers/ntriples.py b/dependencies/rdflib/plugins/parsers/ntriples.py index 2a199f637..09656faff 100644 --- a/dependencies/rdflib/plugins/parsers/ntriples.py +++ b/dependencies/rdflib/plugins/parsers/ntriples.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from __future__ import annotations __doc__ = """\ N-Triples Parser @@ -9,19 +10,39 @@ import codecs import re from io import BytesIO, StringIO, TextIOBase -from typing import IO, TYPE_CHECKING, Optional, Pattern, TextIO, Union +from typing import ( + IO, + TYPE_CHECKING, + Any, + Match, + MutableMapping, + Optional, + Pattern, + TextIO, + Union, +) from rdflib.compat import _string_escape_map, decodeUnicodeEscape from rdflib.exceptions import ParserError as ParseError from 
rdflib.parser import InputSource, Parser from rdflib.term import BNode as bNode from rdflib.term import Literal +from rdflib.term import URIRef from rdflib.term import URIRef as URI if TYPE_CHECKING: + import typing_extensions as te + from rdflib.graph import Graph, _ObjectType, _PredicateType, _SubjectType -__all__ = ["unquote", "uriquote", "W3CNTriplesParser", "NTGraphSink", "NTParser"] +__all__ = [ + "unquote", + "uriquote", + "W3CNTriplesParser", + "NTGraphSink", + "NTParser", + "DummySink", +] uriref = r'<([^:]+:[^\s"<>]*)>' literal = r'"([^"\\]*(?:\\.[^"\\]*)*)"' @@ -39,7 +60,7 @@ validate = False -class DummySink(object): +class DummySink: def __init__(self): self.length = 0 @@ -95,31 +116,36 @@ def unquote(s: str) -> str: r_hibyte = re.compile(r"([\x80-\xFF])") -def uriquote(uri): +def uriquote(uri: str) -> str: if not validate: return uri else: return r_hibyte.sub(lambda m: "%%%02X" % ord(m.group(1)), uri) -class W3CNTriplesParser(object): +_BNodeContextType = MutableMapping[str, bNode] + + +class W3CNTriplesParser: """An N-Triples Parser. This is a legacy-style Triples parser for NTriples provided by W3C Usage:: - p = NTriplesParser(sink=MySink()) + p = W3CNTriplesParser(sink=MySink()) sink = p.parse(f) # file; use parsestring for a string To define a context in which blank node identifiers refer to the same blank node - across instances of NTriplesParser, pass the same dict as `bnode_context` to each + across instances of NTriplesParser, pass the same dict as ``bnode_context`` to each instance. By default, a new blank node context is created for each instance of - `NTriplesParser`. + `W3CNTriplesParser`. 
""" __slots__ = ("_bnode_ids", "sink", "buffer", "file", "line") def __init__( - self, sink: Optional[Union[DummySink, "NTGraphSink"]] = None, bnode_context=None + self, + sink: Optional[Union[DummySink, "NTGraphSink"]] = None, + bnode_context: Optional[_BNodeContextType] = None, ): if bnode_context is not None: self._bnode_ids = bnode_context @@ -137,8 +163,10 @@ def __init__( self.line: Optional[str] = "" def parse( - self, f: Union[TextIO, IO[bytes], codecs.StreamReader], bnode_context=None - ): + self, + f: Union[TextIO, IO[bytes], codecs.StreamReader], + bnode_context: Optional[_BNodeContextType] = None, + ) -> Union[DummySink, "NTGraphSink"]: """ Parse f as an N-Triples file. @@ -156,7 +184,7 @@ def parse( if not hasattr(f, "encoding") and not hasattr(f, "charbuffer"): # someone still using a bytestream here? - f = codecs.getreader("utf-8")(f) # type: ignore[arg-type] + f = codecs.getreader("utf-8")(f) self.file = f # type: ignore[assignment] self.buffer = "" @@ -170,7 +198,7 @@ def parse( raise ParseError("Invalid line: {}".format(self.line)) return self.sink - def parsestring(self, s: Union[bytes, bytearray, str], **kwargs): + def parsestring(self, s: Union[bytes, bytearray, str], **kwargs) -> None: """Parse s as an N-Triples string.""" if not isinstance(s, (str, bytes, bytearray)): raise ParseError("Item to parse must be a string instance.") @@ -181,12 +209,13 @@ def parsestring(self, s: Union[bytes, bytearray, str], **kwargs): f = StringIO(s) self.parse(f, **kwargs) - def readline(self): + def readline(self) -> Optional[str]: """Read an N-Triples line from buffered input.""" # N-Triples lines end in either CRLF, CR, or LF # Therefore, we can't just use f.readline() if not self.buffer: - buffer = self.file.read(bufsiz) + # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read" + buffer = self.file.read(bufsiz) # type: ignore[union-attr] if not buffer: return None self.buffer = buffer @@ -197,7 +226,8 @@ def readline(self): 
self.buffer = self.buffer[m.end() :] return m.group(1) else: - buffer = self.file.read(bufsiz) + # type error: Item "None" of "Union[TextIO, StreamReader, None]" has no attribute "read" + buffer = self.file.read(bufsiz) # type: ignore[union-attr] if not buffer and not self.buffer.isspace(): # Last line does not need to be terminated with a newline buffer += "\n" @@ -205,7 +235,7 @@ def readline(self): return None self.buffer += buffer - def parseline(self, bnode_context=None): + def parseline(self, bnode_context: Optional[_BNodeContextType] = None) -> None: self.eat(r_wspace) if (not self.line) or self.line.startswith("#"): return # The line is empty or a comment @@ -223,10 +253,10 @@ def parseline(self, bnode_context=None): raise ParseError("Trailing garbage: {}".format(self.line)) self.sink.triple(subject, predicate, object_) - def peek(self, token: str): + def peek(self, token: str) -> bool: return self.line.startswith(token) # type: ignore[union-attr] - def eat(self, pattern: Pattern[str]): + def eat(self, pattern: Pattern[str]) -> Match[str]: m = pattern.match(self.line) # type: ignore[arg-type] if not m: # @@ Why can't we get the original pattern? 
# print(dir(pattern)) @@ -235,26 +265,28 @@ def eat(self, pattern: Pattern[str]): self.line = self.line[m.end() :] # type: ignore[index] return m - def subject(self, bnode_context=None): + def subject(self, bnode_context=None) -> Union[bNode, URIRef]: # @@ Consider using dictionary cases subj = self.uriref() or self.nodeid(bnode_context) if not subj: raise ParseError("Subject must be uriref or nodeID") return subj - def predicate(self): + def predicate(self) -> URIRef: pred = self.uriref() if not pred: raise ParseError("Predicate must be uriref") return pred - def object(self, bnode_context=None): + def object( + self, bnode_context: Optional[_BNodeContextType] = None + ) -> Union[URI, bNode, Literal]: objt = self.uriref() or self.nodeid(bnode_context) or self.literal() if objt is False: raise ParseError("Unrecognised object type") return objt - def uriref(self): + def uriref(self) -> Union["te.Literal[False]", URI]: if self.peek("<"): uri = self.eat(r_uriref).group(1) uri = unquote(uri) @@ -262,7 +294,9 @@ def uriref(self): return URI(uri) return False - def nodeid(self, bnode_context=None): + def nodeid( + self, bnode_context: Optional[_BNodeContextType] = None + ) -> Union["te.Literal[False]", bNode]: if self.peek("_"): # Fix for https://github.com/RDFLib/rdflib/issues/204 if bnode_context is None: @@ -280,7 +314,7 @@ def nodeid(self, bnode_context=None): return bnode return False - def literal(self): + def literal(self) -> Union["te.Literal[False]", Literal]: if self.peek('"'): lit, lang, dtype = self.eat(r_literal).groups() if lang: @@ -300,13 +334,13 @@ def literal(self): return False -class NTGraphSink(object): +class NTGraphSink: __slots__ = ("g",) def __init__(self, graph: "Graph"): self.g = graph - def triple(self, s: "_SubjectType", p: "_PredicateType", o: "_ObjectType"): + def triple(self, s: "_SubjectType", p: "_PredicateType", o: "_ObjectType") -> None: self.g.add((s, p, o)) @@ -318,7 +352,7 @@ class NTParser(Parser): __slots__ = () @classmethod - def 
parse(cls, source: InputSource, sink: "Graph", **kwargs): + def parse(cls, source: InputSource, sink: "Graph", **kwargs: Any) -> None: """ Parse the NT format @@ -326,7 +360,7 @@ def parse(cls, source: InputSource, sink: "Graph", **kwargs): :param source: the source of NT-formatted data :type sink: `rdflib.graph.Graph` :param sink: where to send parsed triples - :param kwargs: Additional arguments to pass to `.NTriplesParser.parse` + :param kwargs: Additional arguments to pass to `.W3CNTriplesParser.parse` """ f: Union[TextIO, IO[bytes], codecs.StreamReader] f = source.getCharacterStream() diff --git a/dependencies/rdflib/plugins/parsers/rdfxml.py b/dependencies/rdflib/plugins/parsers/rdfxml.py index 1da4ff4f7..03650fc98 100644 --- a/dependencies/rdflib/plugins/parsers/rdfxml.py +++ b/dependencies/rdflib/plugins/parsers/rdfxml.py @@ -1,17 +1,26 @@ """ An RDF/XML parser for RDFLib """ +from __future__ import annotations +from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple from urllib.parse import urldefrag, urljoin from xml.sax import handler, make_parser, xmlreader from xml.sax.handler import ErrorHandler from xml.sax.saxutils import escape, quoteattr from rdflib.exceptions import Error, ParserError +from rdflib.graph import Graph from rdflib.namespace import RDF, is_ncname -from rdflib.parser import Parser +from rdflib.parser import InputSource, Parser from rdflib.plugins.parsers.RDFVOC import RDFVOC -from rdflib.term import BNode, Literal, URIRef +from rdflib.term import BNode, Identifier, Literal, URIRef + +if TYPE_CHECKING: + # from xml.sax.expatreader import ExpatLocator + from xml.sax.xmlreader import AttributesImpl, Locator + + from rdflib.graph import _ObjectType, _SubjectType, _TripleType __all__ = ["create_parser", "BagID", "ElementHandler", "RDFXMLHandler", "RDFXMLParser"] @@ -86,7 +95,7 @@ def next_li(self): return RDFNS["_%s" % self.li] -class ElementHandler(object): +class ElementHandler: __slots__ = [ "start", "char", @@ 
-125,42 +134,46 @@ def next_li(self): class RDFXMLHandler(handler.ContentHandler): - def __init__(self, store): + def __init__(self, store: Graph): self.store = store self.preserve_bnode_ids = False self.reset() - def reset(self): + def reset(self) -> None: document_element = ElementHandler() document_element.start = self.document_element_start document_element.end = lambda name, qname: None - self.stack = [ + self.stack: List[Optional[ElementHandler]] = [ None, document_element, ] - self.ids = {} # remember IDs we have already seen - self.bnode = {} - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] + self.ids: Dict[str, int] = {} # remember IDs we have already seen + self.bnode: Dict[str, Identifier] = {} + self._ns_contexts: List[Dict[str, Optional[str]]] = [ + {} + ] # contains uri -> prefix dicts + self._current_context: Dict[str, Optional[str]] = self._ns_contexts[-1] # ContentHandler methods - def setDocumentLocator(self, locator): + def setDocumentLocator(self, locator: Locator): self.locator = locator - def startDocument(self): + def startDocument(self) -> None: pass - def startPrefixMapping(self, prefix, namespace): + def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None: self._ns_contexts.append(self._current_context.copy()) self._current_context[namespace] = prefix self.store.bind(prefix, namespace or "", override=False) - def endPrefixMapping(self, prefix): + def endPrefixMapping(self, prefix: Optional[str]) -> None: self._current_context = self._ns_contexts[-1] del self._ns_contexts[-1] - def startElementNS(self, name, qname, attrs): + def startElementNS( + self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl + ) -> None: stack = self.stack stack.append(ElementHandler()) current = self.current @@ -189,29 +202,29 @@ def startElementNS(self, name, qname, attrs): current.language = language current.start(name, qname, attrs) - def endElementNS(self, name, qname): + 
def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None: self.current.end(name, qname) self.stack.pop() - def characters(self, content): + def characters(self, content: str) -> None: char = self.current.char if char: char(content) - def ignorableWhitespace(self, content): + def ignorableWhitespace(self, content) -> None: pass - def processingInstruction(self, target, data): + def processingInstruction(self, target, data) -> None: pass - def add_reified(self, sid, spo): + def add_reified(self, sid: Identifier, spo: _TripleType): s, p, o = spo self.store.add((sid, RDF.type, RDF.Statement)) self.store.add((sid, RDF.subject, s)) self.store.add((sid, RDF.predicate, p)) self.store.add((sid, RDF.object, o)) - def error(self, message): + def error(self, message: str) -> NoReturn: locator = self.locator info = "%s:%s:%s: " % ( locator.getSystemId(), @@ -220,40 +233,46 @@ def error(self, message): ) raise ParserError(info + message) - def get_current(self): + def get_current(self) -> Optional[ElementHandler]: return self.stack[-2] # Create a read only property called current so that self.current # give the current element handler. current = property(get_current) - def get_next(self): + def get_next(self) -> Optional[ElementHandler]: return self.stack[-1] # Create a read only property that gives the element handler to be # used for the next element. 
next = property(get_next) - def get_parent(self): + def get_parent(self) -> Optional[ElementHandler]: return self.stack[-3] # Create a read only property that gives the current parent # element handler parent = property(get_parent) - def absolutize(self, uri): - result = urljoin(self.current.base, uri, allow_fragments=1) + def absolutize(self, uri: str) -> URIRef: + # type error: Argument "allow_fragments" to "urljoin" has incompatible type "int"; expected "bool" + result = urljoin(self.current.base, uri, allow_fragments=1) # type: ignore[arg-type] if uri and uri[-1] == "#" and result[-1] != "#": result = "%s#" % result return URIRef(result) - def convert(self, name, qname, attrs): + def convert( + self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl + ) -> Tuple[URIRef, Dict[URIRef, str]]: if name[0] is None: - name = URIRef(name[1]) + # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[Optional[str], str]") + name = URIRef(name[1]) # type: ignore[assignment] else: - name = URIRef("".join(name)) + # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[Optional[str], str]") + # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Optional[str], str]"; expected "Iterable[str]" + name = URIRef("".join(name)) # type: ignore[assignment, arg-type] atts = {} - for (n, v) in attrs.items(): + for n, v in attrs.items(): if n[0] is None: att = n[1] else: @@ -262,12 +281,16 @@ def convert(self, name, qname, attrs): pass elif att in UNQUALIFIED: # if not RDFNS[att] in atts: - atts[RDFNS[att]] = v # type: ignore[misc] + # type error: Variable "att" is not valid as a type + atts[RDFNS[att]] = v # type: ignore[misc, valid-type] else: atts[URIRef(att)] = v - return name, atts + # type error: Incompatible return value type (got "Tuple[Tuple[Optional[str], str], Dict[Any, Any]]", expected "Tuple[URIRef, Dict[URIRef, str]]") + return name, atts # type: 
ignore[return-value] - def document_element_start(self, name, qname, attrs): + def document_element_start( + self, name: Tuple[str, str], qname, attrs: AttributesImpl + ) -> None: if name[0] and URIRef("".join(name)) == RDFVOC.RDF: # Cheap hack so 2to3 doesn't turn it into __next__ next = getattr(self, "next") @@ -279,8 +302,11 @@ def document_element_start(self, name, qname, attrs): # TODO... set end to something that sets start such that # another element will cause error - def node_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) + def node_element_start( + self, name: Tuple[str, str], qname, attrs: AttributesImpl + ) -> None: + # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]") + name, atts = self.convert(name, qname, attrs) # type: ignore[assignment] current = self.current absolutize = self.absolutize @@ -290,8 +316,9 @@ def node_element_start(self, name, qname, attrs): next.end = self.property_element_end if name in NODE_ELEMENT_EXCEPTIONS: - self.error("Invalid node element URI: %s" % name) - + # type error: Not all arguments converted during string formatting + self.error("Invalid node element URI: %s" % name) # type: ignore[str-format] + subject: _SubjectType if RDFVOC.ID in atts: if RDFVOC.about in atts or RDFVOC.nodeID in atts: self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") @@ -325,8 +352,10 @@ def node_element_start(self, name, qname, attrs): subject = BNode() if name != RDFVOC.Description: # S1 - self.store.add((subject, RDF.type, absolutize(name))) + # error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str" + self.store.add((subject, RDF.type, absolutize(name))) # type: ignore[arg-type] + object: _ObjectType language = current.language for att in atts: if not att.startswith(str(RDFNS)): @@ -334,7 +363,8 @@ def node_element_start(self, name, qname, attrs): try: object = Literal(atts[att], language) except 
Error as e: - self.error(e.msg) + # type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str" + self.error(e.msg) # type: ignore[arg-type] elif att == RDF.type: # S2 predicate = RDF.type object = absolutize(atts[RDF.type]) @@ -342,48 +372,56 @@ def node_element_start(self, name, qname, attrs): continue elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: # S3 self.error("Invalid property attribute URI: %s" % att) - continue # for when error does not throw an exception + # type error: Statement is unreachable + continue # type: ignore[unreachable] # for when error does not throw an exception else: predicate = absolutize(att) try: object = Literal(atts[att], language) except Error as e: - self.error(e.msg) + # type error: Argument 1 to "error" of "RDFXMLHandler" has incompatible type "Optional[str]"; expected "str" + self.error(e.msg) # type: ignore[arg-type] self.store.add((subject, predicate, object)) current.subject = subject - def node_element_end(self, name, qname): + def node_element_end(self, name: Tuple[str, str], qname) -> None: # repeat node-elements are only allowed # at at top-level if self.parent.object and self.current != self.stack[2]: - self.error( "Repeat node-elements inside property elements: %s" % "".join(name) ) self.parent.object = self.current.subject - def property_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) + def property_element_start( + self, name: Tuple[str, str], qname, attrs: AttributesImpl + ) -> None: + # type error: Incompatible types in assignment (expression has type "URIRef", variable has type "Tuple[str, str]") + name, atts = self.convert(name, qname, attrs) # type: ignore[assignment] current = self.current absolutize = self.absolutize # Cheap hack so 2to3 doesn't turn it into __next__ next = getattr(self, "next") - object = None + object: Optional[_ObjectType] = None current.data = None current.list = None - if not name.startswith(str(RDFNS)): - 
current.predicate = absolutize(name) + # type error: "Tuple[str, str]" has no attribute "startswith" + if not name.startswith(str(RDFNS)): # type: ignore[attr-defined] + # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str" + current.predicate = absolutize(name) # type: ignore[arg-type] elif name == RDFVOC.li: current.predicate = current.next_li() elif name in PROPERTY_ELEMENT_EXCEPTIONS: - self.error("Invalid property element URI: %s" % name) + # type error: Not all arguments converted during string formatting + self.error("Invalid property element URI: %s" % name) # type: ignore[str-format] else: - current.predicate = absolutize(name) + # type error: Argument 1 has incompatible type "Tuple[str, str]"; expected "str" + current.predicate = absolutize(name) # type: ignore[arg-type] id = atts.get(RDFVOC.ID, None) if id is not None: @@ -465,12 +503,13 @@ def property_element_start(self, name, qname, attrs): self.error("""Invalid property attribute URI: %s""" % att) else: predicate = absolutize(att) - + o: _ObjectType if att == RDF.type: o = URIRef(atts[att]) else: if datatype is not None: - language = None + # type error: Statement is unreachable + language = None # type: ignore[unreachable] o = Literal(atts[att], language, datatype) if object is None: @@ -483,12 +522,12 @@ def property_element_start(self, name, qname, attrs): current.data = None current.object = object - def property_element_char(self, data): + def property_element_char(self, data: str) -> None: current = self.current if current.data is not None: current.data += data - def property_element_end(self, name, qname): + def property_element_end(self, name: Tuple[str, str], qname) -> None: current = self.current if current.data is not None and current.object is None: literalLang = current.language @@ -507,7 +546,7 @@ def property_element_end(self, name, qname): ) current.subject = None - def list_node_element_end(self, name, qname): + def list_node_element_end(self, name: 
Tuple[str, str], qname) -> None: current = self.current if self.parent.list == RDF.nil: list = BNode() @@ -525,7 +564,9 @@ def list_node_element_end(self, name, qname): self.store.add((list, RDF.first, current.subject)) self.parent.list = list - def literal_element_start(self, name, qname, attrs): + def literal_element_start( + self, name: Tuple[str, str], qname, attrs: AttributesImpl + ) -> None: current = self.current self.next.start = self.literal_element_start self.next.char = self.literal_element_char @@ -546,20 +587,21 @@ def literal_element_start(self, name, qname, attrs): else: current.object = "<%s" % name[1] - for (name, value) in attrs.items(): + for name, value in attrs.items(): if name[0]: if not name[0] in current.declared: current.declared[name[0]] = self._current_context[name[0]] name = current.declared[name[0]] + ":" + name[1] else: - name = name[1] + # type error: Incompatible types in assignment (expression has type "str", variable has type "Tuple[str, str]") + name = name[1] # type: ignore[assignment] current.object += " %s=%s" % (name, quoteattr(value)) current.object += ">" - def literal_element_char(self, data): + def literal_element_char(self, data: str) -> None: self.current.object += escape(data) - def literal_element_end(self, name, qname): + def literal_element_end(self, name: Tuple[str, str], qname) -> None: if name[0]: prefix = self._current_context[name[0]] if prefix: @@ -571,7 +613,7 @@ def literal_element_end(self, name, qname): self.parent.object += self.current.object + end -def create_parser(target, store) -> xmlreader.XMLReader: +def create_parser(target: InputSource, store: Graph) -> xmlreader.XMLReader: parser = make_parser() try: # Workaround for bug in expatreader.py. 
Needed when @@ -581,7 +623,8 @@ def create_parser(target, store) -> xmlreader.XMLReader: pass # Not present in Jython (at least) parser.setFeature(handler.feature_namespaces, 1) rdfxml = RDFXMLHandler(store) - rdfxml.setDocumentLocator(target) + # type error: Argument 1 to "setDocumentLocator" of "RDFXMLHandler" has incompatible type "InputSource"; expected "Locator" + rdfxml.setDocumentLocator(target) # type: ignore[arg-type] # rdfxml.setDocumentLocator(_Locator(self.url, self.parser)) parser.setContentHandler(rdfxml) parser.setErrorHandler(ErrorHandler()) @@ -592,7 +635,7 @@ class RDFXMLParser(Parser): def __init__(self): pass - def parse(self, source, sink, **args): + def parse(self, source: InputSource, sink: Graph, **args: Any) -> None: self._parser = create_parser(source, sink) content_handler = self._parser.getContentHandler() preserve_bnode_ids = args.get("preserve_bnode_ids", None) diff --git a/dependencies/rdflib/plugins/parsers/trig.py b/dependencies/rdflib/plugins/parsers/trig.py index 215586a0e..cc4cf131e 100644 --- a/dependencies/rdflib/plugins/parsers/trig.py +++ b/dependencies/rdflib/plugins/parsers/trig.py @@ -1,5 +1,9 @@ -from rdflib import ConjunctiveGraph -from rdflib.parser import Parser +from __future__ import annotations + +from typing import Any, MutableSequence + +from rdflib.graph import ConjunctiveGraph, Graph +from rdflib.parser import InputSource, Parser from .notation3 import RDFSink, SinkParser @@ -9,8 +13,7 @@ def becauseSubGraph(*args, **kwargs): class TrigSinkParser(SinkParser): - def directiveOrStatement(self, argstr, h): - + def directiveOrStatement(self, argstr: str, h: int) -> int: # noqa: N802 # import pdb; pdb.set_trace() i = self.skipSpace(argstr, h) @@ -35,7 +38,9 @@ def directiveOrStatement(self, argstr, h): return j - def labelOrSubject(self, argstr, i, res): + def labelOrSubject( # noqa: N802 + self, argstr: str, i: int, res: MutableSequence[Any] + ) -> int: j = self.skipSpace(argstr, i) if j < 0: return j # eof @@ -54,7 
+59,7 @@ def labelOrSubject(self, argstr, i, res): return j + 1 return -1 - def graph(self, argstr, i): + def graph(self, argstr: str, i: int) -> int: """ Parse trig graph, i.e. @@ -64,16 +69,20 @@ def graph(self, argstr, i): raise Exception if it looks like a graph, but isn't. """ + need_graphid = False # import pdb; pdb.set_trace() j = self.sparqlTok("GRAPH", argstr, i) # optional GRAPH keyword if j >= 0: i = j + need_graphid = True - r = [] + r: MutableSequence[Any] = [] j = self.labelOrSubject(argstr, i, r) if j >= 0: graph = r[0] i = j + elif need_graphid: + self.BadSyntax(argstr, i, "GRAPH keyword must be followed by graph name") else: graph = self._store.graph.identifier # hack @@ -82,7 +91,6 @@ def graph(self, argstr, i): self.BadSyntax(argstr, i, "EOF found when expected graph") if argstr[j : j + 1] == "=": # optional = for legacy support - i = self.skipSpace(argstr, j + 1) if i < 0: self.BadSyntax(argstr, i, "EOF found when expecting '{'") @@ -94,11 +102,15 @@ def graph(self, argstr, i): j = i + 1 + if self._context is not None: + self.BadSyntax(argstr, i, "Nested graphs are not allowed") + oldParentContext = self._parentContext self._parentContext = self._context reason2 = self._reason2 self._reason2 = becauseSubGraph - self._context = self._store.newGraph(graph) + # type error: Incompatible types in assignment (expression has type "Graph", variable has type "Optional[Formula]") + self._context = self._store.newGraph(graph) # type: ignore[assignment] while 1: i = self.skipSpace(argstr, j) @@ -129,11 +141,12 @@ class TrigParser(Parser): def __init__(self): pass - def parse(self, source, graph, encoding="utf-8"): - + def parse(self, source: InputSource, graph: Graph, encoding: str = "utf-8") -> None: if encoding not in [None, "utf-8"]: raise Exception( - ("TriG files are always utf-8 encoded, ", "I was passed: %s") % encoding + # type error: Unsupported left operand type for % ("Tuple[str, str]") + ("TriG files are always utf-8 encoded, ", "I was passed: 
%s") # type: ignore[operator] + % encoding ) # we're currently being handed a Graph, not a ConjunctiveGraph diff --git a/dependencies/rdflib/plugins/parsers/trix.py b/dependencies/rdflib/plugins/parsers/trix.py index 5529b0fbd..8baaf5ca4 100644 --- a/dependencies/rdflib/plugins/parsers/trix.py +++ b/dependencies/rdflib/plugins/parsers/trix.py @@ -1,15 +1,22 @@ """ A TriX parser for RDFLib """ -from xml.sax import make_parser +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, List, NoReturn, Optional, Tuple +from xml.sax import handler, make_parser from xml.sax.handler import ErrorHandler -from xml.sax.saxutils import handler from rdflib.exceptions import ParserError from rdflib.graph import Graph from rdflib.namespace import Namespace -from rdflib.parser import Parser -from rdflib.term import BNode, Literal, URIRef +from rdflib.parser import InputSource, Parser +from rdflib.store import Store +from rdflib.term import BNode, Identifier, Literal, URIRef + +if TYPE_CHECKING: + # from xml.sax.expatreader import ExpatLocator + from xml.sax.xmlreader import AttributesImpl, Locator, XMLReader __all__ = ["create_parser", "TriXHandler", "TriXParser"] @@ -21,35 +28,36 @@ class TriXHandler(handler.ContentHandler): """An Sax Handler for TriX. 
See http://sw.nokia.com/trix/""" - def __init__(self, store): + def __init__(self, store: Store): self.store = store self.preserve_bnode_ids = False self.reset() - def reset(self): - self.bnode = {} - self.graph = None - self.triple = None + def reset(self) -> None: + self.bnode: Dict[str, BNode] = {} + self.graph: Optional[Graph] = None + self.triple: Optional[List[Identifier]] = None self.state = 0 self.lang = None self.datatype = None # ContentHandler methods - def setDocumentLocator(self, locator): + def setDocumentLocator(self, locator: Locator): self.locator = locator - def startDocument(self): + def startDocument(self) -> None: pass - def startPrefixMapping(self, prefix, namespace): + def startPrefixMapping(self, prefix: Optional[str], namespace: str) -> None: pass - def endPrefixMapping(self, prefix): + def endPrefixMapping(self, prefix: Optional[str]) -> None: pass - def startElementNS(self, name, qname, attrs): - + def startElementNS( + self, name: Tuple[Optional[str], str], qname, attrs: AttributesImpl + ) -> None: if name[0] != str(TRIXNS): self.error( "Only elements in the TriX namespace are allowed. %s!=%s" @@ -97,7 +105,7 @@ def startElementNS(self, name, qname, attrs): try: self.lang = attrs.getValue((str(XMLNS), "lang")) - except: + except Exception: # language not required - ignore pass try: @@ -114,7 +122,7 @@ def startElementNS(self, name, qname, attrs): self.datatype = None try: self.lang = attrs.getValue((str(XMLNS), "lang")) - except: + except Exception: # language not required - ignore pass @@ -137,7 +145,9 @@ def startElementNS(self, name, qname, attrs): self.chars = "" - def endElementNS(self, name, qname): + def endElementNS(self, name: Tuple[Optional[str], str], qname) -> None: + if TYPE_CHECKING: + assert self.triple is not None if name[0] != str(TRIXNS): self.error( "Only elements in the TriX namespace are allowed. 
%s!=%s" @@ -190,8 +200,9 @@ def endElementNS(self, name, qname): "Triple has wrong length, got %d elements: %s" % (len(self.triple), self.triple) ) - - self.graph.add(self.triple) + # type error: Item "None" of "Optional[Graph]" has no attribute "add" + # type error: Argument 1 to "add" of "Graph" has incompatible type "List[Identifier]"; expected "Tuple[Node, Node, Node]" + self.graph.add(self.triple) # type: ignore[union-attr, arg-type] # self.store.store.add(self.triple,context=self.graph) # self.store.addN([self.triple+[self.graph]]) self.state = 2 @@ -211,7 +222,7 @@ def endElementNS(self, name, qname): else: self.error("Unexpected close element") - def get_bnode(self, label): + def get_bnode(self, label: str) -> BNode: if self.preserve_bnode_ids: bn = BNode(label) else: @@ -222,16 +233,16 @@ def get_bnode(self, label): self.bnode[label] = bn return bn - def characters(self, content): + def characters(self, content: str) -> None: self.chars += content - def ignorableWhitespace(self, content): + def ignorableWhitespace(self, content) -> None: pass - def processingInstruction(self, target, data): + def processingInstruction(self, target, data) -> None: pass - def error(self, message): + def error(self, message: str) -> NoReturn: locator = self.locator info = "%s:%s:%s: " % ( locator.getSystemId(), @@ -241,12 +252,13 @@ def error(self, message): raise ParserError(info + message) -def create_parser(store): +def create_parser(store: Store) -> XMLReader: parser = make_parser() try: # Workaround for bug in expatreader.py. Needed when # expatreader is trying to guess a prefix. 
- parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") + # type error: "XMLReader" has no attribute "start_namespace_decl" + parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") # type: ignore[attr-defined] except AttributeError: pass # Not present in Jython (at least) parser.setFeature(handler.feature_namespaces, 1) @@ -262,7 +274,7 @@ class TriXParser(Parser): def __init__(self): pass - def parse(self, source, sink, **args): + def parse(self, source: InputSource, sink: Graph, **args: Any) -> None: assert ( sink.store.context_aware ), "TriXParser must be given a context aware store." diff --git a/dependencies/rdflib/plugins/serializers/jsonld.py b/dependencies/rdflib/plugins/serializers/jsonld.py index e9ff401b7..e5d9b0384 100644 --- a/dependencies/rdflib/plugins/serializers/jsonld.py +++ b/dependencies/rdflib/plugins/serializers/jsonld.py @@ -138,7 +138,7 @@ def from_rdf( return result -class Converter(object): +class Converter: def __init__(self, context, use_native_types, use_rdf_type): self.context = context self.use_native_types = context.active or use_native_types diff --git a/dependencies/rdflib/plugins/serializers/longturtle.py b/dependencies/rdflib/plugins/serializers/longturtle.py index 7a106ebbc..e886574f3 100644 --- a/dependencies/rdflib/plugins/serializers/longturtle.py +++ b/dependencies/rdflib/plugins/serializers/longturtle.py @@ -13,7 +13,7 @@ on the start of the next line * uses default encoding (encode()) is used instead of "latin-1" -- Nicholas Car, 2021 +- Nicholas Car, 2023 """ from rdflib.exceptions import Error @@ -33,7 +33,6 @@ class LongTurtleSerializer(RecursiveSerializer): - short_name = "longturtle" indentString = " " @@ -58,7 +57,6 @@ def addNamespace(self, prefix, namespace): if (prefix > "" and prefix[0] == "_") or self.namespaces.get( prefix, namespace ) != namespace: - if prefix not in self._ns_rewrite: p = "p" + prefix while p in self.namespaces: @@ -103,7 +101,6 @@ def serialize(self, 
stream, base=None, encoding=None, spacious=None, **args): self.write("\n") self.endDocument() - self.write("\n") self.base = None @@ -126,7 +123,7 @@ def getQName(self, uri, gen_prefix=True): try: parts = self.store.compute_qname(uri, generate=gen_prefix) - except: + except Exception: # is the uri a namespace in itself? pfx = self.store.store.prefix(uri) @@ -170,21 +167,20 @@ def s_default(self, subject): self.path(subject, SUBJECT) self.write("\n" + self.indent()) self.predicateList(subject) - self.write(" ;\n.") + self.write("\n.") return True def s_squared(self, subject): if (self._references[subject] > 0) or not isinstance(subject, BNode): return False self.write("\n" + self.indent() + "[]") - self.predicateList(subject) + self.predicateList(subject, newline=False) self.write(" ;\n.") return True def path(self, node, position, newline=False): if not ( - self.p_squared(node, position, newline) - or self.p_default(node, position, newline) + self.p_squared(node, position) or self.p_default(node, position, newline) ): raise Error("Cannot serialize node '%s'" % (node,)) @@ -209,7 +205,11 @@ def label(self, node, position): return self.getQName(node, position == VERB) or node.n3() - def p_squared(self, node, position, newline=False): + def p_squared( + self, + node, + position, + ): if ( not isinstance(node, BNode) or node in self._serialized @@ -218,23 +218,19 @@ def p_squared(self, node, position, newline=False): ): return False - if not newline: - self.write(" ") - if self.isValidList(node): # this is a list self.depth += 2 - self.write("(\n") - self.depth -= 1 + self.write(" (\n") + self.depth -= 2 self.doList(node) - self.depth -= 1 - self.write("\n" + self.indent(1) + ")") + self.write("\n" + self.indent() + ")") else: + # this is a Blank Node self.subjectDone(node) - self.depth += 2 - self.write("[\n") - self.depth -= 1 - self.predicateList(node, newline=False) + self.write("\n" + self.indent(1) + "[\n") + self.depth += 1 + self.predicateList(node) self.depth 
-= 1 self.write("\n" + self.indent(1) + "]") @@ -247,7 +243,7 @@ def isValidList(self, l_): try: if self.store.value(l_, RDF.first) is None: return False - except: + except Exception: return False while l_: if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2: @@ -281,6 +277,7 @@ def predicateList(self, subject, newline=False): self.write(" ;\n" + self.indent(1)) self.verb(predicate, newline=True) self.objectList(properties[predicate]) + self.write(" ;") def verb(self, node, newline=False): self.path(node, VERB, newline) @@ -293,11 +290,13 @@ def objectList(self, objects): self.depth += depthmod first_nl = False if count > 1: - self.write("\n" + self.indent(1)) + if not isinstance(objects[0], BNode): + self.write("\n" + self.indent(1)) first_nl = True self.path(objects[0], OBJECT, newline=first_nl) for obj in objects[1:]: - self.write(" ,\n") - self.write(self.indent(1)) + self.write(" ,") + if not isinstance(obj, BNode): + self.write("\n" + self.indent(1)) self.path(obj, OBJECT, newline=True) self.depth -= depthmod diff --git a/dependencies/rdflib/plugins/serializers/n3.py b/dependencies/rdflib/plugins/serializers/n3.py index 1135ff9a3..6f074f68f 100644 --- a/dependencies/rdflib/plugins/serializers/n3.py +++ b/dependencies/rdflib/plugins/serializers/n3.py @@ -11,7 +11,6 @@ class N3Serializer(TurtleSerializer): - short_name = "n3" def __init__(self, store: Graph, parent=None): diff --git a/dependencies/rdflib/plugins/serializers/nt.py b/dependencies/rdflib/plugins/serializers/nt.py index 913dbedf1..5faf9efb2 100644 --- a/dependencies/rdflib/plugins/serializers/nt.py +++ b/dependencies/rdflib/plugins/serializers/nt.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ N-Triples RDF graph serializer for RDFLib. 
See for details about the @@ -5,12 +7,15 @@ """ import codecs import warnings -from typing import IO, Optional +from typing import IO, TYPE_CHECKING, Optional, Tuple, Union from rdflib.graph import Graph from rdflib.serializer import Serializer from rdflib.term import Literal +if TYPE_CHECKING: + from rdflib.graph import _TripleType + __all__ = ["NTSerializer"] @@ -28,7 +33,7 @@ def serialize( base: Optional[str] = None, encoding: Optional[str] = "utf-8", **args, - ): + ) -> None: if base is not None: warnings.warn("NTSerializer does not support base.") if encoding != "utf-8": @@ -52,18 +57,20 @@ def __init__(self, store: Graph): Serializer.__init__(self, store) # default to utf-8 -def _nt_row(triple): +def _nt_row(triple: _TripleType) -> str: if isinstance(triple[2], Literal): return "%s %s %s .\n" % ( - triple[0].n3(), - triple[1].n3(), + # type error: "Node" has no attribute "n3" + triple[0].n3(), # type: ignore[attr-defined] + triple[1].n3(), # type: ignore[attr-defined] _quoteLiteral(triple[2]), ) else: - return "%s %s %s .\n" % (triple[0].n3(), triple[1].n3(), triple[2].n3()) + # type error: "Node" has no attribute "n3" + return "%s %s %s .\n" % (triple[0].n3(), triple[1].n3(), triple[2].n3()) # type: ignore[attr-defined] -def _quoteLiteral(l_): +def _quoteLiteral(l_: Literal) -> str: # noqa: N802 """ a simpler version of term.Literal.n3() """ @@ -80,13 +87,15 @@ def _quoteLiteral(l_): return "%s" % encoded -def _quote_encode(l_): +def _quote_encode(l_: str) -> str: return '"%s"' % l_.replace("\\", "\\\\").replace("\n", "\\n").replace( '"', '\\"' ).replace("\r", "\\r") -def _nt_unicode_error_resolver(err): +def _nt_unicode_error_resolver( + err: UnicodeError, +) -> Tuple[Union[str, bytes], int]: """ Do unicode char replaces as defined in https://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#ntrip_strings """ @@ -96,8 +105,12 @@ def _replace_single(c): fmt = "\\u%04X" if c <= 0xFFFF else "\\U%08X" return fmt % c - string = err.object[err.start : err.end] - 
return "".join(_replace_single(c) for c in string), err.end + # type error: "UnicodeError" has no attribute "object" + # type error: "UnicodeError" has no attribute "start" + # type error: "UnicodeError" has no attribute "end" + string = err.object[err.start : err.end] # type: ignore[attr-defined] + # type error: "UnicodeError" has no attribute "end" + return "".join(_replace_single(c) for c in string), err.end # type: ignore[attr-defined] codecs.register_error("_rdflib_nt_escape", _nt_unicode_error_resolver) diff --git a/dependencies/rdflib/plugins/serializers/rdfxml.py b/dependencies/rdflib/plugins/serializers/rdfxml.py index 0308b5164..c5acc74ad 100644 --- a/dependencies/rdflib/plugins/serializers/rdfxml.py +++ b/dependencies/rdflib/plugins/serializers/rdfxml.py @@ -135,7 +135,6 @@ def predicate(self, predicate, object, depth=1): % (indent, qname, attributes, escape(object, ESCAPE_ENTITIES), qname) ) else: - if isinstance(object, BNode): write('%s<%s rdf:nodeID="%s"/>\n' % (indent, qname, object)) else: @@ -252,8 +251,9 @@ def subject(self, subject: IdentifiedNode, depth: int = 1): type = first(store.objects(subject, RDF.type)) try: - self.nm.qname(type) - except: + # type error: Argument 1 to "qname" of "NamespaceManager" has incompatible type "Optional[Node]"; expected "str" + self.nm.qname(type) # type: ignore[arg-type] + except Exception: type = None element = type or RDFVOC.Description @@ -309,7 +309,6 @@ def predicate(self, predicate, object, depth=1): writer.text(object) elif object in self.__serialized or not (object, None, None) in store: - if isinstance(object, BNode): if more_than(store.triples((None, None, object)), 0): writer.attribute(RDFVOC.nodeID, fix(object)) @@ -337,7 +336,6 @@ def predicate(self, predicate, object, depth=1): col = Collection(store, object) for item in col: - if isinstance(item, URIRef): self.forceRDFAbout.add(item) self.subject(item) @@ -356,7 +354,6 @@ def predicate(self, predicate, object, depth=1): self.subject(object, 
depth + 1) elif isinstance(object, BNode): - if ( object not in self.__serialized and (object, None, None) in store diff --git a/dependencies/rdflib/plugins/serializers/trig.py b/dependencies/rdflib/plugins/serializers/trig.py index fa9641d1a..6f2aa50a8 100644 --- a/dependencies/rdflib/plugins/serializers/trig.py +++ b/dependencies/rdflib/plugins/serializers/trig.py @@ -3,18 +3,19 @@ See for syntax specification. """ -from collections import defaultdict -from typing import IO, TYPE_CHECKING, Optional, Union +from typing import IO, TYPE_CHECKING, Dict, List, Optional, Tuple, Union from rdflib.graph import ConjunctiveGraph, Graph from rdflib.plugins.serializers.turtle import TurtleSerializer from rdflib.term import BNode, Node +if TYPE_CHECKING: + from rdflib.graph import _ContextType, _SubjectType + __all__ = ["TrigSerializer"] class TrigSerializer(TurtleSerializer): - short_name = "trig" indentString = 4 * " " @@ -33,25 +34,30 @@ def __init__(self, store: Union[Graph, ConjunctiveGraph]): super(TrigSerializer, self).__init__(store) - def preprocess(self): + def preprocess(self) -> None: for context in self.contexts: + # do not write unnecessary prefix (ex: for an empty default graph) + if len(context) == 0: + continue self.store = context - self.getQName(context.identifier) - self._references = defaultdict(int) + # Don't generate a new prefix for a graph URI if one already exists + self.getQName(context.identifier, False) self._subjects = {} for triple in context: self.preprocessTriple(triple) - self._contexts[context] = ( - self.orderSubjects(), - self._subjects, - self._references, - ) + for subject in self._subjects.keys(): + self._references[subject] += 1 + + self._contexts[context] = (self.orderSubjects(), self._subjects) - def reset(self): + def reset(self) -> None: super(TrigSerializer, self).reset() - self._contexts = {} + self._contexts: Dict[ + _ContextType, + Tuple[List[_SubjectType], Dict[_SubjectType, bool]], + ] = {} def serialize( self, @@ -77,11 
+83,10 @@ def serialize( self.startDocument() firstTime = True - for store, (ordered_subjects, subjects, ref) in self._contexts.items(): + for store, (ordered_subjects, subjects) in self._contexts.items(): if not ordered_subjects: continue - self._references = ref self._serialized = {} self.store = store self._subjects = subjects @@ -93,9 +98,11 @@ def serialize( if isinstance(store.identifier, BNode): iri = store.identifier.n3() else: - iri = self.getQName(store.identifier) + # Show the full graph URI if a prefix for it doesn't already exist + iri = self.getQName(store.identifier, False) if iri is None: - iri = store.identifier.n3() + # type error: "IdentifiedNode" has no attribute "n3" + iri = store.identifier.n3() # type: ignore[attr-defined] self.write(self.indent() + "\n%s {" % iri) self.depth += 1 diff --git a/dependencies/rdflib/plugins/serializers/trix.py b/dependencies/rdflib/plugins/serializers/trix.py index 1ff9008f9..c29a8559d 100644 --- a/dependencies/rdflib/plugins/serializers/trix.py +++ b/dependencies/rdflib/plugins/serializers/trix.py @@ -28,7 +28,6 @@ def serialize( encoding: Optional[str] = None, **args, ): - nm = self.store.namespace_manager self.writer = XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS}) diff --git a/dependencies/rdflib/plugins/serializers/turtle.py b/dependencies/rdflib/plugins/serializers/turtle.py index 21df28ff4..ad1182474 100644 --- a/dependencies/rdflib/plugins/serializers/turtle.py +++ b/dependencies/rdflib/plugins/serializers/turtle.py @@ -37,7 +37,6 @@ def _object_comparator(a, b): class RecursiveSerializer(Serializer): - topClasses = [RDFS.Class] predicateOrder = [RDF.type, RDFS.label] maxDepth = 10 @@ -45,7 +44,6 @@ class RecursiveSerializer(Serializer): roundtrip_prefixes = () def __init__(self, store): - super(RecursiveSerializer, self).__init__(store) self.stream = None self.reset() @@ -180,7 +178,6 @@ def write(self, text): class TurtleSerializer(RecursiveSerializer): - short_name = "turtle" indentString = " 
" @@ -205,7 +202,6 @@ def addNamespace(self, prefix, namespace): if (prefix > "" and prefix[0] == "_") or self.namespaces.get( prefix, namespace ) != namespace: - if prefix not in self._ns_rewrite: p = "p" + prefix while p in self.namespaces: @@ -277,8 +273,7 @@ def getQName(self, uri, gen_prefix=True): try: parts = self.store.compute_qname(uri, generate=gen_prefix) - except: - + except Exception: # is the uri a namespace in itself? pfx = self.store.store.prefix(uri) @@ -402,7 +397,7 @@ def isValidList(self, l_): try: if self.store.value(l_, RDF.first) is None: return False - except: + except Exception: return False while l_: if l_ != RDF.nil and len(list(self.store.predicate_objects(l_))) != 2: diff --git a/dependencies/rdflib/plugins/serializers/xmlwriter.py b/dependencies/rdflib/plugins/serializers/xmlwriter.py index 9ed10f48f..88cebdeda 100644 --- a/dependencies/rdflib/plugins/serializers/xmlwriter.py +++ b/dependencies/rdflib/plugins/serializers/xmlwriter.py @@ -6,7 +6,7 @@ ESCAPE_ENTITIES = {"\r": " "} -class XMLWriter(object): +class XMLWriter: def __init__(self, stream, namespace_manager, encoding=None, decl=1, extra_ns=None): encoding = encoding or "utf-8" encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding) diff --git a/dependencies/rdflib/plugins/shared/jsonld/context.py b/dependencies/rdflib/plugins/shared/jsonld/context.py index 7090668aa..2f6cedbdd 100644 --- a/dependencies/rdflib/plugins/shared/jsonld/context.py +++ b/dependencies/rdflib/plugins/shared/jsonld/context.py @@ -6,9 +6,22 @@ """ # https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/context.py +from __future__ import annotations from collections import namedtuple -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Generator, + List, + Optional, + Set, + Tuple, + Union, +) +from urllib.parse import urljoin, urlsplit from rdflib.namespace import RDF 
@@ -41,7 +54,7 @@ VERSION, VOCAB, ) -from .util import norm_url, source_to_json, split_iri, urljoin, urlsplit +from .util import norm_url, source_to_json, split_iri NODE_KEYS = {GRAPH, ID, INCLUDED, JSON, LIST, NEST, NONE, REV, SET, TYPE, VALUE, LANG} @@ -56,26 +69,26 @@ class Defined(int): URI_GEN_DELIMS = (":", "/", "?", "#", "[", "]", "@") -class Context(object): +class Context: def __init__( self, source: Optional[Any] = None, base: Optional[str] = None, version: Optional[float] = None, ): - self.version = version or 1.0 + self.version: float = version or 1.0 self.language = None - self.vocab = None + self.vocab: Optional[str] = None self._base: Optional[str] self.base = base self.doc_base = base self.terms: Dict[str, Any] = {} # _alias maps NODE_KEY to list of aliases self._alias: Dict[str, List[str]] = {} - self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Any] = {} + self._lookup: Dict[Tuple[str, Any, Union[Defined, str], bool], Term] = {} self._prefixes: Dict[str, Any] = {} self.active = False - self.parent = None + self.parent: Optional[Context] = None self.propagate = True self._context_cache: Dict[str, Any] = {} if source: @@ -98,12 +111,13 @@ def base(self, base: Optional[str]): ) self._basedomain = "%s://%s" % urlsplit(base)[0:2] if base else None - def subcontext(self, source, propagate=True): + def subcontext(self, source: Any, propagate: bool = True) -> "Context": # IMPROVE: to optimize, implement SubContext with parent fallback support parent = self.parent if self.propagate is False else self - return parent._subcontext(source, propagate) + # type error: Item "None" of "Optional[Context]" has no attribute "_subcontext" + return parent._subcontext(source, propagate) # type: ignore[union-attr] - def _subcontext(self, source, propagate): + def _subcontext(self, source: Any, propagate: bool) -> "Context": ctx = Context(version=self.version) ctx.propagate = propagate ctx.parent = self @@ -119,7 +133,7 @@ def _subcontext(self, source, 
propagate): ctx.load(source) return ctx - def _clear(self): + def _clear(self) -> None: self.language = None self.vocab = None self.terms = {} @@ -129,12 +143,12 @@ def _clear(self): self.active = False self.propagate = True - def get_context_for_term(self, term): + def get_context_for_term(self, term: Optional["Term"]) -> "Context": if term and term.context is not UNDEF: return self._subcontext(term.context, propagate=True) return self - def get_context_for_type(self, node): + def get_context_for_type(self, node: Any) -> Optional["Context"]: if self.version >= 1.1: rtype = self.get_type(node) if isinstance(node, dict) else None if not isinstance(rtype, list): @@ -154,41 +168,42 @@ def get_context_for_type(self, node): return self.parent if self.propagate is False else self - def get_id(self, obj): + def get_id(self, obj: Dict[str, Any]) -> Any: return self._get(obj, ID) - def get_type(self, obj): + def get_type(self, obj: Dict[str, Any]) -> Any: return self._get(obj, TYPE) - def get_language(self, obj): + def get_language(self, obj: Dict[str, Any]) -> Any: return self._get(obj, LANG) - def get_value(self, obj): + def get_value(self, obj: Dict[str, Any]) -> Any: return self._get(obj, VALUE) - def get_graph(self, obj): + def get_graph(self, obj: Dict[str, Any]) -> Any: return self._get(obj, GRAPH) - def get_list(self, obj): + def get_list(self, obj: Dict[str, Any]) -> Any: return self._get(obj, LIST) - def get_set(self, obj): + def get_set(self, obj: Dict[str, Any]) -> Any: return self._get(obj, SET) - def get_rev(self, obj): + def get_rev(self, obj: Dict[str, Any]) -> Any: return self._get(obj, REV) - def _get(self, obj, key): + def _get(self, obj: Dict[str, Any], key: str) -> Any: for alias in self._alias.get(key, []): if alias in obj: return obj.get(alias) return obj.get(key) - def get_key(self, key: str): + # type error: Missing return statement + def get_key(self, key: str) -> str: # type: ignore[return] for alias in self.get_keys(key): return alias - def 
get_keys(self, key: str): + def get_keys(self, key: str) -> Generator[str, None, None]: if key in self._alias: for alias in self._alias[key]: yield alias @@ -207,13 +222,13 @@ def add_term( name: str, idref: str, coercion: Union[Defined, str] = UNDEF, - container=UNDEF, - index=None, - language=UNDEF, - reverse=False, - context=UNDEF, - prefix=None, - protected=False, + container: Union[Collection[Any], str, Defined] = UNDEF, + index: Optional[Union[str, Defined]] = None, + language: Optional[Union[str, Defined]] = UNDEF, + reverse: bool = False, + context: Any = UNDEF, + prefix: Optional[bool] = None, + protected: bool = False, ): if self.version < 1.1 or prefix is None: prefix = isinstance(idref, str) and idref.endswith(URI_GEN_DELIMS) @@ -228,8 +243,10 @@ def add_term( if isinstance(container, (list, set, tuple)): container = set(container) - else: + elif container is not UNDEF: container = set([container]) + else: + container = set() term = Term( idref, @@ -261,7 +278,7 @@ def add_term( def find_term( self, idref: str, - coercion=None, + coercion: Optional[Union[str, Defined]] = None, container: Union[Defined, str] = UNDEF, language: Optional[str] = None, reverse: bool = False, @@ -296,21 +313,26 @@ def find_term( return lu.get((idref, UNDEF, UNDEF, reverse)) - def resolve(self, curie_or_iri): + def resolve(self, curie_or_iri: str) -> str: iri = self.expand(curie_or_iri, False) - if self.isblank(iri): - return iri - if " " in iri: + # type error: Argument 1 to "isblank" of "Context" has incompatible type "Optional[str]"; expected "str" + if self.isblank(iri): # type: ignore[arg-type] + # type error: Incompatible return value type (got "Optional[str]", expected "str") + return iri # type: ignore[return-value] + # type error: Unsupported right operand type for in ("Optional[str]") + if " " in iri: # type: ignore[operator] return "" - return self.resolve_iri(iri) + # type error: Argument 1 to "resolve_iri" of "Context" has incompatible type "Optional[str]"; 
expected "str" + return self.resolve_iri(iri) # type: ignore[arg-type] - def resolve_iri(self, iri): - return norm_url(self._base, iri) + def resolve_iri(self, iri: str) -> str: + # type error: Argument 1 to "norm_url" has incompatible type "Optional[str]"; expected "str" + return norm_url(self._base, iri) # type: ignore[arg-type] - def isblank(self, ref): + def isblank(self, ref: str) -> bool: return ref.startswith("_:") - def expand(self, term_curie_or_iri, use_vocab=True): + def expand(self, term_curie_or_iri: Any, use_vocab: bool = True) -> Optional[str]: if not isinstance(term_curie_or_iri, str): return term_curie_or_iri @@ -337,19 +359,22 @@ def expand(self, term_curie_or_iri, use_vocab=True): return self.resolve_iri(term_curie_or_iri) - def shrink_iri(self, iri): + def shrink_iri(self, iri: str) -> str: ns, name = split_iri(str(iri)) pfx = self._prefixes.get(ns) if pfx: - return ":".join((pfx, name)) + # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]" + return ":".join((pfx, name)) # type: ignore[arg-type] elif self._base: if str(iri) == self._base: return "" - elif iri.startswith(self._basedomain): - return iri[len(self._basedomain) :] + # type error: Argument 1 to "startswith" of "str" has incompatible type "Optional[str]"; expected "Union[str, Tuple[str, ...]]" + elif iri.startswith(self._basedomain): # type: ignore[arg-type] + # type error: Argument 1 to "len" has incompatible type "Optional[str]"; expected "Sized" + return iri[len(self._basedomain) :] # type: ignore[arg-type] return iri - def to_symbol(self, iri): + def to_symbol(self, iri: str) -> Optional[str]: iri = str(iri) term = self.find_term(iri) if term: @@ -359,7 +384,8 @@ def to_symbol(self, iri): return name pfx = self._prefixes.get(ns) if pfx: - return ":".join((pfx, name)) + # type error: Argument 1 to "join" of "str" has incompatible type "Tuple[Any, Optional[str]]"; expected "Iterable[str]" + return ":".join((pfx, 
name)) # type: ignore[arg-type] return iri def load( @@ -391,13 +417,13 @@ def _prep_sources( self, base: Optional[str], inputs: List[Any], - sources, - referenced_contexts, - in_source_url=None, + sources: List[Any], + referenced_contexts: Set[str], + in_source_url: Optional[str] = None, ): - for source in inputs: source_url = in_source_url + new_base = base if isinstance(source, str): source_url = source source_doc_base = base or self.doc_base @@ -412,7 +438,7 @@ def _prep_sources( # if base is not None, then source_doc_base won't be # none due to how it is assigned. assert source_doc_base is not None - base = urljoin(source_doc_base, source_url) + new_base = urljoin(source_doc_base, source_url) source = new_ctx if isinstance(source, dict): @@ -422,29 +448,42 @@ def _prep_sources( if isinstance(source, list): self._prep_sources( - base, source, sources, referenced_contexts, source_url + new_base, source, sources, referenced_contexts, source_url ) else: sources.append((source_url, source)) - def _fetch_context(self, source, base, referenced_contexts): - source_url = urljoin(base, source) + def _fetch_context( + self, source: str, base: Optional[str], referenced_contexts: Set[str] + ): + # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]" + source_url = urljoin(base, source) # type: ignore[type-var] if source_url in referenced_contexts: raise RECURSIVE_CONTEXT_INCLUSION - referenced_contexts.add(source_url) + + # type error: Argument 1 to "add" of "set" has incompatible type "Optional[str]"; expected "str" + referenced_contexts.add(source_url) # type: ignore[arg-type] if source_url in self._context_cache: return self._context_cache[source_url] - source = source_to_json(source_url) + # type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "str") + source = source_to_json(source_url) # type: ignore[assignment] if source and CONTEXT not in source: raise INVALID_REMOTE_CONTEXT - 
self._context_cache[source_url] = source + + # type error: Invalid index type "Optional[str]" for "Dict[str, Any]"; expected type "str" + self._context_cache[source_url] = source # type: ignore[index] return source - def _read_source(self, source, source_url=None, referenced_contexts=None): + def _read_source( + self, + source: Dict[str, Any], + source_url: Optional[str] = None, + referenced_contexts: Optional[Set[str]] = None, + ): imports = source.get(IMPORT) if imports: if not isinstance(imports, str): @@ -477,7 +516,13 @@ def _read_source(self, source, source_url=None, referenced_contexts=None): else: self._read_term(source, key, value, protected) - def _read_term(self, source, name, dfn, protected=False): + def _read_term( + self, + source: Dict[str, Any], + name: str, + dfn: Union[Dict[str, Any], str], + protected: bool = False, + ) -> None: idref = None if isinstance(dfn, dict): # term = self._create_term(source, key, value) @@ -518,17 +563,21 @@ def _read_term(self, source, name, dfn, protected=False): if not self._accept_term(dfn): return idref = self._rec_expand(source, dfn) - - self.add_term(name, idref, protected=protected) + # type error: Argument 2 to "add_term" of "Context" has incompatible type "Optional[str]"; expected "str" + self.add_term(name, idref, protected=protected) # type: ignore[arg-type] if idref in NODE_KEYS: self._alias.setdefault(idref, []).append(name) - def _rec_expand(self, source, expr, prev=None): + def _rec_expand( + self, source: Dict[str, Any], expr: Optional[str], prev: Optional[str] = None + ) -> Optional[str]: if expr == prev or expr in NODE_KEYS: return expr - is_term, pfx, nxt = self._prep_expand(expr) + nxt: Optional[str] + # type error: Argument 1 to "_prep_expand" of "Context" has incompatible type "Optional[str]"; expected "str" + is_term, pfx, nxt = self._prep_expand(expr) # type: ignore[arg-type] if pfx: iri = self._get_source_id(source, pfx) if iri is None: @@ -550,7 +599,7 @@ def _rec_expand(self, source, expr, 
prev=None): return self._rec_expand(source, nxt, expr) - def _prep_expand(self, expr): + def _prep_expand(self, expr: str) -> Tuple[bool, Optional[str], str]: if ":" not in expr: return True, None, expr pfx, local = expr.split(":", 1) @@ -559,7 +608,7 @@ def _prep_expand(self, expr): else: return False, None, expr - def _get_source_id(self, source, key): + def _get_source_id(self, source: Dict[str, Any], key: str) -> Optional[str]: # .. from source dict or if already defined term = source.get(key) if term is None: @@ -570,9 +619,41 @@ def _get_source_id(self, source, key): term = term.get(ID) return term + def _term_dict(self, term: Term) -> Union[Dict[str, Any], str]: + tdict: Dict[str, Any] = {} + if term.type != UNDEF: + tdict[TYPE] = self.shrink_iri(term.type) + if term.container: + tdict[CONTAINER] = list(term.container) + if term.language != UNDEF: + tdict[LANG] = term.language + if term.reverse: + tdict[REV] = term.id + else: + tdict[ID] = term.id + if tdict.keys() == {ID}: + return tdict[ID] + return tdict + + def to_dict(self) -> Dict[str, Any]: + """ + Returns a dictionary representation of the context that can be + serialized to JSON. + + :return: a dictionary representation of the context. 
+ """ + r = {v: k for (k, v) in self._prefixes.items()} + r.update({term.name: self._term_dict(term) for term in self._lookup.values()}) + if self.base: + r[BASE] = self.base + if self.language: + r[LANG] = self.language + return r + Term = namedtuple( "Term", "id, name, type, container, index, language, reverse, context," "prefix, protected", ) + Term.__new__.__defaults__ = (UNDEF, UNDEF, UNDEF, UNDEF, False, UNDEF, False, False) diff --git a/dependencies/rdflib/plugins/shared/jsonld/util.py b/dependencies/rdflib/plugins/shared/jsonld/util.py index c60bca242..486f8b077 100644 --- a/dependencies/rdflib/plugins/shared/jsonld/util.py +++ b/dependencies/rdflib/plugins/shared/jsonld/util.py @@ -1,8 +1,11 @@ # -*- coding: utf-8 -*- # https://github.com/RDFLib/rdflib-jsonld/blob/feature/json-ld-1.1/rdflib_jsonld/util.py -import typing as t +from __future__ import annotations -if t.TYPE_CHECKING: +import pathlib +from typing import IO, TYPE_CHECKING, Any, Optional, TextIO, Tuple, Union + +if TYPE_CHECKING: import json else: try: @@ -18,13 +21,19 @@ from rdflib.parser import ( BytesIOWrapper, + InputSource, PythonInputSource, StringInputSource, + URLInputSource, create_input_source, ) -def source_to_json(source): +def source_to_json( + source: Optional[ + Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath] + ] +) -> Optional[Any]: if isinstance(source, PythonInputSource): return source.data @@ -50,7 +59,7 @@ def source_to_json(source): VOCAB_DELIMS = ("#", "/", ":") -def split_iri(iri): +def split_iri(iri: str) -> Tuple[str, Optional[str]]: for delim in VOCAB_DELIMS: at = iri.rfind(delim) if at > -1: @@ -58,7 +67,7 @@ def split_iri(iri): return iri, None -def norm_url(base, url): +def norm_url(base: str, url: str) -> str: """ >>> norm_url('http://example.org/', '/one') 'http://example.org/one' @@ -87,7 +96,8 @@ def norm_url(base, url): return result -def context_from_urlinputsource(source): +# type error: Missing return statement +def 
context_from_urlinputsource(source: URLInputSource) -> Optional[str]: # type: ignore[return] """ Please note that JSON-LD documents served with the application/ld+json media type MUST have all context information, including references to external contexts, @@ -100,9 +110,20 @@ def context_from_urlinputsource(source): # source.links is the new way of getting Link headers from URLInputSource links = source.links except AttributeError: - return + # type error: Return value expected + return # type: ignore[return-value] for link in links: if ' rel="http://www.w3.org/ns/json-ld#context"' in link: i, j = link.index("<"), link.index(">") if i > -1 and j > -1: - return urljoin(source.url, link[i + 1 : j]) + # type error: Value of type variable "AnyStr" of "urljoin" cannot be "Optional[str]" + return urljoin(source.url, link[i + 1 : j]) # type: ignore[type-var] + + +__all__ = [ + "json", + "source_to_json", + "split_iri", + "norm_url", + "context_from_urlinputsource", +] diff --git a/dependencies/rdflib/plugins/sparql/__init__.py b/dependencies/rdflib/plugins/sparql/__init__.py index ddd96cae1..a11a6e004 100644 --- a/dependencies/rdflib/plugins/sparql/__init__.py +++ b/dependencies/rdflib/plugins/sparql/__init__.py @@ -4,7 +4,7 @@ .. 
versionadded:: 4.0 """ -import sys +from importlib.metadata import entry_points from typing import TYPE_CHECKING SPARQL_LOAD_GRAPHS = True @@ -40,10 +40,6 @@ assert operators assert parserutils -if sys.version_info < (3, 8): - from importlib_metadata import entry_points -else: - from importlib.metadata import entry_points all_entry_points = entry_points() if hasattr(all_entry_points, "select"): @@ -55,3 +51,13 @@ assert isinstance(all_entry_points, dict) for ep in all_entry_points.get(PLUGIN_ENTRY_POINT, []): CUSTOM_EVALS[ep.name] = ep.load() + +__all__ = [ + "prepareQuery", + "prepareUpdate", + "processUpdate", + "operators", + "parser", + "parserutils", + "CUSTOM_EVALS", +] diff --git a/dependencies/rdflib/plugins/sparql/aggregates.py b/dependencies/rdflib/plugins/sparql/aggregates.py index 005a539d5..84ac8936d 100644 --- a/dependencies/rdflib/plugins/sparql/aggregates.py +++ b/dependencies/rdflib/plugins/sparql/aggregates.py @@ -1,51 +1,75 @@ -from decimal import Decimal +from __future__ import annotations -from rdflib import XSD, Literal +from decimal import Decimal +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + MutableMapping, + Optional, + Set, + Tuple, + TypeVar, + Union, + overload, +) + +from rdflib.namespace import XSD from rdflib.plugins.sparql.datatypes import type_promotion -from rdflib.plugins.sparql.evalutils import NotBoundError, _eval, _val +from rdflib.plugins.sparql.evalutils import _eval, _val from rdflib.plugins.sparql.operators import numeric -from rdflib.plugins.sparql.sparql import SPARQLTypeError +from rdflib.plugins.sparql.parserutils import CompValue +from rdflib.plugins.sparql.sparql import FrozenBindings, NotBoundError, SPARQLTypeError +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable """ Aggregation functions """ -class Accumulator(object): +class Accumulator: """abstract base class for different aggregation functions""" - def __init__(self, aggregation): + def __init__(self, 
aggregation: CompValue): + self.get_value: Callable[[], Optional[Literal]] + self.update: Callable[[FrozenBindings, "Aggregator"], None] self.var = aggregation.res self.expr = aggregation.vars if not aggregation.distinct: - self.use_row = self.dont_care + # type error: Cannot assign to a method + self.use_row = self.dont_care # type: ignore[method-assign] self.distinct = False else: self.distinct = aggregation.distinct - self.seen = set() + self.seen: Set[Any] = set() - def dont_care(self, row): + def dont_care(self, row: FrozenBindings) -> bool: """skips distinct test""" return True - def use_row(self, row): + def use_row(self, row: FrozenBindings) -> bool: """tests distinct with set""" return _eval(self.expr, row) not in self.seen - def set_value(self, bindings): + def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None: """sets final value in bindings""" - bindings[self.var] = self.get_value() + # type error: Incompatible types in assignment (expression has type "Optional[Literal]", target has type "Identifier") + bindings[self.var] = self.get_value() # type: ignore[assignment] class Counter(Accumulator): - def __init__(self, aggregation): + def __init__(self, aggregation: CompValue): super(Counter, self).__init__(aggregation) self.value = 0 if self.expr == "*": # cannot eval "*" => always use the full row - self.eval_row = self.eval_full_row + # type error: Cannot assign to a method + self.eval_row = self.eval_full_row # type: ignore[assignment] - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: val = self.eval_row(row) except NotBoundError: @@ -55,41 +79,58 @@ def update(self, row, aggregator): if self.distinct: self.seen.add(val) - def get_value(self): + def get_value(self) -> Literal: return Literal(self.value) - def eval_row(self, row): + def eval_row(self, row: FrozenBindings) -> Identifier: return _eval(self.expr, row) - def eval_full_row(self, row): + def 
eval_full_row(self, row: FrozenBindings) -> FrozenBindings: return row - def use_row(self, row): - return self.eval_row(row) not in self.seen + def use_row(self, row: FrozenBindings) -> bool: + try: + return self.eval_row(row) not in self.seen + except NotBoundError: + # happens when counting zero optional nodes. See issue #2229 + return False + + +@overload +def type_safe_numbers(*args: int) -> Tuple[int]: + ... + + +@overload +def type_safe_numbers(*args: Union[Decimal, float, int]) -> Tuple[Union[float, int]]: + ... -def type_safe_numbers(*args): +def type_safe_numbers(*args: Union[Decimal, float, int]) -> Iterable[Union[float, int]]: if any(isinstance(arg, float) for arg in args) and any( isinstance(arg, Decimal) for arg in args ): return map(float, args) - return args + # type error: Incompatible return value type (got "Tuple[Union[Decimal, float, int], ...]", expected "Iterable[Union[float, int]]") + # NOTE on type error: if args contains a Decimal it will nopt get here. + return args # type: ignore[return-value] class Sum(Accumulator): - def __init__(self, aggregation): + def __init__(self, aggregation: CompValue): super(Sum, self).__init__(aggregation) self.value = 0 - self.datatype = None + self.datatype: Optional[str] = None - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: value = _eval(self.expr, row) dt = self.datatype if dt is None: dt = value.datatype else: - dt = type_promotion(dt, value.datatype) + # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef" + dt = type_promotion(dt, value.datatype) # type: ignore[arg-type] self.datatype = dt self.value = sum(type_safe_numbers(self.value, numeric(value))) if self.distinct: @@ -98,18 +139,18 @@ def update(self, row, aggregator): # skip UNDEF pass - def get_value(self): + def get_value(self) -> Literal: return Literal(self.value, datatype=self.datatype) class Average(Accumulator): - def __init__(self, 
aggregation): + def __init__(self, aggregation: CompValue): super(Average, self).__init__(aggregation) self.counter = 0 self.sum = 0 - self.datatype = None + self.datatype: Optional[str] = None - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: value = _eval(self.expr, row) dt = self.datatype @@ -117,7 +158,8 @@ def update(self, row, aggregator): if dt is None: dt = value.datatype else: - dt = type_promotion(dt, value.datatype) + # type error: Argument 1 to "type_promotion" has incompatible type "str"; expected "URIRef" + dt = type_promotion(dt, value.datatype) # type: ignore[arg-type] self.datatype = dt if self.distinct: self.seen.add(value) @@ -128,7 +170,7 @@ def update(self, row, aggregator): except SPARQLTypeError: pass - def get_value(self): + def get_value(self) -> Literal: if self.counter == 0: return Literal(0) if self.datatype in (XSD.float, XSD.double): @@ -140,18 +182,20 @@ def get_value(self): class Extremum(Accumulator): """abstract base class for Minimum and Maximum""" - def __init__(self, aggregation): + def __init__(self, aggregation: CompValue): + self.compare: Callable[[Any, Any], Any] super(Extremum, self).__init__(aggregation) - self.value = None + self.value: Any = None # DISTINCT would not change the value for MIN or MAX - self.use_row = self.dont_care + # type error: Cannot assign to a method + self.use_row = self.dont_care # type: ignore[method-assign] - def set_value(self, bindings): + def set_value(self, bindings: MutableMapping[Variable, Identifier]) -> None: if self.value is not None: # simply do not set if self.value is still None bindings[self.var] = Literal(self.value) - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: if self.value is None: self.value = _eval(self.expr, row) @@ -165,13 +209,16 @@ def update(self, row, aggregator): pass +_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal) + + 
class Minimum(Extremum): - def compare(self, val1, val2): + def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT: return min(val1, val2, key=_val) class Maximum(Extremum): - def compare(self, val1, val2): + def compare(self, val1: _ValueT, val2: _ValueT) -> _ValueT: return max(val1, val2, key=_val) @@ -183,7 +230,7 @@ def __init__(self, aggregation): # DISTINCT would not change the value self.use_row = self.dont_care - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: # set the value now aggregator.bindings[self.var] = _eval(self.expr, row) @@ -192,19 +239,24 @@ def update(self, row, aggregator): except NotBoundError: pass - def get_value(self): + def get_value(self) -> None: # set None if no value was set return None class GroupConcat(Accumulator): - def __init__(self, aggregation): + value: List[Literal] + + def __init__(self, aggregation: CompValue): super(GroupConcat, self).__init__(aggregation) # only GROUPCONCAT needs to have a list as accumulator self.value = [] - self.separator = aggregation.separator or " " + if aggregation.separator is None: + self.separator = " " + else: + self.separator = aggregation.separator - def update(self, row, aggregator): + def update(self, row: FrozenBindings, aggregator: "Aggregator") -> None: try: value = _eval(self.expr, row) # skip UNDEF @@ -221,11 +273,11 @@ def update(self, row, aggregator): except NotBoundError: pass - def get_value(self): + def get_value(self) -> Literal: return Literal(self.separator.join(str(v) for v in self.value)) -class Aggregator(object): +class Aggregator: """combines different Accumulator objects""" accumulator_classes = { @@ -238,16 +290,16 @@ class Aggregator(object): "Aggregate_GroupConcat": GroupConcat, } - def __init__(self, aggregations): - self.bindings = {} - self.accumulators = {} + def __init__(self, aggregations: List[CompValue]): + self.bindings: Dict[Variable, Identifier] = {} + self.accumulators: Dict[str, 
Accumulator] = {} for a in aggregations: accumulator_class = self.accumulator_classes.get(a.name) if accumulator_class is None: raise Exception("Unknown aggregate function " + a.name) self.accumulators[a.res] = accumulator_class(a) - def update(self, row): + def update(self, row: FrozenBindings) -> None: """update all own accumulators""" # SAMPLE accumulators may delete themselves # => iterate over list not generator @@ -256,7 +308,7 @@ def update(self, row): if acc.use_row(row): acc.update(row, self) - def get_bindings(self): + def get_bindings(self) -> Mapping[Variable, Identifier]: """calculate and set last values""" for acc in self.accumulators.values(): acc.set_value(self.bindings) diff --git a/dependencies/rdflib/plugins/sparql/algebra.py b/dependencies/rdflib/plugins/sparql/algebra.py index 01dc17511..52aa92a7f 100644 --- a/dependencies/rdflib/plugins/sparql/algebra.py +++ b/dependencies/rdflib/plugins/sparql/algebra.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ Converting the 'parse-tree' output of pyparsing to a SPARQL Algebra expression @@ -48,9 +50,7 @@ def OrderBy(p: CompValue, expr: List[CompValue]) -> CompValue: return CompValue("OrderBy", p=p, expr=expr) -def ToMultiSet( - p: typing.Union[List[Dict[Variable, Identifier]], CompValue] -) -> CompValue: +def ToMultiSet(p: typing.Union[List[Dict[Variable, str]], CompValue]) -> CompValue: return CompValue("ToMultiSet", p=p) @@ -66,11 +66,13 @@ def Minus(p1: CompValue, p2: CompValue) -> CompValue: return CompValue("Minus", p1=p1, p2=p2) -def Graph(term, graph) -> CompValue: +def Graph(term: Identifier, graph: CompValue) -> CompValue: return CompValue("Graph", term=term, p=graph) -def BGP(triples=None) -> CompValue: +def BGP( + triples: Optional[List[Tuple[Identifier, Identifier, Identifier]]] = None +) -> CompValue: return CompValue("BGP", triples=triples or []) @@ -78,19 +80,21 @@ def LeftJoin(p1: CompValue, p2: CompValue, expr) -> CompValue: return CompValue("LeftJoin", p1=p1, p2=p2, 
expr=expr) -def Filter(expr, p: CompValue) -> CompValue: +def Filter(expr: Expr, p: CompValue) -> CompValue: return CompValue("Filter", expr=expr, p=p) -def Extend(p: CompValue, expr, var) -> CompValue: +def Extend( + p: CompValue, expr: typing.Union[Identifier, Expr], var: Variable +) -> CompValue: return CompValue("Extend", p=p, expr=expr, var=var) -def Values(res) -> CompValue: +def Values(res: List[Dict[Variable, str]]) -> CompValue: return CompValue("values", res=res) -def Project(p: CompValue, PV) -> CompValue: +def Project(p: CompValue, PV: List[Variable]) -> CompValue: return CompValue("Project", p=p, PV=PV) @@ -102,7 +106,7 @@ def _knownTerms( triple: Tuple[Identifier, Identifier, Identifier], varsknown: Set[typing.Union[BNode, Variable]], varscount: Dict[Identifier, int], -): +) -> Tuple[int, int, bool]: return ( len( [ @@ -124,7 +128,7 @@ def reorderTriples( ones with most bindings first """ - def _addvar(term, varsknown): + def _addvar(term: str, varsknown: Set[typing.Union[Variable, BNode]]): if isinstance(term, (Variable, BNode)): varsknown.add(term) @@ -180,20 +184,25 @@ def triples( return reorderTriples((l[x], l[x + 1], l[x + 2]) for x in range(0, len(l), 3)) # type: ignore[misc] -def translatePName(p: typing.Union[CompValue, str], prologue: Prologue): +# type error: Missing return statement +def translatePName( # type: ignore[return] + p: typing.Union[CompValue, str], prologue: Prologue +) -> Optional[Identifier]: """ Expand prefixed/relative URIs """ if isinstance(p, CompValue): if p.name == "pname": - return prologue.absolutize(p) + # type error: Incompatible return value type (got "Union[CompValue, str, None]", expected "Optional[Identifier]") + return prologue.absolutize(p) # type: ignore[return-value] if p.name == "literal": # type error: Argument "datatype" to "Literal" has incompatible type "Union[CompValue, str, None]"; expected "Optional[str]" return Literal( p.string, lang=p.lang, datatype=prologue.absolutize(p.datatype) # type: 
ignore[arg-type] ) elif isinstance(p, URIRef): - return prologue.absolutize(p) + # type error: Incompatible return value type (got "Union[CompValue, str, None]", expected "Optional[Identifier]") + return prologue.absolutize(p) # type: ignore[return-value] @overload @@ -253,8 +262,8 @@ def translatePath(p: typing.Union[CompValue, URIRef]) -> Optional["Path"]: # ty def translateExists( - e: typing.Union[Expr, Literal, Variable] -) -> typing.Union[Expr, Literal, Variable]: + e: typing.Union[Expr, Literal, Variable, URIRef] +) -> typing.Union[Expr, Literal, Variable, URIRef]: """ Translate the graph pattern used by EXISTS and NOT EXISTS http://www.w3.org/TR/sparql11-query/#sparqlCollectFilters @@ -273,7 +282,7 @@ def _c(n): return e -def collectAndRemoveFilters(parts): +def collectAndRemoveFilters(parts: List[CompValue]) -> Optional[Expr]: """ FILTER expressions apply to the whole group graph pattern in which @@ -294,7 +303,8 @@ def collectAndRemoveFilters(parts): i += 1 if filters: - return and_(*filters) + # type error: Argument 1 to "and_" has incompatible type "*List[Union[Expr, Literal, Variable]]"; expected "Expr" + return and_(*filters) # type: ignore[arg-type] return None @@ -325,7 +335,11 @@ def translateGroupGraphPattern(graphPattern: CompValue) -> CompValue: """ if graphPattern.name == "SubSelect": - return ToMultiSet(translate(graphPattern)[0]) + # The first output from translate cannot be None for a subselect query + # as it can only be None for certain DESCRIBE queries. 
+ # type error: Argument 1 to "ToMultiSet" has incompatible type "Optional[CompValue]"; + # expected "Union[List[Dict[Variable, str]], CompValue]" + return ToMultiSet(translate(graphPattern)[0]) # type: ignore[arg-type] if not graphPattern.part: graphPattern.part = [] # empty { } @@ -380,7 +394,7 @@ def translateGroupGraphPattern(graphPattern: CompValue) -> CompValue: class StopTraversal(Exception): # noqa: N818 - def __init__(self, rv): + def __init__(self, rv: bool): self.rv = rv @@ -444,7 +458,7 @@ def traverse( visitPre: Callable[[Any], Any] = lambda n: None, visitPost: Callable[[Any], Any] = lambda n: None, complete: Optional[bool] = None, -): +) -> Any: """ Traverse tree, visit each node with visit function visit function may raise StopTraversal to stop traversal @@ -501,10 +515,11 @@ def _findVars(x, res: Set[Variable]) -> Optional[CompValue]: # type: ignore[ret elif x.name == "SubSelect": if x.projection: res.update(v.var or v.evar for v in x.projection) + return x -def _addVars(x, children) -> Set[Variable]: +def _addVars(x, children: List[Set[Variable]]) -> Set[Variable]: """ find which variables may be bound by this part of the query """ @@ -549,7 +564,7 @@ def _sample(e: typing.Union[CompValue, List[Expr], Expr, List[str], Variable], v return CompValue("Aggregate_Sample", vars=e) -def _simplifyFilters(e): +def _simplifyFilters(e: Any) -> Any: if isinstance(e, Expr): return simplifyFilters(e) @@ -592,17 +607,16 @@ def translateAggregates( def translateValues( v: CompValue, -) -> typing.Union[List[Dict[Variable, Identifier]], CompValue]: +) -> typing.Union[List[Dict[Variable, str]], CompValue]: # if len(v.var)!=len(v.value): # raise Exception("Unmatched vars and values in ValueClause: "+str(v)) - res: List[Dict[Variable, Identifier]] = [] + res: List[Dict[Variable, str]] = [] if not v.var: return res if not v.value: return res if not isinstance(v.value[0], list): - for val in v.value: res.append({v.var[0]: val}) else: @@ -612,7 +626,7 @@ def 
translateValues( return Values(res) -def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: +def translate(q: CompValue) -> Tuple[Optional[CompValue], List[Variable]]: """ http://www.w3.org/TR/sparql11-query/#convertSolMod @@ -624,9 +638,28 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: # TODO: Var scope test VS: Set[Variable] = set() - traverse(q.where, functools.partial(_findVars, res=VS)) - # all query types have a where part + # All query types have a WHERE clause EXCEPT some DESCRIBE queries + # where only explicit IRIs are provided. + if q.name == "DescribeQuery": + # For DESCRIBE queries, use the vars provided in q.var. + # If there is no WHERE clause, vars should be explicit IRIs to describe. + # If there is a WHERE clause, vars can be any combination of explicit IRIs + # and variables. + VS = set(q.var) + + # If there is no WHERE clause, just return the vars projected + if q.where is None: + return None, list(VS) + + # Otherwise, evaluate the WHERE clause like SELECT DISTINCT + else: + q.modifier = "DISTINCT" + + else: + traverse(q.where, functools.partial(_findVars, res=VS)) + + # depth-first recursive generation of mapped query tree M = translateGroupGraphPattern(q.where) aggregate = False @@ -655,9 +688,14 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: aggregate = True if aggregate: - M, E = translateAggregates(q, M) + M, aggregateAliases = translateAggregates(q, M) else: - E = [] + aggregateAliases = [] + + # Need to remove the aggregate var aliases before joining to VALUES; + # else the variable names won't match up correctly when aggregating. + for alias, var in aggregateAliases: + M = Extend(M, alias, var) # HAVING if q.having: @@ -669,8 +707,15 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: if not q.projection: # select * + + # Find the first child projection in each branch of the mapped query tree, + # then include the variables it projects out in our projected variables. 
+ for child_projection in _find_first_child_projections(M): + VS |= set(child_projection.PV) + PV = list(VS) else: + E = list() PV = list() for v in q.projection: if v.var: @@ -684,8 +729,8 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: else: raise Exception("I expected a var or evar here!") - for e, v in E: - M = Extend(M, e, v) + for e, v in E: + M = Extend(M, e, v) # ORDER BY if q.orderby: @@ -721,8 +766,23 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: return M, PV +def _find_first_child_projections(M: CompValue) -> Iterable[CompValue]: + """ + Recursively find the first child instance of a Projection operation in each of + the branches of the query execution plan/tree. + """ + + for child_op in M.values(): + if isinstance(child_op, CompValue): + if child_op.name == "Project": + yield child_op + else: + for child_projection in _find_first_child_projections(child_op): + yield child_projection + + # type error: Missing return statement -def simplify(n) -> Optional[CompValue]: # type: ignore[return] +def simplify(n: Any) -> Optional[CompValue]: # type: ignore[return] """Remove joins to empty BGPs""" if isinstance(n, CompValue): if n.name == "Join": @@ -735,7 +795,7 @@ def simplify(n) -> Optional[CompValue]: # type: ignore[return] return n -def analyse(n, children): +def analyse(n: Any, children: Any) -> bool: """ Some things can be lazily joined. 
This propegates whether they can up the tree @@ -757,10 +817,9 @@ def analyse(n, children): def translatePrologue( p: ParseResults, base: Optional[str], - initNs: Optional[Mapping[str, str]] = None, + initNs: Optional[Mapping[str, Any]] = None, prologue: Optional[Prologue] = None, ) -> Prologue: - if prologue is None: prologue = Prologue() prologue.base = "" @@ -780,7 +839,12 @@ def translatePrologue( return prologue -def translateQuads(quads: CompValue): +def translateQuads( + quads: CompValue, +) -> Tuple[ + List[Tuple[Identifier, Identifier, Identifier]], + DefaultDict[str, List[Tuple[Identifier, Identifier, Identifier]]], +]: if quads.triples: alltriples = triples(quads.triples) else: @@ -825,7 +889,7 @@ def translateUpdate1(u: CompValue, prologue: Prologue) -> CompValue: def translateUpdate( q: CompValue, base: Optional[str] = None, - initNs: Optional[Mapping[str, str]] = None, + initNs: Optional[Mapping[str, Any]] = None, ) -> Update: """ Returns a list of SPARQL Update Algebra expressions @@ -854,7 +918,7 @@ def translateUpdate( def translateQuery( q: ParseResults, base: Optional[str] = None, - initNs: Optional[Mapping[str, str]] = None, + initNs: Optional[Mapping[str, Any]] = None, ) -> Query: """ Translate a query-parsetree to a SPARQL Algebra Expression @@ -874,7 +938,6 @@ def translateQuery( P, PV = translate(q[1]) datasetClause = q[1].datasetClause if q[1].name == "ConstructQuery": - template = triples(q[1].template) if q[1].template else None res = CompValue(q[1].name, p=P, template=template, datasetClause=datasetClause) @@ -892,31 +955,39 @@ class ExpressionNotCoveredException(Exception): # noqa: N818 pass -def translateAlgebra(query_algebra: Query) -> str: +class _AlgebraTranslator: """ + Translator of a Query's algebra to its equivalent SPARQL (string). - :param query_algebra: An algebra returned by the function call algebra.translateQuery(parse_tree). - :return: The query form generated from the SPARQL 1.1 algebra tree for select queries. 
+ Coded as a class to support storage of state during the translation process, + without use of a file. - """ - import os + Anticipated Usage: - def overwrite(text): - file = open("query.txt", "w+") - file.write(text) - file.close() + .. code-block:: python + + translated_query = _AlgebraTranslator(query).translateAlgebra() + + An external convenience function which wraps the above call, + `translateAlgebra`, is supplied, so this class does not need to be + referenced by client code at all in normal use. + """ - def replace( - old, - new, + def __init__(self, query_algebra: Query): + self.query_algebra = query_algebra + self.aggr_vars: DefaultDict[ + Identifier, List[Identifier] + ] = collections.defaultdict(list) + self._alg_translation: str = "" + + def _replace( + self, + old: str, + new: str, search_from_match: str = None, search_from_match_occurrence: int = None, count: int = 1, ): - # Read in the file - with open("query.txt", "r") as file: - filedata = file.read() - def find_nth(haystack, needle, n): start = haystack.lower().find(needle) while start >= 0 and n > 1: @@ -926,27 +997,25 @@ def find_nth(haystack, needle, n): if search_from_match and search_from_match_occurrence: position = find_nth( - filedata, search_from_match, search_from_match_occurrence + self._alg_translation, search_from_match, search_from_match_occurrence ) - filedata_pre = filedata[:position] - filedata_post = filedata[position:].replace(old, new, count) - filedata = filedata_pre + filedata_post + filedata_pre = self._alg_translation[:position] + filedata_post = self._alg_translation[position:].replace(old, new, count) + self._alg_translation = filedata_pre + filedata_post else: - filedata = filedata.replace(old, new, count) - - # Write the file out again - with open("query.txt", "w") as file: - file.write(filedata) + self._alg_translation = self._alg_translation.replace(old, new, count) - aggr_vars = collections.defaultdict(list) # type: dict - - def convert_node_arg(node_arg): + def 
convert_node_arg( + self, node_arg: typing.Union[Identifier, CompValue, Expr, str] + ) -> str: if isinstance(node_arg, Identifier): - if node_arg in aggr_vars.keys(): - grp_var = aggr_vars[node_arg].pop(0).n3() + if node_arg in self.aggr_vars.keys(): + # type error: "Identifier" has no attribute "n3" + grp_var = self.aggr_vars[node_arg].pop(0).n3() # type: ignore[attr-defined] return grp_var else: - return node_arg.n3() + # type error: "Identifier" has no attribute "n3" + return node_arg.n3() # type: ignore[attr-defined] elif isinstance(node_arg, CompValue): return "{" + node_arg.name + "}" elif isinstance(node_arg, Expr): @@ -958,7 +1027,7 @@ def convert_node_arg(node_arg): "The expression {0} might not be covered yet.".format(node_arg) ) - def sparql_query_text(node): + def sparql_query_text(self, node): """ https://www.w3.org/TR/sparql11-query/#sparqlSyntax @@ -969,7 +1038,7 @@ def sparql_query_text(node): if isinstance(node, CompValue): # 18.2 Query Forms if node.name == "SelectQuery": - overwrite("-*-SELECT-*- " + "{" + node.p.name + "}") + self._alg_translation = "-*-SELECT-*- " + "{" + node.p.name + "}" # 18.2 Graph Patterns elif node.name == "BGP": @@ -979,18 +1048,20 @@ def sparql_query_text(node): triple[0].n3() + " " + triple[1].n3() + " " + triple[2].n3() + "." for triple in node.triples ) - replace("{BGP}", triples) + self._replace("{BGP}", triples) # The dummy -*-SELECT-*- is placed during a SelectQuery or Multiset pattern in order to be able # to match extended variables in a specific Select-clause (see "Extend" below) - replace("-*-SELECT-*-", "SELECT", count=-1) + self._replace("-*-SELECT-*-", "SELECT", count=-1) # If there is no "Group By" clause the placeholder will simply be deleted. 
Otherwise there will be # no matching {GroupBy} placeholder because it has already been replaced by "group by variables" - replace("{GroupBy}", "", count=-1) - replace("{Having}", "", count=-1) + self._replace("{GroupBy}", "", count=-1) + self._replace("{Having}", "", count=-1) elif node.name == "Join": - replace("{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}") # + self._replace( + "{Join}", "{" + node.p1.name + "}{" + node.p2.name + "}" + ) # elif node.name == "LeftJoin": - replace( + self._replace( "{LeftJoin}", "{" + node.p1.name + "}OPTIONAL{{" + node.p2.name + "}}", ) @@ -1004,35 +1075,39 @@ def sparql_query_text(node): if node.p: # Filter with p=AggregateJoin = Having if node.p.name == "AggregateJoin": - replace("{Filter}", "{" + node.p.name + "}") - replace("{Having}", "HAVING({" + expr + "})") + self._replace("{Filter}", "{" + node.p.name + "}") + self._replace("{Having}", "HAVING({" + expr + "})") else: - replace( + self._replace( "{Filter}", "FILTER({" + expr + "}) {" + node.p.name + "}" ) else: - replace("{Filter}", "FILTER({" + expr + "})") + self._replace("{Filter}", "FILTER({" + expr + "})") elif node.name == "Union": - replace( + self._replace( "{Union}", "{{" + node.p1.name + "}}UNION{{" + node.p2.name + "}}" ) elif node.name == "Graph": expr = "GRAPH " + node.term.n3() + " {{" + node.p.name + "}}" - replace("{Graph}", expr) + self._replace("{Graph}", expr) elif node.name == "Extend": - query_string = open("query.txt", "r").read().lower() + query_string = self._alg_translation.lower() select_occurrences = query_string.count("-*-select-*-") - replace( + self._replace( node.var.n3(), - "(" + convert_node_arg(node.expr) + " as " + node.var.n3() + ")", + "(" + + self.convert_node_arg(node.expr) + + " as " + + node.var.n3() + + ")", search_from_match="-*-select-*-", search_from_match_occurrence=select_occurrences, ) - replace("{Extend}", "{" + node.p.name + "}") + self._replace("{Extend}", "{" + node.p.name + "}") elif node.name == "Minus": expr 
= "{" + node.p1.name + "}MINUS{{" + node.p2.name + "}}" - replace("{Minus}", expr) + self._replace("{Minus}", expr) elif node.name == "Group": group_by_vars = [] if node.expr: @@ -1043,12 +1118,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{Group}", "{" + node.p.name + "}") - replace("{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " ") + self._replace("{Group}", "{" + node.p.name + "}") + self._replace( + "{GroupBy}", "GROUP BY " + " ".join(group_by_vars) + " " + ) else: - replace("{Group}", "{" + node.p.name + "}") + self._replace("{Group}", "{" + node.p.name + "}") elif node.name == "AggregateJoin": - replace("{AggregateJoin}", "{" + node.p.name + "}") + self._replace("{AggregateJoin}", "{" + node.p.name + "}") for agg_func in node.A: if isinstance(agg_func.res, Identifier): identifier = agg_func.res.n3() @@ -1056,14 +1133,14 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - aggr_vars[agg_func.res].append(agg_func.vars) + self.aggr_vars[agg_func.res].append(agg_func.vars) agg_func_name = agg_func.name.split("_")[1] distinct = "" if agg_func.distinct: distinct = agg_func.distinct + " " if agg_func_name == "GroupConcat": - replace( + self._replace( identifier, "GROUP_CONCAT" + "(" @@ -1074,30 +1151,32 @@ def sparql_query_text(node): + ")", ) else: - replace( + self._replace( identifier, agg_func_name.upper() + "(" + distinct - + convert_node_arg(agg_func.vars) + + self.convert_node_arg(agg_func.vars) + ")", ) # For non-aggregated variables the aggregation function "sample" is automatically assigned. # However, we do not want to have "sample" wrapped around non-aggregated variables. That is # why we replace it. If "sample" is used on purpose it will not be replaced as the alias # must be different from the variable in this case. 
- replace( - "(SAMPLE({0}) as {0})".format(convert_node_arg(agg_func.vars)), - convert_node_arg(agg_func.vars), + self._replace( + "(SAMPLE({0}) as {0})".format( + self.convert_node_arg(agg_func.vars) + ), + self.convert_node_arg(agg_func.vars), ) elif node.name == "GroupGraphPatternSub": - replace( + self._replace( "GroupGraphPatternSub", - " ".join([convert_node_arg(pattern) for pattern in node.part]), + " ".join([self.convert_node_arg(pattern) for pattern in node.part]), ) elif node.name == "TriplesBlock": print("triplesblock") - replace( + self._replace( "{TriplesBlock}", "".join( triple[0].n3() @@ -1129,8 +1208,8 @@ def sparql_query_text(node): raise ExpressionNotCoveredException( "This expression might not be covered yet." ) - replace("{OrderBy}", "{" + node.p.name + "}") - replace("{OrderConditions}", " ".join(order_conditions) + " ") + self._replace("{OrderBy}", "{" + node.p.name + "}") + self._replace("{OrderConditions}", " ".join(order_conditions) + " ") elif node.name == "Project": project_variables = [] for var in node.PV: @@ -1143,7 +1222,7 @@ def sparql_query_text(node): order_by_pattern = "" if node.p.name == "OrderBy": order_by_pattern = "ORDER BY {OrderConditions}" - replace( + self._replace( "{Project}", " ".join(project_variables) + "{{" @@ -1154,17 +1233,17 @@ def sparql_query_text(node): + "{Having}", ) elif node.name == "Distinct": - replace("{Distinct}", "DISTINCT {" + node.p.name + "}") + self._replace("{Distinct}", "DISTINCT {" + node.p.name + "}") elif node.name == "Reduced": - replace("{Reduced}", "REDUCED {" + node.p.name + "}") + self._replace("{Reduced}", "REDUCED {" + node.p.name + "}") elif node.name == "Slice": slice = "OFFSET " + str(node.start) + " LIMIT " + str(node.length) - replace("{Slice}", "{" + node.p.name + "}" + slice) + self._replace("{Slice}", "{" + node.p.name + "}" + slice) elif node.name == "ToMultiSet": if node.p.name == "values": - replace("{ToMultiSet}", "{{" + node.p.name + "}}") + self._replace("{ToMultiSet}", 
"{{" + node.p.name + "}}") else: - replace( + self._replace( "{ToMultiSet}", "{-*-SELECT-*- " + "{" + node.p.name + "}" + "}" ) @@ -1173,71 +1252,73 @@ def sparql_query_text(node): # 17 Expressions and Testing Values # # 17.3 Operator Mapping elif node.name == "RelationalExpression": - expr = convert_node_arg(node.expr) + expr = self.convert_node_arg(node.expr) op = node.op if isinstance(list, type(node.other)): other = ( "(" - + ", ".join(convert_node_arg(expr) for expr in node.other) + + ", ".join(self.convert_node_arg(expr) for expr in node.other) + ")" ) else: - other = convert_node_arg(node.other) + other = self.convert_node_arg(node.other) condition = "{left} {operator} {right}".format( left=expr, operator=op, right=other ) - replace("{RelationalExpression}", condition) + self._replace("{RelationalExpression}", condition) elif node.name == "ConditionalAndExpression": inner_nodes = " && ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalAndExpression}", - convert_node_arg(node.expr) + " && " + inner_nodes, + self.convert_node_arg(node.expr) + " && " + inner_nodes, ) elif node.name == "ConditionalOrExpression": inner_nodes = " || ".join( - [convert_node_arg(expr) for expr in node.other] + [self.convert_node_arg(expr) for expr in node.other] ) - replace( + self._replace( "{ConditionalOrExpression}", - "(" + convert_node_arg(node.expr) + " || " + inner_nodes + ")", + "(" + self.convert_node_arg(node.expr) + " || " + inner_nodes + ")", ) elif node.name == "MultiplicativeExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) multiplication = left_side for i, operator in enumerate(node.op): # noqa: F402 multiplication += ( - operator + " " + convert_node_arg(node.other[i]) + " " + operator + " " + self.convert_node_arg(node.other[i]) + " " ) - replace("{MultiplicativeExpression}", multiplication) + 
self._replace("{MultiplicativeExpression}", multiplication) elif node.name == "AdditiveExpression": - left_side = convert_node_arg(node.expr) + left_side = self.convert_node_arg(node.expr) addition = left_side for i, operator in enumerate(node.op): - addition += operator + " " + convert_node_arg(node.other[i]) + " " - replace("{AdditiveExpression}", addition) + addition += ( + operator + " " + self.convert_node_arg(node.other[i]) + " " + ) + self._replace("{AdditiveExpression}", addition) elif node.name == "UnaryNot": - replace("{UnaryNot}", "!" + convert_node_arg(node.expr)) + self._replace("{UnaryNot}", "!" + self.convert_node_arg(node.expr)) # # 17.4 Function Definitions # # # 17.4.1 Functional Forms elif node.name.endswith("BOUND"): - bound_var = convert_node_arg(node.arg) - replace("{Builtin_BOUND}", "bound(" + bound_var + ")") + bound_var = self.convert_node_arg(node.arg) + self._replace("{Builtin_BOUND}", "bound(" + bound_var + ")") elif node.name.endswith("IF"): - arg2 = convert_node_arg(node.arg2) - arg3 = convert_node_arg(node.arg3) + arg2 = self.convert_node_arg(node.arg2) + arg3 = self.convert_node_arg(node.arg3) if_expression = ( "IF(" + "{" + node.arg1.name + "}, " + arg2 + ", " + arg3 + ")" ) - replace("{Builtin_IF}", if_expression) + self._replace("{Builtin_IF}", if_expression) elif node.name.endswith("COALESCE"): - replace( + self._replace( "{Builtin_COALESCE}", "COALESCE(" - + ", ".join(convert_node_arg(arg) for arg in node.arg) + + ", ".join(self.convert_node_arg(arg) for arg in node.arg) + ")", ) elif node.name.endswith("Builtin_EXISTS"): @@ -1245,8 +1326,10 @@ def sparql_query_text(node): # According to https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rExistsFunc # ExistsFunc can only have a GroupGraphPattern as parameter. 
However, when we print the query algebra # we get a GroupGraphPatternSub - replace("{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}") - traverse(node.graph, visitPre=sparql_query_text) + self._replace( + "{Builtin_EXISTS}", "EXISTS " + "{{" + node.graph.name + "}}" + ) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph elif node.name.endswith("Builtin_NOTEXISTS"): # The node's name which we get with node.graph.name returns "Join" instead of GroupGraphPatternSub @@ -1254,21 +1337,21 @@ def sparql_query_text(node): # NotExistsFunc can only have a GroupGraphPattern as parameter. However, when we print the query algebra # we get a GroupGraphPatternSub print(node.graph.name) - replace( + self._replace( "{Builtin_NOTEXISTS}", "NOT EXISTS " + "{{" + node.graph.name + "}}" ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # # # # 17.4.1.5 logical-or: Covered in "RelationalExpression" # # # # 17.4.1.6 logical-and: Covered in "RelationalExpression" # # # # 17.4.1.7 RDFterm-equal: Covered in "RelationalExpression" elif node.name.endswith("sameTerm"): - replace( + self._replace( "{Builtin_sameTerm}", "SAMETERM(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) # # # # IN: Covered in "RelationalExpression" @@ -1276,205 +1359,253 @@ def sparql_query_text(node): # # # 17.4.2 Functions on RDF Terms elif node.name.endswith("Builtin_isIRI"): - replace("{Builtin_isIRI}", "isIRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_isIRI}", "isIRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_isBLANK"): - replace( - "{Builtin_isBLANK}", "isBLANK(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_isBLANK}", + "isBLANK(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isLITERAL"): - 
replace( + self._replace( "{Builtin_isLITERAL}", - "isLITERAL(" + convert_node_arg(node.arg) + ")", + "isLITERAL(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_isNUMERIC"): - replace( + self._replace( "{Builtin_isNUMERIC}", - "isNUMERIC(" + convert_node_arg(node.arg) + ")", + "isNUMERIC(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_STR"): - replace("{Builtin_STR}", "STR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_STR}", "STR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LANG"): - replace("{Builtin_LANG}", "LANG(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LANG}", "LANG(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_DATATYPE"): - replace( - "{Builtin_DATATYPE}", "DATATYPE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_DATATYPE}", + "DATATYPE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_IRI"): - replace("{Builtin_IRI}", "IRI(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_IRI}", "IRI(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_BNODE"): - replace("{Builtin_BNODE}", "BNODE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_BNODE}", "BNODE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("STRDT"): - replace( + self._replace( "{Builtin_STRDT}", "STRDT(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRLANG"): - replace( + self._replace( "{Builtin_STRLANG}", "STRLANG(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_UUID"): - replace("{Builtin_UUID}", "UUID()") + 
self._replace("{Builtin_UUID}", "UUID()") elif node.name.endswith("Builtin_STRUUID"): - replace("{Builtin_STRUUID}", "STRUUID()") + self._replace("{Builtin_STRUUID}", "STRUUID()") # # # 17.4.3 Functions on Strings elif node.name.endswith("Builtin_STRLEN"): - replace( - "{Builtin_STRLEN}", "STRLEN(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_STRLEN}", + "STRLEN(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_SUBSTR"): - args = [convert_node_arg(node.arg), node.start] + args = [self.convert_node_arg(node.arg), node.start] if node.length: args.append(node.length) expr = "SUBSTR(" + ", ".join(args) + ")" - replace("{Builtin_SUBSTR}", expr) + self._replace("{Builtin_SUBSTR}", expr) elif node.name.endswith("Builtin_UCASE"): - replace("{Builtin_UCASE}", "UCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_UCASE}", "UCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_LCASE"): - replace("{Builtin_LCASE}", "LCASE(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_LCASE}", "LCASE(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name.endswith("Builtin_STRSTARTS"): - replace( + self._replace( "{Builtin_STRSTARTS}", "STRSTARTS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRENDS"): - replace( + self._replace( "{Builtin_STRENDS}", "STRENDS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_CONTAINS"): - replace( + self._replace( "{Builtin_CONTAINS}", "CONTAINS(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRBEFORE"): - replace( + 
self._replace( "{Builtin_STRBEFORE}", "STRBEFORE(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_STRAFTER"): - replace( + self._replace( "{Builtin_STRAFTER}", "STRAFTER(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("Builtin_ENCODE_FOR_URI"): - replace( + self._replace( "{Builtin_ENCODE_FOR_URI}", - "ENCODE_FOR_URI(" + convert_node_arg(node.arg) + ")", + "ENCODE_FOR_URI(" + self.convert_node_arg(node.arg) + ")", ) elif node.name.endswith("Builtin_CONCAT"): expr = "CONCAT({vars})".format( - vars=", ".join(convert_node_arg(elem) for elem in node.arg) + vars=", ".join(self.convert_node_arg(elem) for elem in node.arg) ) - replace("{Builtin_CONCAT}", expr) + self._replace("{Builtin_CONCAT}", expr) elif node.name.endswith("Builtin_LANGMATCHES"): - replace( + self._replace( "{Builtin_LANGMATCHES}", "LANGMATCHES(" - + convert_node_arg(node.arg1) + + self.convert_node_arg(node.arg1) + ", " - + convert_node_arg(node.arg2) + + self.convert_node_arg(node.arg2) + ")", ) elif node.name.endswith("REGEX"): - args = [convert_node_arg(node.text), convert_node_arg(node.pattern)] + args = [ + self.convert_node_arg(node.text), + self.convert_node_arg(node.pattern), + ] expr = "REGEX(" + ", ".join(args) + ")" - replace("{Builtin_REGEX}", expr) + self._replace("{Builtin_REGEX}", expr) elif node.name.endswith("REPLACE"): - replace( + self._replace( "{Builtin_REPLACE}", "REPLACE(" - + convert_node_arg(node.arg) + + self.convert_node_arg(node.arg) + ", " - + convert_node_arg(node.pattern) + + self.convert_node_arg(node.pattern) + ", " - + convert_node_arg(node.replacement) + + self.convert_node_arg(node.replacement) + ")", ) # # # 17.4.4 Functions on Numerics elif node.name == "Builtin_ABS": - replace("{Builtin_ABS}", 
"ABS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ABS}", "ABS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_ROUND": - replace("{Builtin_ROUND}", "ROUND(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_ROUND}", "ROUND(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_CEIL": - replace("{Builtin_CEIL}", "CEIL(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_CEIL}", "CEIL(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_FLOOR": - replace("{Builtin_FLOOR}", "FLOOR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_FLOOR}", "FLOOR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_RAND": - replace("{Builtin_RAND}", "RAND()") + self._replace("{Builtin_RAND}", "RAND()") # # # 17.4.5 Functions on Dates and Times elif node.name == "Builtin_NOW": - replace("{Builtin_NOW}", "NOW()") + self._replace("{Builtin_NOW}", "NOW()") elif node.name == "Builtin_YEAR": - replace("{Builtin_YEAR}", "YEAR(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_YEAR}", "YEAR(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MONTH": - replace("{Builtin_MONTH}", "MONTH(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MONTH}", "MONTH(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_DAY": - replace("{Builtin_DAY}", "DAY(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_DAY}", "DAY(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_HOURS": - replace("{Builtin_HOURS}", "HOURS(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_HOURS}", "HOURS(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_MINUTES": - replace( - "{Builtin_MINUTES}", "MINUTES(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_MINUTES}", + "MINUTES(" + self.convert_node_arg(node.arg) + ")", ) elif 
node.name == "Builtin_SECONDS": - replace( - "{Builtin_SECONDS}", "SECONDS(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SECONDS}", + "SECONDS(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TIMEZONE": - replace( - "{Builtin_TIMEZONE}", "TIMEZONE(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_TIMEZONE}", + "TIMEZONE(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_TZ": - replace("{Builtin_TZ}", "TZ(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_TZ}", "TZ(" + self.convert_node_arg(node.arg) + ")" + ) # # # 17.4.6 Hash functions elif node.name == "Builtin_MD5": - replace("{Builtin_MD5}", "MD5(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_MD5}", "MD5(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA1": - replace("{Builtin_SHA1}", "SHA1(" + convert_node_arg(node.arg) + ")") + self._replace( + "{Builtin_SHA1}", "SHA1(" + self.convert_node_arg(node.arg) + ")" + ) elif node.name == "Builtin_SHA256": - replace( - "{Builtin_SHA256}", "SHA256(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA256}", + "SHA256(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA384": - replace( - "{Builtin_SHA384}", "SHA384(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA384}", + "SHA384(" + self.convert_node_arg(node.arg) + ")", ) elif node.name == "Builtin_SHA512": - replace( - "{Builtin_SHA512}", "SHA512(" + convert_node_arg(node.arg) + ")" + self._replace( + "{Builtin_SHA512}", + "SHA512(" + self.convert_node_arg(node.arg) + ")", ) # Other @@ -1507,29 +1638,41 @@ def sparql_query_text(node): ) rows += "(" + " ".join(row) + ")" - replace("values", values + "{" + rows + "}") + self._replace("values", values + "{" + rows + "}") elif node.name == "ServiceGraphPattern": - replace( + self._replace( "{ServiceGraphPattern}", "SERVICE " - + convert_node_arg(node.term) + + 
self.convert_node_arg(node.term) + "{" + node.graph.name + "}", ) - traverse(node.graph, visitPre=sparql_query_text) + traverse(node.graph, visitPre=self.sparql_query_text) return node.graph # else: # raise ExpressionNotCoveredException("The expression {0} might not be covered yet.".format(node.name)) - traverse(query_algebra.algebra, visitPre=sparql_query_text) - query_from_algebra = open("query.txt", "r").read() - os.remove("query.txt") + def translateAlgebra(self) -> str: + traverse(self.query_algebra.algebra, visitPre=self.sparql_query_text) + return self._alg_translation + +def translateAlgebra(query_algebra: Query) -> str: + """ + Translates a SPARQL 1.1 algebra tree into the corresponding query string. + + :param query_algebra: An algebra returned by `translateQuery`. + :return: The query form generated from the SPARQL 1.1 algebra tree for + SELECT queries. + """ + query_from_algebra = _AlgebraTranslator( + query_algebra=query_algebra + ).translateAlgebra() return query_from_algebra -def pprintAlgebra(q): +def pprintAlgebra(q) -> None: def pp(p, ind=" "): # if isinstance(p, list): # print "[ " diff --git a/dependencies/rdflib/plugins/sparql/datatypes.py b/dependencies/rdflib/plugins/sparql/datatypes.py index 115a953b6..2f60fe428 100644 --- a/dependencies/rdflib/plugins/sparql/datatypes.py +++ b/dependencies/rdflib/plugins/sparql/datatypes.py @@ -1,10 +1,17 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, List, Optional, Set + """ Utility functions for supporting the XML Schema Datatypes hierarchy """ -from rdflib import XSD +from rdflib.namespace import XSD + +if TYPE_CHECKING: + from rdflib.term import URIRef -XSD_DTs = set( +XSD_DTs: Set[URIRef] = set( ( XSD.integer, XSD.decimal, @@ -35,7 +42,7 @@ XSD_Duration_DTs = set((XSD.duration, XSD.dayTimeDuration, XSD.yearMonthDuration)) -_sub_types = { +_sub_types: Dict[URIRef, List[URIRef]] = { XSD.integer: [ XSD.nonPositiveInteger, XSD.negativeInteger, @@ -52,13 +59,13 @@ ], } 
-_super_types = {} +_super_types: Dict[URIRef, URIRef] = {} for superdt in XSD_DTs: for subdt in _sub_types.get(superdt, []): _super_types[subdt] = superdt # we only care about float, double, integer, decimal -_typePromotionMap = { +_typePromotionMap: Dict[URIRef, Dict[URIRef, URIRef]] = { XSD.float: {XSD.integer: XSD.float, XSD.decimal: XSD.float, XSD.double: XSD.double}, XSD.double: { XSD.integer: XSD.double, @@ -78,7 +85,7 @@ } -def type_promotion(t1, t2): +def type_promotion(t1: URIRef, t2: Optional[URIRef]) -> URIRef: if t2 is None: return t1 t1 = _super_types.get(t1, t1) @@ -86,6 +93,9 @@ def type_promotion(t1, t2): if t1 == t2: return t1 # matching super-types try: + if TYPE_CHECKING: + # type assert because mypy is confused and thinks t2 can be None + assert t2 is not None return _typePromotionMap[t1][t2] except KeyError: raise TypeError("Operators cannot combine datatypes %s and %s" % (t1, t2)) diff --git a/dependencies/rdflib/plugins/sparql/evaluate.py b/dependencies/rdflib/plugins/sparql/evaluate.py index 7b0f6e50b..08dd02d57 100644 --- a/dependencies/rdflib/plugins/sparql/evaluate.py +++ b/dependencies/rdflib/plugins/sparql/evaluate.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ These method recursively evaluate the SPARQL Algebra @@ -18,7 +20,19 @@ import itertools import json as j import re -from typing import Any, Deque, Dict, Generator, Iterable, List, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Deque, + Dict, + Generator, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, +) from urllib.parse import urlencode from urllib.request import Request, urlopen @@ -46,6 +60,9 @@ ) from rdflib.term import BNode, Identifier, Literal, URIRef, Variable +if TYPE_CHECKING: + from rdflib.paths import Path + _Triple = Tuple[Identifier, Identifier, Identifier] @@ -67,24 +84,28 @@ def evalBGP( _o = ctx[o] # type error: Item "None" of "Optional[Graph]" has no attribute "triples" - for ss, sp, so in ctx.graph.triples((_s, _p, 
_o)): # type: ignore[union-attr] + # type Argument 1 to "triples" of "Graph" has incompatible type "Tuple[Union[str, Path, None], Union[str, Path, None], Union[str, Path, None]]"; expected "Tuple[Optional[Node], Optional[Node], Optional[Node]]" + for ss, sp, so in ctx.graph.triples((_s, _p, _o)): # type: ignore[union-attr, arg-type] if None in (_s, _p, _o): c = ctx.push() else: c = ctx if _s is None: - c[s] = ss + # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier") + c[s] = ss # type: ignore[assignment] try: if _p is None: - c[p] = sp + # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier") + c[p] = sp # type: ignore[assignment] except AlreadyBound: continue try: if _o is None: - c[o] = so + # type error: Incompatible types in assignment (expression has type "Union[Node, Any]", target has type "Identifier") + c[o] = so # type: ignore[assignment] except AlreadyBound: continue @@ -125,7 +146,6 @@ def evalLazyJoin( def evalJoin(ctx: QueryContext, join: CompValue) -> Generator[FrozenDict, None, None]: - # TODO: Deal with dict returned from evalPart from GROUP BY # only ever for join.p1 @@ -174,7 +194,6 @@ def evalLeftJoin( _ebv(join.expr, b) for b in evalPart(ctx.thaw(a.remember(p1_vars)), join.p2) ): - yield a @@ -193,7 +212,6 @@ def evalFilter( def evalGraph( ctx: QueryContext, part: CompValue ) -> Generator[FrozenBindings, None, None]: - if ctx.dataset is None: raise Exception( "Non-conjunctive-graph doesn't know about " @@ -201,12 +219,10 @@ def evalGraph( ) ctx = ctx.clone() - graph = ctx[part.term] + graph: Union[str, Path, None, Graph] = ctx[part.term] prev_graph = ctx.graph if graph is None: - for graph in ctx.dataset.contexts(): - # in SPARQL the default graph is NOT a named graph if graph == ctx.dataset.default_context: continue @@ -219,7 +235,10 @@ def evalGraph( yield x else: - c = ctx.pushGraph(ctx.dataset.get_context(graph)) + 
if TYPE_CHECKING: + assert not isinstance(graph, Graph) + # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Union[str, Path]"; expected "Union[Node, str, None]" + c = ctx.pushGraph(ctx.dataset.get_context(graph)) # type: ignore[arg-type] for x in evalPart(c, part.p): x.ctx.graph = prev_graph yield x @@ -241,15 +260,13 @@ def evalValues( def evalMultiset(ctx: QueryContext, part: CompValue): - if part.p.name == "values": return evalValues(ctx, part) return evalPart(ctx, part.p) -def evalPart(ctx: QueryContext, part: CompValue): - +def evalPart(ctx: QueryContext, part: CompValue) -> Any: # try custom evaluation functions for name, c in CUSTOM_EVALS.items(): try: @@ -307,26 +324,26 @@ def evalPart(ctx: QueryContext, part: CompValue): elif part.name == "ServiceGraphPattern": return evalServiceQuery(ctx, part) - # raise Exception('ServiceGraphPattern not implemented') elif part.name == "DescribeQuery": - raise Exception("DESCRIBE not implemented") + return evalDescribeQuery(ctx, part) else: raise Exception("I dont know: %s" % part.name) -def evalServiceQuery(ctx: QueryContext, part): +def evalServiceQuery(ctx: QueryContext, part: CompValue): res = {} match = re.match( "^service <(.*)>[ \n]*{(.*)}[ \n]*$", - part.get("service_string", ""), + # type error: Argument 2 to "get" of "CompValue" has incompatible type "str"; expected "bool" [arg-type] + part.get("service_string", ""), # type: ignore[arg-type] re.DOTALL | re.I, ) if match: service_url = match.group(1) - service_query = _buildQueryStringForServiceCall(ctx, match) + service_query = _buildQueryStringForServiceCall(ctx, match.group(2)) query_settings = {"query": service_query, "output": "json"} headers = { @@ -370,9 +387,7 @@ def evalServiceQuery(ctx: QueryContext, part): """ -def _buildQueryStringForServiceCall(ctx: QueryContext, match: re.Match) -> str: - - service_query = match.group(2) +def _buildQueryStringForServiceCall(ctx: QueryContext, service_query: str) -> str: try: 
parser.parseQuery(service_query) except ParseException: @@ -382,11 +397,11 @@ def _buildQueryStringForServiceCall(ctx: QueryContext, match: re.Match) -> str: for p in ctx.prologue.namespace_manager.store.namespaces(): # type: ignore[union-attr] service_query = "PREFIX " + p[0] + ":" + p[1].n3() + " " + service_query # re add the base if one was defined - # type error: Item "None" of "Optional[Prologue]" has no attribute "base" [union-attr] + # type error: Item "None" of "Optional[Prologue]" has no attribute "base" base = ctx.prologue.base # type: ignore[union-attr] if base is not None and len(base) > 0: service_query = "BASE <" + base + "> " + service_query - sol = ctx.solution() + sol = [v for v in ctx.solution() if isinstance(v, Variable)] if len(sol) > 0: variables = " ".join([v.n3() for v in sol]) variables_bound = " ".join([ctx.get(v).n3() for v in sol]) @@ -469,11 +484,9 @@ def evalAggregateJoin( def evalOrderBy( ctx: QueryContext, part: CompValue ) -> Generator[FrozenBindings, None, None]: - res = evalPart(ctx, part.p) for e in reversed(part.expr): - reverse = bool(e.order and e.order == "DESC") res = sorted( res, key=lambda x: _val(value(x, e.expr, variables=True)), reverse=reverse @@ -546,20 +559,20 @@ def evalDistinct( def evalProject(ctx: QueryContext, project: CompValue): res = evalPart(ctx, project.p) - return (row.project(project.PV) for row in res) -def evalSelectQuery(ctx: QueryContext, query: CompValue): - - res = {} +def evalSelectQuery( + ctx: QueryContext, query: CompValue +) -> Mapping[str, Union[str, List[Variable], Iterable[FrozenDict]]]: + res: Dict[str, Union[str, List[Variable], Iterable[FrozenDict]]] = {} res["type_"] = "SELECT" res["bindings"] = evalPart(ctx, query.p) res["vars_"] = query.PV return res -def evalAskQuery(ctx: QueryContext, query: CompValue): +def evalAskQuery(ctx: QueryContext, query: CompValue) -> Mapping[str, Union[str, bool]]: res: Dict[str, Union[bool, str]] = {} res["type_"] = "ASK" res["askAnswer"] = False @@ -570,7 
+583,9 @@ def evalAskQuery(ctx: QueryContext, query: CompValue): return res -def evalConstructQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]: +def evalConstructQuery( + ctx: QueryContext, query: CompValue +) -> Mapping[str, Union[str, Graph]]: template = query.template if not template: @@ -589,9 +604,64 @@ def evalConstructQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]] return res -def evalQuery(graph: Graph, query: Query, initBindings, base=None): +def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]: + # Create a result graph and bind namespaces from the graph being queried + graph = Graph() + # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces" + for pfx, ns in ctx.graph.namespaces(): # type: ignore[union-attr] + graph.bind(pfx, ns) + + to_describe = set() + + # Explicit IRIs may be provided to a DESCRIBE query. + # If there is a WHERE clause, explicit IRIs may be provided in + # addition to projected variables. Find those explicit IRIs and + # prepare to describe them. 
+ for iri in query.PV: + if isinstance(iri, URIRef): + to_describe.add(iri) + + # If there is a WHERE clause, evaluate it then find the unique set of + # resources to describe across all bindings and projected variables + if query.p is not None: + bindings = evalPart(ctx, query.p) + to_describe.update(*(set(binding.values()) for binding in bindings)) + + # Get a CBD for all resources identified to describe + for resource in to_describe: + # type error: Item "None" of "Optional[Graph]" has no attribute "cbd" + ctx.graph.cbd(resource, target_graph=graph) # type: ignore[union-attr] + + res: Dict[str, Union[str, Graph]] = {} + res["type_"] = "DESCRIBE" + res["graph"] = graph - initBindings = dict((Variable(k), v) for k, v in initBindings.items()) + return res + + +def evalQuery( + graph: Graph, + query: Query, + initBindings: Optional[Mapping[str, Identifier]] = None, + base: Optional[str] = None, +) -> Mapping[Any, Any]: + """ + + .. caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. + + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. 
+ """ + + initBindings = dict((Variable(k), v) for k, v in (initBindings or {}).items()) ctx = QueryContext(graph, initBindings=initBindings) @@ -610,7 +680,6 @@ def evalQuery(graph: Graph, query: Query, initBindings, base=None): firstDefault = False for d in main.datasetClause: if d.default: - if firstDefault: # replace current default graph dg = ctx.dataset.get_context(BNode()) diff --git a/dependencies/rdflib/plugins/sparql/evalutils.py b/dependencies/rdflib/plugins/sparql/evalutils.py index ebec86df5..84c868c94 100644 --- a/dependencies/rdflib/plugins/sparql/evalutils.py +++ b/dependencies/rdflib/plugins/sparql/evalutils.py @@ -1,13 +1,37 @@ +from __future__ import annotations + import collections -from typing import Dict, Iterable +from typing import ( + Any, + DefaultDict, + Generator, + Iterable, + Mapping, + Set, + Tuple, + TypeVar, + Union, + overload, +) from rdflib.plugins.sparql.operators import EBV from rdflib.plugins.sparql.parserutils import CompValue, Expr -from rdflib.plugins.sparql.sparql import FrozenDict, NotBoundError, SPARQLError -from rdflib.term import BNode, Literal, URIRef, Variable - - -def _diff(a: Iterable[FrozenDict], b: Iterable[FrozenDict], expr): +from rdflib.plugins.sparql.sparql import ( + FrozenBindings, + FrozenDict, + NotBoundError, + QueryContext, + SPARQLError, +) +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable + +_ContextType = Union[FrozenBindings, QueryContext] +_FrozenDictT = TypeVar("_FrozenDictT", bound=FrozenDict) + + +def _diff( + a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT], expr +) -> Set[_FrozenDictT]: res = set() for x in a: @@ -17,20 +41,38 @@ def _diff(a: Iterable[FrozenDict], b: Iterable[FrozenDict], expr): return res -def _minus(a: Iterable[FrozenDict], b: Iterable[FrozenDict]): +def _minus( + a: Iterable[_FrozenDictT], b: Iterable[_FrozenDictT] +) -> Generator[_FrozenDictT, None, None]: for x in a: if all((not x.compatible(y)) or x.disjointDomain(y) for y in b): yield x -def 
_join(a: Iterable[FrozenDict], b: Iterable[Dict]): +@overload +def _join( + a: Iterable[FrozenBindings], b: Iterable[Mapping[Identifier, Identifier]] +) -> Generator[FrozenBindings, None, None]: + ... + + +@overload +def _join( + a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]] +) -> Generator[FrozenDict, None, None]: + ... + + +def _join( + a: Iterable[FrozenDict], b: Iterable[Mapping[Identifier, Identifier]] +) -> Generator[FrozenDict, None, None]: for x in a: for y in b: if x.compatible(y): yield x.merge(y) -def _ebv(expr, ctx): +def _ebv(expr: Union[Literal, Variable, Expr], ctx: FrozenDict) -> bool: """ Return true/false for the given expr Either the expr is itself true/false @@ -48,7 +90,8 @@ def _ebv(expr, ctx): return EBV(expr.eval(ctx)) except SPARQLError: return False # filter error == False - elif isinstance(expr, CompValue): + # type error: Subclass of "Literal" and "CompValue" cannot exist: would have incompatible method signatures + elif isinstance(expr, CompValue): # type: ignore[unreachable] raise Exception("Weird - filter got a CompValue without evalfn! %r" % expr) elif isinstance(expr, Variable): try: @@ -58,7 +101,29 @@ def _ebv(expr, ctx): return False -def _eval(expr, ctx, raise_not_bound_error=True): +@overload +def _eval( + expr: Union[Literal, URIRef], + ctx: FrozenBindings, + raise_not_bound_error: bool = ..., +) -> Union[Literal, URIRef]: + ... + + +@overload +def _eval( + expr: Union[Variable, Expr], + ctx: FrozenBindings, + raise_not_bound_error: bool = ..., +) -> Union[Any, SPARQLError]: + ... 
+ + +def _eval( + expr: Union[Literal, URIRef, Variable, Expr], + ctx: FrozenBindings, + raise_not_bound_error: bool = True, +) -> Any: if isinstance(expr, (Literal, URIRef)): return expr if isinstance(expr, Expr): @@ -71,26 +136,31 @@ def _eval(expr, ctx, raise_not_bound_error=True): raise NotBoundError("Variable %s is not bound" % expr) else: return None - elif isinstance(expr, CompValue): + elif isinstance(expr, CompValue): # type: ignore[unreachable] raise Exception("Weird - _eval got a CompValue without evalfn! %r" % expr) else: raise Exception("Cannot eval thing: %s (%s)" % (expr, type(expr))) -def _filter(a, expr): +def _filter( + a: Iterable[FrozenDict], expr: Union[Literal, Variable, Expr] +) -> Generator[FrozenDict, None, None]: for c in a: if _ebv(expr, c): yield c -def _fillTemplate(template, solution): +def _fillTemplate( + template: Iterable[Tuple[Identifier, Identifier, Identifier]], + solution: _ContextType, +) -> Generator[Tuple[Identifier, Identifier, Identifier], None, None]: """ For construct/deleteWhere and friends Fill a triple template with instantiated variables """ - bnodeMap = collections.defaultdict(BNode) + bnodeMap: DefaultDict[BNode, BNode] = collections.defaultdict(BNode) for t in template: s, p, o = t @@ -104,11 +174,13 @@ def _fillTemplate(template, solution): ] if _s is not None and _p is not None and _o is not None: - yield (_s, _p, _o) -def _val(v): +_ValueT = TypeVar("_ValueT", Variable, BNode, URIRef, Literal) + + +def _val(v: _ValueT) -> Tuple[int, _ValueT]: """utilitity for ordering things""" if isinstance(v, Variable): return (0, v) diff --git a/dependencies/rdflib/plugins/sparql/operators.py b/dependencies/rdflib/plugins/sparql/operators.py index 0f7b53255..908b1d5c5 100644 --- a/dependencies/rdflib/plugins/sparql/operators.py +++ b/dependencies/rdflib/plugins/sparql/operators.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ This contains evaluation functions for expressions @@ -12,17 +14,17 @@ import operator 
as pyop # python operators import random import re -import sys import uuid import warnings -from decimal import ROUND_HALF_UP, Decimal, InvalidOperation +from decimal import ROUND_HALF_DOWN, ROUND_HALF_UP, Decimal, InvalidOperation from functools import reduce +from typing import Any, Callable, Dict, NoReturn, Optional, Tuple, Union, overload from urllib.parse import quote import isodate from pyparsing import ParseResults -from rdflib import RDF, XSD, BNode, Literal, URIRef, Variable +from rdflib.namespace import RDF, XSD from rdflib.plugins.sparql.datatypes import ( XSD_DateTime_DTs, XSD_DTs, @@ -30,11 +32,24 @@ type_promotion, ) from rdflib.plugins.sparql.parserutils import CompValue, Expr -from rdflib.plugins.sparql.sparql import SPARQLError, SPARQLTypeError -from rdflib.term import Node +from rdflib.plugins.sparql.sparql import ( + FrozenBindings, + QueryContext, + SPARQLError, + SPARQLTypeError, +) +from rdflib.term import ( + BNode, + IdentifiedNode, + Identifier, + Literal, + Node, + URIRef, + Variable, +) -def Builtin_IRI(expr, ctx): +def Builtin_IRI(expr: Expr, ctx: FrozenBindings) -> URIRef: """ http://www.w3.org/TR/sparql11-query/#func-iri """ @@ -44,24 +59,26 @@ def Builtin_IRI(expr, ctx): if isinstance(a, URIRef): return a if isinstance(a, Literal): - return ctx.prologue.absolutize(URIRef(a)) + # type error: Item "None" of "Optional[Prologue]" has no attribute "absolutize" + # type error: Incompatible return value type (got "Union[CompValue, str, None, Any]", expected "URIRef") + return ctx.prologue.absolutize(URIRef(a)) # type: ignore[union-attr,return-value] raise SPARQLError("IRI function only accepts URIRefs or Literals/Strings!") -def Builtin_isBLANK(expr, ctx): +def Builtin_isBLANK(expr: Expr, ctx: FrozenBindings) -> Literal: return Literal(isinstance(expr.arg, BNode)) -def Builtin_isLITERAL(expr, ctx): +def Builtin_isLITERAL(expr, ctx) -> Literal: return Literal(isinstance(expr.arg, Literal)) -def Builtin_isIRI(expr, ctx): +def 
Builtin_isIRI(expr, ctx) -> Literal: return Literal(isinstance(expr.arg, URIRef)) -def Builtin_isNUMERIC(expr, ctx): +def Builtin_isNUMERIC(expr, ctx) -> Literal: try: numeric(expr.arg) return Literal(True) @@ -69,7 +86,7 @@ def Builtin_isNUMERIC(expr, ctx): return Literal(False) -def Builtin_BNODE(expr, ctx): +def Builtin_BNODE(expr, ctx) -> BNode: """ http://www.w3.org/TR/sparql11-query/#func-bnode """ @@ -85,7 +102,7 @@ def Builtin_BNODE(expr, ctx): raise SPARQLError("BNode function only accepts no argument or literal/string") -def Builtin_ABS(expr, ctx): +def Builtin_ABS(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-abs """ @@ -93,7 +110,7 @@ def Builtin_ABS(expr, ctx): return Literal(abs(numeric(expr.arg))) -def Builtin_IF(expr, ctx): +def Builtin_IF(expr: Expr, ctx): """ http://www.w3.org/TR/sparql11-query/#func-if """ @@ -101,7 +118,7 @@ def Builtin_IF(expr, ctx): return expr.arg2 if EBV(expr.arg1) else expr.arg3 -def Builtin_RAND(expr, ctx): +def Builtin_RAND(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#idp2133952 """ @@ -109,7 +126,7 @@ def Builtin_RAND(expr, ctx): return Literal(random.random()) -def Builtin_UUID(expr, ctx): +def Builtin_UUID(expr: Expr, ctx) -> URIRef: """ http://www.w3.org/TR/sparql11-query/#func-strdt """ @@ -117,7 +134,7 @@ def Builtin_UUID(expr, ctx): return URIRef(uuid.uuid4().urn) -def Builtin_STRUUID(expr, ctx): +def Builtin_STRUUID(expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strdt """ @@ -125,32 +142,32 @@ def Builtin_STRUUID(expr, ctx): return Literal(str(uuid.uuid4())) -def Builtin_MD5(expr, ctx): +def Builtin_MD5(expr: Expr, ctx) -> Literal: s = string(expr.arg).encode("utf-8") return Literal(hashlib.md5(s).hexdigest()) -def Builtin_SHA1(expr, ctx): +def Builtin_SHA1(expr: Expr, ctx) -> Literal: s = string(expr.arg).encode("utf-8") return Literal(hashlib.sha1(s).hexdigest()) -def Builtin_SHA256(expr, ctx): +def Builtin_SHA256(expr: Expr, ctx) -> 
Literal: s = string(expr.arg).encode("utf-8") return Literal(hashlib.sha256(s).hexdigest()) -def Builtin_SHA384(expr, ctx): +def Builtin_SHA384(expr: Expr, ctx) -> Literal: s = string(expr.arg).encode("utf-8") return Literal(hashlib.sha384(s).hexdigest()) -def Builtin_SHA512(expr, ctx): +def Builtin_SHA512(expr: Expr, ctx) -> Literal: s = string(expr.arg).encode("utf-8") return Literal(hashlib.sha512(s).hexdigest()) -def Builtin_COALESCE(expr, ctx): +def Builtin_COALESCE(expr: Expr, ctx): """ http://www.w3.org/TR/sparql11-query/#func-coalesce """ @@ -160,7 +177,7 @@ def Builtin_COALESCE(expr, ctx): raise SPARQLError("COALESCE got no arguments that did not evaluate to an error") -def Builtin_CEIL(expr, ctx): +def Builtin_CEIL(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-ceil """ @@ -169,7 +186,7 @@ def Builtin_CEIL(expr, ctx): return Literal(int(math.ceil(numeric(l_))), datatype=l_.datatype) -def Builtin_FLOOR(expr, ctx): +def Builtin_FLOOR(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-floor """ @@ -177,7 +194,7 @@ def Builtin_FLOOR(expr, ctx): return Literal(int(math.floor(numeric(l_))), datatype=l_.datatype) -def Builtin_ROUND(expr, ctx): +def Builtin_ROUND(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-round """ @@ -188,11 +205,11 @@ def Builtin_ROUND(expr, ctx): # this is an ugly work-around l_ = expr.arg v = numeric(l_) - v = int(Decimal(v).quantize(1, ROUND_HALF_UP)) + v = int(Decimal(v).quantize(1, ROUND_HALF_UP if v > 0 else ROUND_HALF_DOWN)) return Literal(v, datatype=l_.datatype) -def Builtin_REGEX(expr, ctx): +def Builtin_REGEX(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-regex Invokes the XPath fn:matches function to match text against a regular @@ -215,7 +232,7 @@ def Builtin_REGEX(expr, ctx): return Literal(bool(re.search(str(pattern), text, cFlag))) -def Builtin_REPLACE(expr, ctx): +def Builtin_REPLACE(expr: Expr, ctx) -> Literal: """ 
http://www.w3.org/TR/sparql11-query/#func-substr """ @@ -225,29 +242,8 @@ def Builtin_REPLACE(expr, ctx): flags = expr.flags # python uses \1, xpath/sparql uses $1 - replacement = re.sub("\\$([0-9]*)", r"\\\1", replacement) - - def _r(m): - - # Now this is ugly. - # Python has a "feature" where unmatched groups return None - # then re.sub chokes on this. - # see http://bugs.python.org/issue1519638 , fixed and errs in py3.5 - - # this works around and hooks into the internal of the re module... - - # the match object is replaced with a wrapper that - # returns "" instead of None for unmatched groups - - class _m: - def __init__(self, m): - self.m = m - self.string = m.string - - def group(self, n): - return m.group(n) or "" - - return re._expand(pattern, _m(m), replacement) + # type error: Incompatible types in assignment (expression has type "str", variable has type "Literal") + replacement = re.sub("\\$([0-9]*)", r"\\\1", replacement) # type: ignore[assignment] cFlag = 0 if flags: @@ -258,18 +254,14 @@ def group(self, n): # @@FIXME@@ either datatype OR lang, NOT both - # this is necessary due to different treatment of unmatched groups in - # python versions. see comments above in _r(m). 
- compat_r = str(replacement) if sys.version_info[:2] >= (3, 5) else _r - return Literal( - re.sub(str(pattern), compat_r, text, cFlag), + re.sub(str(pattern), replacement, text, cFlag), datatype=text.datatype, lang=text.language, ) -def Builtin_STRDT(expr, ctx): +def Builtin_STRDT(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strdt """ @@ -277,7 +269,7 @@ def Builtin_STRDT(expr, ctx): return Literal(str(expr.arg1), datatype=expr.arg2) -def Builtin_STRLANG(expr, ctx): +def Builtin_STRLANG(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strlang """ @@ -291,7 +283,7 @@ def Builtin_STRLANG(expr, ctx): return Literal(str(s), lang=str(expr.arg2).lower()) -def Builtin_CONCAT(expr, ctx): +def Builtin_CONCAT(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-concat """ @@ -299,15 +291,20 @@ def Builtin_CONCAT(expr, ctx): # dt/lang passed on only if they all match dt = set(x.datatype for x in expr.arg if isinstance(x, Literal)) - dt = dt.pop() if len(dt) == 1 else None + # type error: Incompatible types in assignment (expression has type "Optional[str]", variable has type "Set[Optional[str]]") + dt = dt.pop() if len(dt) == 1 else None # type: ignore[assignment] lang = set(x.language for x in expr.arg if isinstance(x, Literal)) - lang = lang.pop() if len(lang) == 1 else None + # type error: error: Incompatible types in assignment (expression has type "Optional[str]", variable has type "Set[Optional[str]]") + lang = lang.pop() if len(lang) == 1 else None # type: ignore[assignment] - return Literal("".join(string(x) for x in expr.arg), datatype=dt, lang=lang) + # NOTE on type errors: this is because same variable is used for two incompatibel types + # type error: Argument "datatype" to "Literal" has incompatible type "Set[Any]"; expected "Optional[str]" [arg-type] + # type error: Argument "lang" to "Literal" has incompatible type "Set[Any]"; expected "Optional[str]" + return 
Literal("".join(string(x) for x in expr.arg), datatype=dt, lang=lang) # type: ignore[arg-type] -def _compatibleStrings(a, b): +def _compatibleStrings(a: Literal, b: Literal) -> None: string(a) string(b) @@ -315,7 +312,7 @@ def _compatibleStrings(a, b): raise SPARQLError("incompatible arguments to str functions") -def Builtin_STRSTARTS(expr, ctx): +def Builtin_STRSTARTS(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strstarts """ @@ -327,7 +324,7 @@ def Builtin_STRSTARTS(expr, ctx): return Literal(a.startswith(b)) -def Builtin_STRENDS(expr, ctx): +def Builtin_STRENDS(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strends """ @@ -339,7 +336,7 @@ def Builtin_STRENDS(expr, ctx): return Literal(a.endswith(b)) -def Builtin_STRBEFORE(expr, ctx): +def Builtin_STRBEFORE(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strbefore """ @@ -355,7 +352,7 @@ def Builtin_STRBEFORE(expr, ctx): return Literal(a[:i], lang=a.language, datatype=a.datatype) -def Builtin_STRAFTER(expr, ctx): +def Builtin_STRAFTER(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strafter """ @@ -371,7 +368,7 @@ def Builtin_STRAFTER(expr, ctx): return Literal(a[i + len(b) :], lang=a.language, datatype=a.datatype) -def Builtin_CONTAINS(expr, ctx): +def Builtin_CONTAINS(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-strcontains """ @@ -383,11 +380,11 @@ def Builtin_CONTAINS(expr, ctx): return Literal(b in a) -def Builtin_ENCODE_FOR_URI(expr, ctx): - return Literal(quote(string(expr.arg).encode("utf-8"))) +def Builtin_ENCODE_FOR_URI(expr: Expr, ctx) -> Literal: + return Literal(quote(string(expr.arg).encode("utf-8"), safe="")) -def Builtin_SUBSTR(expr, ctx): +def Builtin_SUBSTR(expr: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-substr """ @@ -403,26 +400,26 @@ def Builtin_SUBSTR(expr, ctx): return Literal(a[start:length], lang=a.language, 
datatype=a.datatype) -def Builtin_STRLEN(e, ctx): +def Builtin_STRLEN(e: Expr, ctx) -> Literal: l_ = string(e.arg) return Literal(len(l_)) -def Builtin_STR(e, ctx): +def Builtin_STR(e: Expr, ctx) -> Literal: arg = e.arg if isinstance(arg, SPARQLError): raise arg return Literal(str(arg)) # plain literal -def Builtin_LCASE(e, ctx): +def Builtin_LCASE(e: Expr, ctx) -> Literal: l_ = string(e.arg) return Literal(l_.lower(), datatype=l_.datatype, lang=l_.language) -def Builtin_LANGMATCHES(e, ctx): +def Builtin_LANGMATCHES(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-langMatches @@ -437,47 +434,50 @@ def Builtin_LANGMATCHES(e, ctx): return Literal(_lang_range_check(langRange, langTag)) -def Builtin_NOW(e, ctx): +def Builtin_NOW(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-now """ return Literal(ctx.now) -def Builtin_YEAR(e, ctx): +def Builtin_YEAR(e: Expr, ctx) -> Literal: d = date(e.arg) return Literal(d.year) -def Builtin_MONTH(e, ctx): +def Builtin_MONTH(e: Expr, ctx) -> Literal: d = date(e.arg) return Literal(d.month) -def Builtin_DAY(e, ctx): +def Builtin_DAY(e: Expr, ctx) -> Literal: d = date(e.arg) return Literal(d.day) -def Builtin_HOURS(e, ctx): +def Builtin_HOURS(e: Expr, ctx) -> Literal: d = datetime(e.arg) return Literal(d.hour) -def Builtin_MINUTES(e, ctx): +def Builtin_MINUTES(e: Expr, ctx) -> Literal: d = datetime(e.arg) return Literal(d.minute) -def Builtin_SECONDS(e, ctx): +def Builtin_SECONDS(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-seconds """ d = datetime(e.arg) - return Literal(d.second, datatype=XSD.decimal) + result_value = Decimal(d.second) + if d.microsecond: + result_value += Decimal(d.microsecond) / Decimal(1000000) + return Literal(result_value, datatype=XSD.decimal) -def Builtin_TIMEZONE(e, ctx): +def Builtin_TIMEZONE(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-timezone @@ -490,8 +490,10 @@ def Builtin_TIMEZONE(e, ctx): delta = 
dt.utcoffset() - d = delta.days - s = delta.seconds + # type error: Item "None" of "Optional[timedelta]" has no attribute "days" + d = delta.days # type: ignore[union-attr] + # type error: Item "None" of "Optional[timedelta]" has no attribute "seconds" + s = delta.seconds # type: ignore[union-attr] neg = "" if d < 0: @@ -514,7 +516,7 @@ def Builtin_TIMEZONE(e, ctx): return Literal(tzdelta, datatype=XSD.dayTimeDuration) -def Builtin_TZ(e, ctx): +def Builtin_TZ(e: Expr, ctx) -> Literal: d = datetime(e.arg) if not d.tzinfo: return Literal("") @@ -524,13 +526,13 @@ def Builtin_TZ(e, ctx): return Literal(n) -def Builtin_UCASE(e, ctx): +def Builtin_UCASE(e: Expr, ctx) -> Literal: l_ = string(e.arg) return Literal(l_.upper(), datatype=l_.datatype, lang=l_.language) -def Builtin_LANG(e, ctx): +def Builtin_LANG(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-lang @@ -543,7 +545,7 @@ def Builtin_LANG(e, ctx): return Literal(l_.language or "") -def Builtin_DATATYPE(e, ctx): +def Builtin_DATATYPE(e: Expr, ctx) -> Optional[str]: l_ = e.arg if not isinstance(l_, Literal): raise SPARQLError("Can only get datatype of literal: %r" % l_) @@ -554,13 +556,13 @@ def Builtin_DATATYPE(e, ctx): return l_.datatype -def Builtin_sameTerm(e, ctx): +def Builtin_sameTerm(e: Expr, ctx) -> Literal: a = e.arg1 b = e.arg2 return Literal(a == b) -def Builtin_BOUND(e, ctx): +def Builtin_BOUND(e: Expr, ctx) -> Literal: """ http://www.w3.org/TR/sparql11-query/#func-bound """ @@ -569,22 +571,28 @@ def Builtin_BOUND(e, ctx): return Literal(not isinstance(n, Variable)) -def Builtin_EXISTS(e, ctx): +def Builtin_EXISTS(e: Expr, ctx: FrozenBindings) -> Literal: # damn... 
from rdflib.plugins.sparql.evaluate import evalPart exists = e.name == "Builtin_EXISTS" - ctx = ctx.ctx.thaw(ctx) # hmm - for x in evalPart(ctx, e.graph): + # type error: Incompatible types in assignment (expression has type "QueryContext", variable has type "FrozenBindings") + ctx = ctx.ctx.thaw(ctx) # type: ignore[assignment] # hmm + # type error: Argument 1 to "evalPart" has incompatible type "FrozenBindings"; expected "QueryContext" + for x in evalPart(ctx, e.graph): # type: ignore[arg-type] return Literal(exists) return Literal(not exists) -_CUSTOM_FUNCTIONS = {} +_CustomFunction = Callable[[Expr, FrozenBindings], Node] + +_CUSTOM_FUNCTIONS: Dict[URIRef, Tuple[_CustomFunction, bool]] = {} -def register_custom_function(uri, func, override=False, raw=False): +def register_custom_function( + uri: URIRef, func: _CustomFunction, override: bool = False, raw: bool = False +) -> None: """ Register a custom SPARQL function. @@ -598,19 +606,23 @@ def register_custom_function(uri, func, override=False, raw=False): _CUSTOM_FUNCTIONS[uri] = (func, raw) -def custom_function(uri, override=False, raw=False): +def custom_function( + uri: URIRef, override: bool = False, raw: bool = False +) -> Callable[[_CustomFunction], _CustomFunction]: """ Decorator version of :func:`register_custom_function`. """ - def decorator(func): + def decorator(func: _CustomFunction) -> _CustomFunction: register_custom_function(uri, func, override=override, raw=raw) return func return decorator -def unregister_custom_function(uri, func=None): +def unregister_custom_function( + uri: URIRef, func: Optional[Callable[..., Any]] = None +) -> None: """ The 'func' argument is included for compatibility with existing code. 
A previous implementation checked that the function associated with @@ -623,7 +635,7 @@ def unregister_custom_function(uri, func=None): warnings.warn("This function is not registered as %s" % uri.n3()) -def Function(e, ctx): +def Function(e: Expr, ctx: FrozenBindings) -> Node: """ Custom functions and casts """ @@ -651,7 +663,7 @@ def Function(e, ctx): @custom_function(XSD.decimal, raw=True) @custom_function(XSD.integer, raw=True) @custom_function(XSD.boolean, raw=True) -def default_cast(e, ctx): +def default_cast(e: Expr, ctx: FrozenBindings) -> Literal: # type: ignore[return] if not e.expr: raise SPARQLError("Nothing given to cast.") if len(e.expr) > 1: @@ -660,7 +672,6 @@ def default_cast(e, ctx): x = e.expr[0] if e.iri == XSD.string: - if isinstance(x, (URIRef, Literal)): return Literal(x, datatype=XSD.string) else: @@ -715,20 +726,21 @@ def default_cast(e, ctx): raise SPARQLError("Cannot interpret '%r' as bool" % x) -def UnaryNot(expr, ctx): +def UnaryNot(expr: Expr, ctx: FrozenBindings) -> Literal: return Literal(not EBV(expr.expr)) -def UnaryMinus(expr, ctx): +def UnaryMinus(expr: Expr, ctx: FrozenBindings) -> Literal: return Literal(-numeric(expr.expr)) -def UnaryPlus(expr, ctx): +def UnaryPlus(expr: Expr, ctx: FrozenBindings) -> Literal: return Literal(+numeric(expr.expr)) -def MultiplicativeExpression(e, ctx): - +def MultiplicativeExpression( + e: Expr, ctx: Union[QueryContext, FrozenBindings] +) -> Literal: expr = e.expr other = e.other @@ -737,6 +749,7 @@ def MultiplicativeExpression(e, ctx): if other is None: return expr try: + res: Union[Decimal, float] res = Decimal(numeric(expr)) for op, f in zip(e.op, other): f = numeric(f) @@ -754,8 +767,8 @@ def MultiplicativeExpression(e, ctx): return Literal(res) -def AdditiveExpression(e, ctx): - +# type error: Missing return statement +def AdditiveExpression(e: Expr, ctx: Union[QueryContext, FrozenBindings]) -> Literal: # type: ignore[return] expr = e.expr other = e.other @@ -769,12 +782,10 @@ def 
AdditiveExpression(e, ctx): if hasattr(expr, "datatype") and ( expr.datatype in XSD_DateTime_DTs or expr.datatype in XSD_Duration_DTs ): - res = dateTimeObjects(expr) dt = expr.datatype for op, term in zip(e.op, other): - # check if operation is datetime,date,time operation over # another datetime,date,time datatype if dt in XSD_DateTime_DTs and dt == term.datatype and op == "-": @@ -783,7 +794,8 @@ def AdditiveExpression(e, ctx): # ( dateTime1 - dateTime2 - dateTime3 ) is an invalid operation if len(other) > 1: error_message = "Can't evaluate multiple %r arguments" - raise SPARQLError(error_message, dt.datatype) + # type error: Too many arguments for "SPARQLError" + raise SPARQLError(error_message, dt.datatype) # type: ignore[call-arg] else: n = dateTimeObjects(term) res = calculateDuration(res, n) @@ -829,8 +841,7 @@ def AdditiveExpression(e, ctx): return Literal(res, datatype=dt) -def RelationalExpression(e, ctx): - +def RelationalExpression(e: Expr, ctx: Union[QueryContext, FrozenBindings]) -> Literal: expr = e.expr other = e.other op = e.op @@ -854,10 +865,9 @@ def RelationalExpression(e, ctx): ) if op in ("IN", "NOT IN"): - res = op == "NOT IN" - error = False + error: Union[bool, SPARQLError] = False if other == RDF.nil: other = [] @@ -871,7 +881,9 @@ def RelationalExpression(e, ctx): if not error: return Literal(False ^ res) else: - raise error + # Note on type error: this is because variable is Union[bool, SPARQLError] + # type error: Exception must be derived from BaseException + raise error # type: ignore[misc] if op not in ("=", "!=", "IN", "NOT IN"): if not isinstance(expr, Literal): @@ -889,7 +901,6 @@ def RelationalExpression(e, ctx): raise SPARQLError("I cannot compare this non-node: %r" % other) if isinstance(expr, Literal) and isinstance(other, Literal): - if ( expr.datatype is not None and expr.datatype not in XSD_DTs @@ -909,8 +920,9 @@ def RelationalExpression(e, ctx): return Literal(r) -def ConditionalAndExpression(e, ctx): - +def 
ConditionalAndExpression( + e: Expr, ctx: Union[QueryContext, FrozenBindings] +) -> Literal: # TODO: handle returned errors expr = e.expr @@ -924,8 +936,9 @@ def ConditionalAndExpression(e, ctx): return Literal(all(EBV(x) for x in [expr] + other)) -def ConditionalOrExpression(e, ctx): - +def ConditionalOrExpression( + e: Expr, ctx: Union[QueryContext, FrozenBindings] +) -> Literal: # TODO: handle errors expr = e.expr @@ -950,11 +963,11 @@ def ConditionalOrExpression(e, ctx): return Literal(False) -def not_(arg): +def not_(arg) -> Expr: return Expr("UnaryNot", UnaryNot, expr=arg) -def and_(*args): +def and_(*args: Expr) -> Expr: if len(args) == 1: return args[0] @@ -969,13 +982,14 @@ def and_(*args): TrueFilter = Expr("TrueFilter", lambda _1, _2: Literal(True)) -def simplify(expr): +def simplify(expr: Any) -> Any: if isinstance(expr, ParseResults) and len(expr) == 1: return simplify(expr[0]) if isinstance(expr, (list, ParseResults)): return list(map(simplify, expr)) - if not isinstance(expr, CompValue): + # type error: Statement is unreachable + if not isinstance(expr, CompValue): # type: ignore[unreachable] return expr if expr.name.endswith("Expression"): if expr.other is None: @@ -989,13 +1003,13 @@ def simplify(expr): return expr -def literal(s): +def literal(s: Literal) -> Literal: if not isinstance(s, Literal): raise SPARQLError("Non-literal passed as string: %r" % s) return s -def datetime(e): +def datetime(e: Literal) -> py_datetime.datetime: if not isinstance(e, Literal): raise SPARQLError("Non-literal passed as datetime: %r" % e) if not e.datatype == XSD.dateTime: @@ -1003,7 +1017,7 @@ def datetime(e): return e.toPython() -def date(e) -> py_datetime.date: +def date(e: Literal) -> py_datetime.date: if not isinstance(e, Literal): raise SPARQLError("Non-literal passed as date: %r" % e) if e.datatype not in (XSD.date, XSD.dateTime): @@ -1014,7 +1028,7 @@ def date(e) -> py_datetime.date: return result -def string(s): +def string(s: Literal) -> Literal: """ Make 
sure the passed thing is a string literal i.e. plain literal, xsd:string literal or lang-tagged literal @@ -1026,7 +1040,7 @@ def string(s): return s -def numeric(expr): +def numeric(expr: Literal) -> Any: """ return a number from a literal http://www.w3.org/TR/xpath20/#promotion @@ -1060,7 +1074,7 @@ def numeric(expr): return expr.toPython() -def dateTimeObjects(expr): +def dateTimeObjects(expr: Literal) -> Any: """ return a dataTime/date/time/duration/dayTimeDuration/yearMonthDuration python objects from a literal @@ -1068,7 +1082,13 @@ def dateTimeObjects(expr): return expr.toPython() -def isCompatibleDateTimeDatatype(obj1, dt1, obj2, dt2): +# type error: Missing return statement +def isCompatibleDateTimeDatatype( # type: ignore[return] + obj1: Union[py_datetime.date, py_datetime.datetime], + dt1: URIRef, + obj2: Union[isodate.Duration, py_datetime.timedelta], + dt2: URIRef, +) -> bool: """ Returns a boolean indicating if first object is compatible with operation(+/-) over second object. 
@@ -1102,18 +1122,28 @@ def isCompatibleDateTimeDatatype(obj1, dt1, obj2, dt2): return True -def calculateDuration(obj1, obj2): +def calculateDuration( + obj1: Union[py_datetime.date, py_datetime.datetime], + obj2: Union[py_datetime.date, py_datetime.datetime], +) -> Literal: """ returns the duration Literal between two datetime """ date1 = obj1 date2 = obj2 - difference = date1 - date2 + # type error: No overload variant of "__sub__" of "datetime" matches argument type "date" + difference = date1 - date2 # type: ignore[operator] return Literal(difference, datatype=XSD.duration) -def calculateFinalDateTime(obj1, dt1, obj2, dt2, operation): +def calculateFinalDateTime( + obj1: Union[py_datetime.date, py_datetime.datetime], + dt1: URIRef, + obj2: Union[isodate.Duration, py_datetime.timedelta], + dt2: URIRef, + operation: str, +) -> Literal: """ Calculates the final dateTime/date/time resultant after addition/ subtraction of duration/dayTimeDuration/yearMonthDuration @@ -1133,7 +1163,22 @@ def calculateFinalDateTime(obj1, dt1, obj2, dt2, operation): raise SPARQLError("Incompatible Data types to DateTime Operations") -def EBV(rt): +@overload +def EBV(rt: Literal) -> bool: + ... + + +@overload +def EBV(rt: Union[Variable, IdentifiedNode, SPARQLError, Expr]) -> NoReturn: + ... + + +@overload +def EBV(rt: Union[Identifier, SPARQLError, Expr]) -> Union[bool, NoReturn]: + ... 
+ + +def EBV(rt: Union[Identifier, SPARQLError, Expr]) -> bool: """ Effective Boolean Value (EBV) @@ -1150,7 +1195,6 @@ def EBV(rt): """ if isinstance(rt, Literal): - if rt.datatype == XSD.boolean: return rt.toPython() @@ -1178,7 +1222,7 @@ def EBV(rt): ) -def _lang_range_check(range, lang): +def _lang_range_check(range: Literal, lang: Literal) -> bool: """ Implementation of the extended filtering algorithm, as defined in point 3.3.2, of U{RFC 4647}, on @@ -1196,7 +1240,7 @@ def _lang_range_check(range, lang): """ - def _match(r, l_): + def _match(r: str, l_: str) -> bool: """ Matching of a range and language item: either range is a wildcard or the two are equal diff --git a/dependencies/rdflib/plugins/sparql/parser.py b/dependencies/rdflib/plugins/sparql/parser.py index 2035b4f08..455377ed1 100644 --- a/dependencies/rdflib/plugins/sparql/parser.py +++ b/dependencies/rdflib/plugins/sparql/parser.py @@ -3,9 +3,13 @@ based on pyparsing """ +from __future__ import annotations import re import sys +from typing import Any, BinaryIO, List +from typing import Optional as OptionalType +from typing import TextIO, Tuple, Union from pyparsing import CaselessKeyword as Keyword # watch out :) from pyparsing import ( @@ -27,7 +31,7 @@ from rdflib.compat import decodeUnicodeEscape from . 
import operators as op -from .parserutils import Comp, Param, ParamList +from .parserutils import Comp, CompValue, Param, ParamList # from pyparsing import Keyword as CaseSensitiveKeyword @@ -37,25 +41,25 @@ # ---------------- ACTIONS -def neg(literal): +def neg(literal: rdflib.Literal) -> rdflib.Literal: return rdflib.Literal(-literal, datatype=literal.datatype) -def setLanguage(terms): +def setLanguage(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], lang=terms[1]) -def setDataType(terms): +def setDataType(terms: Tuple[Any, OptionalType[str]]) -> rdflib.Literal: return rdflib.Literal(terms[0], datatype=terms[1]) -def expandTriples(terms): +def expandTriples(terms: ParseResults) -> List[Any]: """ Expand ; and , syntax for repeat predicates, subjects """ # import pdb; pdb.set_trace() try: - res = [] + res: List[Any] = [] if DEBUG: print("Terms", terms) l_ = len(terms) @@ -102,7 +106,7 @@ def expandTriples(terms): raise -def expandBNodeTriples(terms): +def expandBNodeTriples(terms: ParseResults) -> List[Any]: """ expand [ ?p ?o ] syntax for implicit bnodes """ @@ -119,14 +123,14 @@ def expandBNodeTriples(terms): raise -def expandCollection(terms): +def expandCollection(terms: ParseResults) -> List[List[Any]]: """ expand ( 1 2 3 ) notation for collections """ if DEBUG: print("Collection: ", terms) - res = [] + res: List[Any] = [] other = [] for x in terms: if isinstance(x, list): # is this a [ .. ] ? 
@@ -1479,7 +1483,7 @@ def expandCollection(terms): "DescribeQuery", Keyword("DESCRIBE") + (OneOrMore(ParamList("var", VarOrIri)) | "*") - + Param("datasetClause", ZeroOrMore(DatasetClause)) + + ZeroOrMore(ParamList("datasetClause", DatasetClause)) + Optional(WhereClause) + SolutionModifier + ValuesClause, @@ -1508,25 +1512,27 @@ def expandCollection(terms): UpdateUnit.ignore("#" + restOfLine) -expandUnicodeEscapes_re = re.compile(r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I) +expandUnicodeEscapes_re: re.Pattern = re.compile( + r"\\u([0-9a-f]{4}(?:[0-9a-f]{4})?)", flags=re.I +) -def expandUnicodeEscapes(q): +def expandUnicodeEscapes(q: str) -> str: r""" The syntax of the SPARQL Query Language is expressed over code points in Unicode [UNICODE]. The encoding is always UTF-8 [RFC3629]. Unicode code points may also be expressed using an \ uXXXX (U+0 to U+FFFF) or \ UXXXXXXXX syntax (for U+10000 onwards) where X is a hexadecimal digit [0-9A-F] """ - def expand(m): + def expand(m: re.Match) -> str: try: return chr(int(m.group(1), 16)) - except: # noqa: E722 - raise Exception("Invalid unicode code point: " + m) + except (ValueError, OverflowError) as e: + raise ValueError("Invalid unicode code point: " + m.group(1)) from e return expandUnicodeEscapes_re.sub(expand, q) -def parseQuery(q): +def parseQuery(q: Union[str, bytes, TextIO, BinaryIO]) -> ParseResults: if hasattr(q, "read"): q = q.read() if isinstance(q, bytes): @@ -1536,7 +1542,7 @@ def parseQuery(q): return Query.parseString(q, parseAll=True) -def parseUpdate(q): +def parseUpdate(q: Union[str, bytes, TextIO, BinaryIO]) -> CompValue: if hasattr(q, "read"): q = q.read() diff --git a/dependencies/rdflib/plugins/sparql/parserutils.py b/dependencies/rdflib/plugins/sparql/parserutils.py index a936b0467..2c5bc38bd 100644 --- a/dependencies/rdflib/plugins/sparql/parserutils.py +++ b/dependencies/rdflib/plugins/sparql/parserutils.py @@ -1,10 +1,22 @@ +from __future__ import annotations + from collections import 
OrderedDict from types import MethodType -from typing import TYPE_CHECKING, Any - -from pyparsing import ParseResults, TokenConverter, originalTextFor - -from rdflib import BNode, Variable +from typing import ( + TYPE_CHECKING, + Any, + Callable, + List, + Mapping, + Optional, + Tuple, + TypeVar, + Union, +) + +from pyparsing import ParserElement, ParseResults, TokenConverter, originalTextFor + +from rdflib.term import BNode, Identifier, Variable if TYPE_CHECKING: from rdflib.plugins.sparql.sparql import FrozenBindings @@ -47,7 +59,7 @@ def value( val: Any, variables: bool = False, errors: bool = False, -): +) -> Any: """ utility function for evaluating something... @@ -87,14 +99,16 @@ def value( return val -class ParamValue(object): +class ParamValue: """ The result of parsing a Param This just keeps the name/value All cleverness is in the CompValue """ - def __init__(self, name, tokenList, isList): + def __init__( + self, name: str, tokenList: Union[List[Any], ParseResults], isList: bool + ): self.isList = isList self.name = name if isinstance(tokenList, (list, ParseResults)) and len(tokenList) == 1: @@ -102,7 +116,7 @@ def __init__(self, name, tokenList, isList): self.tokenList = tokenList - def __str__(self): + def __str__(self) -> str: return "Param(%s, %s)" % (self.name, self.tokenList) @@ -113,13 +127,13 @@ class Param(TokenConverter): their values merged in a list """ - def __init__(self, name, expr, isList=False): + def __init__(self, name: str, expr, isList: bool = False): self.isList = isList TokenConverter.__init__(self, expr) self.setName(name) self.addParseAction(self.postParse2) - def postParse2(self, tokenList): + def postParse2(self, tokenList: Union[List[Any], ParseResults]) -> ParamValue: return ParamValue(self.name, tokenList, self.isList) @@ -128,14 +142,11 @@ class ParamList(Param): A shortcut for a Param with isList=True """ - def __init__(self, name, expr): + def __init__(self, name: str, expr): Param.__init__(self, name, expr, True) -class 
plist(list): - """this is just a list, but we want our own type to check for""" - - pass +_ValT = TypeVar("_ValT") class CompValue(OrderedDict): @@ -152,16 +163,18 @@ def __init__(self, name: str, **values): self.name = name self.update(values) - def clone(self): + def clone(self) -> CompValue: return CompValue(self.name, **self) - def __str__(self): + def __str__(self) -> str: return self.name + "_" + OrderedDict.__str__(self) - def __repr__(self): + def __repr__(self) -> str: return self.name + "_" + dict.__repr__(self) - def _value(self, val, variables=False, errors=False): + def _value( + self, val: _ValT, variables: bool = False, errors: bool = False + ) -> Union[_ValT, Any]: if self.ctx is not None: return value(self.ctx, val, variables) else: @@ -170,7 +183,9 @@ def _value(self, val, variables=False, errors=False): def __getitem__(self, a): return self._value(OrderedDict.__getitem__(self, a)) - def get(self, a, variables=False, errors=False): + # type error: Signature of "get" incompatible with supertype "dict" + # type error: Signature of "get" incompatible with supertype "Mapping" [override] + def get(self, a, variables: bool = False, errors: bool = False): # type: ignore[override] return self._value(OrderedDict.get(self, a, a), variables, errors) def __getattr__(self, a: str) -> Any: @@ -194,17 +209,23 @@ class Expr(CompValue): A CompValue that is evaluatable """ - def __init__(self, name, evalfn=None, **values): + def __init__( + self, + name: str, + evalfn: Optional[Callable[[Any, Any], Any]] = None, + **values, + ): super(Expr, self).__init__(name, **values) self._evalfn = None if evalfn: self._evalfn = MethodType(evalfn, self) - def eval(self, ctx={}): + def eval(self, ctx: Any = {}) -> Union[SPARQLError, Any]: try: - self.ctx = ctx - return self._evalfn(ctx) + self.ctx: Optional[Union[Mapping, FrozenBindings]] = ctx + # type error: "None" not callable + return self._evalfn(ctx) # type: ignore[misc] except SPARQLError as e: return e finally: @@ 
-220,13 +241,16 @@ class Comp(TokenConverter): Returns CompValue / Expr objects - depending on whether evalFn is set. """ - def __init__(self, name, expr): + def __init__(self, name: str, expr: ParserElement): self.expr = expr TokenConverter.__init__(self, expr) self.setName(name) - self.evalfn = None + self.evalfn: Optional[Callable[[Any, Any], Any]] = None - def postParse(self, instring, loc, tokenList): + def postParse( + self, instring: str, loc: int, tokenList: ParseResults + ) -> Union[Expr, CompValue]: + res: Union[Expr, CompValue] if self.evalfn: res = Expr(self.name) res._evalfn = MethodType(self.evalfn, res) @@ -244,7 +268,7 @@ def postParse(self, instring, loc, tokenList): if isinstance(t, ParamValue): if t.isList: if t.name not in res: - res[t.name] = plist() + res[t.name] = [] res[t.name].append(t.tokenList) else: res[t.name] = t.tokenList @@ -253,31 +277,39 @@ def postParse(self, instring, loc, tokenList): # res.update(t) return res - def setEvalFn(self, evalfn): + def setEvalFn(self, evalfn: Callable[[Any, Any], Any]) -> Comp: self.evalfn = evalfn return self -def prettify_parsetree(t, indent="", depth=0): - out = [] - if isinstance(t, ParseResults): - for e in t.asList(): - out.append(prettify_parsetree(e, indent, depth + 1)) - for k, v in sorted(t.items()): - out.append("%s%s- %s:\n" % (indent, " " * depth, k)) - out.append(prettify_parsetree(v, indent, depth + 1)) - elif isinstance(t, CompValue): +def prettify_parsetree(t: ParseResults, indent: str = "", depth: int = 0) -> str: + out: List[str] = [] + for e in t.asList(): + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) + for k, v in sorted(t.items()): + out.append("%s%s- %s:\n" % (indent, " " * depth, k)) + out.append(_prettify_sub_parsetree(v, indent, depth + 1)) + return "".join(out) + + +def _prettify_sub_parsetree( + t: Union[Identifier, CompValue, set, list, dict, Tuple, bool, None], + indent: str = "", + depth: int = 0, +) -> str: + out: List[str] = [] + if isinstance(t, 
CompValue): out.append("%s%s> %s:\n" % (indent, " " * depth, t.name)) for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, dict): for k, v in t.items(): out.append("%s%s- %s:\n" % (indent, " " * (depth + 1), k)) - out.append(prettify_parsetree(v, indent, depth + 2)) + out.append(_prettify_sub_parsetree(v, indent, depth + 2)) elif isinstance(t, list): for e in t: - out.append(prettify_parsetree(e, indent, depth + 1)) + out.append(_prettify_sub_parsetree(e, indent, depth + 1)) else: out.append("%s%s- %r\n" % (indent, " " * depth, t)) return "".join(out) diff --git a/dependencies/rdflib/plugins/sparql/processor.py b/dependencies/rdflib/plugins/sparql/processor.py index 26a72dd21..f10f372bc 100644 --- a/dependencies/rdflib/plugins/sparql/processor.py +++ b/dependencies/rdflib/plugins/sparql/processor.py @@ -4,35 +4,57 @@ These should be automatically registered with RDFLib """ +from __future__ import annotations +from typing import Any, Mapping, Optional, Union +from rdflib.graph import Graph from rdflib.plugins.sparql.algebra import translateQuery, translateUpdate from rdflib.plugins.sparql.evaluate import evalQuery from rdflib.plugins.sparql.parser import parseQuery, parseUpdate -from rdflib.plugins.sparql.sparql import Query +from rdflib.plugins.sparql.sparql import Query, Update from rdflib.plugins.sparql.update import evalUpdate from rdflib.query import Processor, Result, UpdateProcessor +from rdflib.term import Identifier -def prepareQuery(queryString, initNs={}, base=None) -> Query: +def prepareQuery( + queryString: str, + initNs: Optional[Mapping[str, Any]] = None, + base: Optional[str] = None, +) -> Query: """ Parse and translate a SPARQL Query """ + if initNs is None: + initNs = {} ret = translateQuery(parseQuery(queryString), base, initNs) ret._original_args = (queryString, initNs, base) return ret 
-def prepareUpdate(updateString, initNs={}, base=None): +def prepareUpdate( + updateString: str, + initNs: Optional[Mapping[str, Any]] = None, + base: Optional[str] = None, +) -> Update: """ Parse and translate a SPARQL Update """ + if initNs is None: + initNs = {} ret = translateUpdate(parseUpdate(updateString), base, initNs) ret._original_args = (updateString, initNs, base) return ret -def processUpdate(graph, updateString, initBindings={}, initNs={}, base=None): +def processUpdate( + graph: Graph, + updateString: str, + initBindings: Optional[Mapping[str, Identifier]] = None, + initNs: Optional[Mapping[str, Any]] = None, + base: Optional[str] = None, +) -> None: """ Process a SPARQL Update Request returns Nothing on success or raises Exceptions on error @@ -43,10 +65,11 @@ def processUpdate(graph, updateString, initBindings={}, initNs={}, base=None): class SPARQLResult(Result): - def __init__(self, res): + def __init__(self, res: Mapping[str, Any]): Result.__init__(self, res["type_"]) self.vars = res.get("vars_") - self.bindings = res.get("bindings") + # type error: Incompatible types in assignment (expression has type "Optional[Any]", variable has type "MutableSequence[Mapping[Variable, Identifier]]") + self.bindings = res.get("bindings") # type: ignore[assignment] self.askAnswer = res.get("askAnswer") self.graph = res.get("graph") @@ -55,7 +78,27 @@ class SPARQLUpdateProcessor(UpdateProcessor): def __init__(self, graph): self.graph = graph - def update(self, strOrQuery, initBindings={}, initNs={}): + def update( + self, + strOrQuery: Union[str, Update], + initBindings: Optional[Mapping[str, Identifier]] = None, + initNs: Optional[Mapping[str, Any]] = None, + ) -> None: + """ + .. caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. 
+ + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. + """ + if isinstance(strOrQuery, str): strOrQuery = translateUpdate(parseUpdate(strOrQuery), initNs=initNs) @@ -66,16 +109,38 @@ class SPARQLProcessor(Processor): def __init__(self, graph): self.graph = graph - def query(self, strOrQuery, initBindings={}, initNs={}, base=None, DEBUG=False): + # NOTE on type error: this is because the super type constructor does not + # accept base argument and thie position of the DEBUG argument is + # different. + # type error: Signature of "query" incompatible with supertype "Processor" + def query( # type: ignore[override] + self, + strOrQuery: Union[str, Query], + initBindings: Optional[Mapping[str, Identifier]] = None, + initNs: Optional[Mapping[str, Any]] = None, + base: Optional[str] = None, + DEBUG: bool = False, + ) -> Mapping[str, Any]: """ Evaluate a query with the given initial bindings, and initial namespaces. The given base is used to resolve relative URIs in the query and will be overridden by any BASE given in the query. + + .. caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. + + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. 
""" - if not isinstance(strOrQuery, Query): - parsetree = parseQuery(strOrQuery) - query = translateQuery(parsetree, base, initNs) - else: - query = strOrQuery - return evalQuery(self.graph, query, initBindings, base) + if isinstance(strOrQuery, str): + strOrQuery = translateQuery(parseQuery(strOrQuery), base, initNs) + + return evalQuery(self.graph, strOrQuery, initBindings, base) diff --git a/dependencies/rdflib/plugins/sparql/results/csvresults.py b/dependencies/rdflib/plugins/sparql/results/csvresults.py index 16273cbcd..cc99ddf94 100644 --- a/dependencies/rdflib/plugins/sparql/results/csvresults.py +++ b/dependencies/rdflib/plugins/sparql/results/csvresults.py @@ -1,3 +1,5 @@ +from __future__ import annotations + """ This module implements a parser and serializer for the CSV SPARQL result @@ -9,23 +11,26 @@ import codecs import csv -from typing import IO +from typing import IO, Dict, List, Optional, Union -from rdflib import BNode, Literal, URIRef, Variable +from rdflib.plugins.sparql.processor import SPARQLResult from rdflib.query import Result, ResultParser, ResultSerializer +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable class CSVResultParser(ResultParser): def __init__(self): self.delim = "," - def parse(self, source, content_type=None): - + # type error: Signature of "parse" incompatible with supertype "ResultParser" + def parse(self, source: IO, content_type: Optional[str] = None) -> Result: # type: ignore[override] r = Result("SELECT") + # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]") if isinstance(source.read(0), bytes): # if reading from source returns bytes do utf-8 decoding - source = codecs.getreader("utf-8")(source) + # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]") + source = codecs.getreader("utf-8")(source) # type: ignore[assignment] reader = csv.reader(source, delimiter=self.delim) r.vars = 
[Variable(x) for x in next(reader)] @@ -36,14 +41,16 @@ def parse(self, source, content_type=None): return r - def parseRow(self, row, v): + def parseRow( + self, row: List[str], v: List[Variable] + ) -> Dict[Variable, Union[BNode, URIRef, Literal]]: return dict( (var, val) for var, val in zip(v, [self.convertTerm(t) for t in row]) if val is not None ) - def convertTerm(self, t): + def convertTerm(self, t: str) -> Optional[Union[BNode, URIRef, Literal]]: if t == "": return None if t.startswith("_:"): @@ -54,15 +61,14 @@ def convertTerm(self, t): class CSVResultSerializer(ResultSerializer): - def __init__(self, result): + def __init__(self, result: SPARQLResult): ResultSerializer.__init__(self, result) self.delim = "," if result.type != "SELECT": raise Exception("CSVSerializer can only serialize select query results") - def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs): - + def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs) -> None: # the serialiser writes bytes in the given encoding # in py3 csv.writer is unicode aware and writes STRINGS, # so we encode afterwards @@ -80,7 +86,9 @@ def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs): [self.serializeTerm(row.get(v), encoding) for v in self.result.vars] # type: ignore[union-attr] ) - def serializeTerm(self, term, encoding): + def serializeTerm( + self, term: Optional[Identifier], encoding: str + ) -> Union[str, Identifier]: if term is None: return "" elif isinstance(term, BNode): diff --git a/dependencies/rdflib/plugins/sparql/results/graph.py b/dependencies/rdflib/plugins/sparql/results/graph.py index 0b14be27b..bfd03c00c 100644 --- a/dependencies/rdflib/plugins/sparql/results/graph.py +++ b/dependencies/rdflib/plugins/sparql/results/graph.py @@ -1,10 +1,14 @@ -from rdflib import Graph +from __future__ import annotations + +from typing import IO, Optional + +from rdflib.graph import Graph from rdflib.query import Result, ResultParser class 
GraphResultParser(ResultParser): - def parse(self, source, content_type): - + # type error: Signature of "parse" incompatible with supertype "ResultParser" + def parse(self, source: IO, content_type: Optional[str]) -> Result: # type: ignore[override] res = Result("CONSTRUCT") # hmm - or describe?type_) res.graph = Graph() res.graph.parse(source, format=content_type) diff --git a/dependencies/rdflib/plugins/sparql/results/jsonresults.py b/dependencies/rdflib/plugins/sparql/results/jsonresults.py index 1c9c5a798..ecdb01247 100644 --- a/dependencies/rdflib/plugins/sparql/results/jsonresults.py +++ b/dependencies/rdflib/plugins/sparql/results/jsonresults.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import json -from typing import IO, Any, Dict +from typing import IO, Any, Dict, Mapping, MutableSequence, Optional -from rdflib import BNode, Literal, URIRef, Variable from rdflib.query import Result, ResultException, ResultParser, ResultSerializer +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable """A Serializer for SPARQL results in JSON: @@ -17,7 +19,8 @@ class JSONResultParser(ResultParser): - def parse(self, source, content_type=None): + # type error: Signature of "parse" incompatible with supertype "ResultParser" + def parse(self, source: IO, content_type: Optional[str] = None) -> Result: # type: ignore[override] inp = source.read() if isinstance(inp, bytes): inp = inp.decode("utf-8") @@ -25,11 +28,11 @@ def parse(self, source, content_type=None): class JSONResultSerializer(ResultSerializer): - def __init__(self, result): + def __init__(self, result: Result): ResultSerializer.__init__(self, result) - def serialize(self, stream: IO, encoding: str = None): # type: ignore[override] - + # type error: Signature of "serialize" incompatible with supertype "ResultSerializer" + def serialize(self, stream: IO, encoding: str = None) -> None: # type: ignore[override] res: Dict[str, Any] = {} if self.result.type == "ASK": res["head"] = {} @@ -49,7 
+52,7 @@ def serialize(self, stream: IO, encoding: str = None): # type: ignore[override] else: stream.write(r) - def _bindingToJSON(self, b): + def _bindingToJSON(self, b: Mapping[Variable, Identifier]) -> Dict[Variable, Any]: res = {} for var in b: j = termToJSON(self, b[var]) @@ -59,7 +62,7 @@ def _bindingToJSON(self, b): class JSONResult(Result): - def __init__(self, json): + def __init__(self, json: Dict[str, Any]): self.json = json if "boolean" in json: type_ = "ASK" @@ -76,17 +79,17 @@ def __init__(self, json): self.bindings = self._get_bindings() self.vars = [Variable(x) for x in json["head"]["vars"]] - def _get_bindings(self): - ret = [] + def _get_bindings(self) -> MutableSequence[Mapping[Variable, Identifier]]: + ret: MutableSequence[Mapping[Variable, Identifier]] = [] for row in self.json["results"]["bindings"]: - outRow = {} + outRow: Dict[Variable, Identifier] = {} for k, v in row.items(): outRow[Variable(k)] = parseJsonTerm(v) ret.append(outRow) return ret -def parseJsonTerm(d): +def parseJsonTerm(d: Dict[str, str]) -> Identifier: """rdflib object (Literal, URIRef, BNode) for the given json-format dict. 
input is like: @@ -107,7 +110,9 @@ def parseJsonTerm(d): raise NotImplementedError("json term type %r" % t) -def termToJSON(self, term): +def termToJSON( + self: JSONResultSerializer, term: Optional[Identifier] +) -> Optional[Dict[str, str]]: if isinstance(term, URIRef): return {"type": "uri", "value": str(term)} elif isinstance(term, Literal): diff --git a/dependencies/rdflib/plugins/sparql/results/rdfresults.py b/dependencies/rdflib/plugins/sparql/results/rdfresults.py index 83ee3ea1f..903734f57 100644 --- a/dependencies/rdflib/plugins/sparql/results/rdfresults.py +++ b/dependencies/rdflib/plugins/sparql/results/rdfresults.py @@ -1,17 +1,20 @@ -from rdflib import RDF, Graph, Namespace, Variable +from typing import IO, Any, MutableMapping, Optional, Union + +from rdflib.graph import Graph +from rdflib.namespace import RDF, Namespace from rdflib.query import Result, ResultParser +from rdflib.term import Node, Variable RS = Namespace("http://www.w3.org/2001/sw/DataAccess/tests/result-set#") class RDFResultParser(ResultParser): - def parse(self, source, **kwargs): + def parse(self, source: Union[IO, Graph], **kwargs: Any) -> Result: return RDFResult(source, **kwargs) class RDFResult(Result): - def __init__(self, source, **kwargs): - + def __init__(self, source: Union[IO, Graph], **kwargs: Any): if not isinstance(source, Graph): graph = Graph() graph.parse(source, **kwargs) @@ -29,7 +32,6 @@ def __init__(self, source, **kwargs): g += graph else: - askAnswer = graph.value(rs, RS.boolean) if askAnswer is not None: @@ -40,20 +42,27 @@ def __init__(self, source, **kwargs): Result.__init__(self, type_) if type_ == "SELECT": - self.vars = [Variable(v) for v in graph.objects(rs, RS.resultVariable)] + # type error: Argument 1 to "Variable" has incompatible type "Node"; expected "str" + self.vars = [Variable(v) for v in graph.objects(rs, RS.resultVariable)] # type: ignore[arg-type] self.bindings = [] for s in graph.objects(rs, RS.solution): - sol = {} + sol: 
MutableMapping[Variable, Optional[Node]] = {} for b in graph.objects(s, RS.binding): - sol[Variable(graph.value(b, RS.variable))] = graph.value( + # type error: Argument 1 to "Variable" has incompatible type "Optional[Node]"; expected "str" + sol[Variable(graph.value(b, RS.variable))] = graph.value( # type: ignore[arg-type] b, RS.value ) - self.bindings.append(sol) + # error: Argument 1 to "append" of "list" has incompatible type "MutableMapping[Variable, Optional[Node]]"; expected "Mapping[Variable, Identifier]" + self.bindings.append(sol) # type: ignore[arg-type] elif type_ == "ASK": - self.askAnswer = askAnswer.value - if askAnswer.value is None: + # type error: Item "Node" of "Optional[Node]" has no attribute "value" + # type error: Item "None" of "Optional[Node]" has no attribute "value" + self.askAnswer = askAnswer.value # type: ignore[union-attr] + # type error: Item "Node" of "Optional[Node]" has no attribute "value" + # type error: Item "None" of "Optional[Node]" has no attribute "value" + if askAnswer.value is None: # type: ignore[union-attr] raise Exception("Malformed boolean in ask answer!") elif type_ == "CONSTRUCT": self.graph = g diff --git a/dependencies/rdflib/plugins/sparql/results/tsvresults.py b/dependencies/rdflib/plugins/sparql/results/tsvresults.py index 42671c710..02274f266 100644 --- a/dependencies/rdflib/plugins/sparql/results/tsvresults.py +++ b/dependencies/rdflib/plugins/sparql/results/tsvresults.py @@ -5,6 +5,8 @@ """ import codecs +import typing +from typing import IO, Union from pyparsing import ( FollowedBy, @@ -16,7 +18,6 @@ ZeroOrMore, ) -from rdflib import Literal as RDFLiteral from rdflib.plugins.sparql.parser import ( BLANK_NODE_LABEL, IRIREF, @@ -29,6 +30,9 @@ ) from rdflib.plugins.sparql.parserutils import Comp, CompValue, Param from rdflib.query import Result, ResultParser +from rdflib.term import BNode +from rdflib.term import Literal as RDFLiteral +from rdflib.term import URIRef ParserElement.setDefaultWhitespaceChars(" 
\n") @@ -59,11 +63,12 @@ class TSVResultParser(ResultParser): - def parse(self, source, content_type=None): - + # type error: Signature of "parse" incompatible with supertype "ResultParser" [override] + def parse(self, source: IO, content_type: typing.Optional[str] = None) -> Result: # type: ignore[override] if isinstance(source.read(0), bytes): # if reading from source returns bytes do utf-8 decoding - source = codecs.getreader("utf-8")(source) + # type error: Incompatible types in assignment (expression has type "StreamReader", variable has type "IO[Any]") + source = codecs.getreader("utf-8")(source) # type: ignore[assignment] r = Result("SELECT") @@ -80,11 +85,14 @@ def parse(self, source, content_type=None): continue row = ROW.parseString(line, parseAll=True) - r.bindings.append(dict(zip(r.vars, (self.convertTerm(x) for x in row)))) + # type error: Generator has incompatible item type "object"; expected "Identifier" + r.bindings.append(dict(zip(r.vars, (self.convertTerm(x) for x in row)))) # type: ignore[misc] return r - def convertTerm(self, t): + def convertTerm( + self, t: Union[object, RDFLiteral, BNode, CompValue, URIRef] + ) -> typing.Optional[Union[object, BNode, URIRef, RDFLiteral]]: if t is NONE_VALUE: return None if isinstance(t, CompValue): diff --git a/dependencies/rdflib/plugins/sparql/results/txtresults.py b/dependencies/rdflib/plugins/sparql/results/txtresults.py index 8b87864b6..3f2f1f511 100644 --- a/dependencies/rdflib/plugins/sparql/results/txtresults.py +++ b/dependencies/rdflib/plugins/sparql/results/txtresults.py @@ -1,12 +1,14 @@ -from typing import IO, List, Optional +from typing import IO, List, Optional, Union -from rdflib import BNode, Literal, URIRef from rdflib.namespace import NamespaceManager from rdflib.query import ResultSerializer -from rdflib.term import Variable +from rdflib.term import BNode, Literal, URIRef, Variable -def _termString(t, namespace_manager: Optional[NamespaceManager]): +def _termString( + t: 
Optional[Union[URIRef, Literal, BNode]], + namespace_manager: Optional[NamespaceManager], +) -> str: if t is None: return "-" if namespace_manager: @@ -26,12 +28,13 @@ class TXTResultSerializer(ResultSerializer): """ # TODO FIXME: class specific args should be keyword only. + # type error: Signature of "serialize" incompatible with supertype "ResultSerializer" def serialize( # type: ignore[override] self, stream: IO, encoding: str, namespace_manager: Optional[NamespaceManager] = None, - ): + ) -> None: """ return a text table of query results """ @@ -50,13 +53,17 @@ def c(s, w): raise Exception("Can only pretty print SELECT results!") if not self.result: - return "(no results)\n" + # type error: No return value expected + return "(no results)\n" # type: ignore[return-value] else: - keys: List[Variable] = self.result.vars # type: ignore[assignment] maxlen = [0] * len(keys) b = [ - [_termString(r[k], namespace_manager) for k in keys] + # type error: Value of type "Union[Tuple[Node, Node, Node], bool, ResultRow]" is not indexable + # type error: Argument 1 to "_termString" has incompatible type "Union[Node, Any]"; expected "Union[URIRef, Literal, BNode, None]" [arg-type] + # type error: No overload variant of "__getitem__" of "tuple" matches argument type "Variable" + # NOTE on type error: The problem here is that r can be more types than _termString expects because result can be a result of multiple types. 
+ [_termString(r[k], namespace_manager) for k in keys] # type: ignore[index, arg-type, call-overload] for r in self.result ] for r in b: diff --git a/dependencies/rdflib/plugins/sparql/results/xmlresults.py b/dependencies/rdflib/plugins/sparql/results/xmlresults.py index 69cb5c303..21ee3449d 100644 --- a/dependencies/rdflib/plugins/sparql/results/xmlresults.py +++ b/dependencies/rdflib/plugins/sparql/results/xmlresults.py @@ -1,12 +1,33 @@ import logging -from typing import IO, Optional +import xml.etree.ElementTree as xml_etree # noqa: N813 +from io import BytesIO +from typing import ( + IO, + TYPE_CHECKING, + Any, + BinaryIO, + Dict, + Optional, + Sequence, + TextIO, + Tuple, + Union, + cast, +) from xml.dom import XML_NAMESPACE from xml.sax.saxutils import XMLGenerator from xml.sax.xmlreader import AttributesNSImpl -from rdflib import BNode, Literal, URIRef, Variable -from rdflib.compat import etree from rdflib.query import Result, ResultException, ResultParser, ResultSerializer +from rdflib.term import BNode, Identifier, Literal, URIRef, Variable + +try: + # https://adamj.eu/tech/2021/12/29/python-type-hints-optional-imports/ + import lxml.etree as lxml_etree + + FOUND_LXML = True +except ImportError: + FOUND_LXML = False SPARQL_XML_NAMESPACE = "http://www.w3.org/2005/sparql-results#" RESULTS_NS_ET = "{%s}" % SPARQL_XML_NAMESPACE @@ -27,19 +48,32 @@ class XMLResultParser(ResultParser): # TODO FIXME: content_type should be a keyword only arg. - def parse(self, source, content_type: Optional[str] = None): # type: ignore[override] + def parse(self, source: IO, content_type: Optional[str] = None) -> Result: # type: ignore[override] return XMLResult(source) class XMLResult(Result): - def __init__(self, source, content_type: Optional[str] = None): - - try: - # try use as if etree is from lxml, and if not use it as normal. 
- parser = etree.XMLParser(huge_tree=True) # type: ignore[call-arg] - tree = etree.parse(source, parser) - except TypeError: - tree = etree.parse(source) + def __init__(self, source: IO, content_type: Optional[str] = None): + parser_encoding: Optional[str] = None + if hasattr(source, "encoding"): + if TYPE_CHECKING: + assert isinstance(source, TextIO) + parser_encoding = "utf-8" + source_str = source.read() + source = BytesIO(source_str.encode(parser_encoding)) + else: + if TYPE_CHECKING: + assert isinstance(source, BinaryIO) + + if FOUND_LXML: + lxml_parser = lxml_etree.XMLParser(huge_tree=True, encoding=parser_encoding) + tree = cast( + xml_etree.ElementTree, + lxml_etree.parse(source, parser=lxml_parser), + ) + else: + xml_parser = xml_etree.XMLParser(encoding=parser_encoding) + tree = xml_etree.parse(source, parser=xml_parser) boolean = tree.find(RESULTS_NS_ET + "boolean") results = tree.find(RESULTS_NS_ET + "results") @@ -56,8 +90,18 @@ def __init__(self, source, content_type: Optional[str] = None): if type_ == "SELECT": self.bindings = [] for result in results: # type: ignore[union-attr] + if result.tag != f"{RESULTS_NS_ET}result": + # This is here because with lxml this also gets comments, + # not just elements. Also this should not operate on non + # "result" elements. + continue r = {} for binding in result: + if binding.tag != f"{RESULTS_NS_ET}binding": + # This is here because with lxml this also gets + # comments, not just elements. Also this should not + # operate on non "binding" elements. 
+ continue # type error: error: Argument 1 to "Variable" has incompatible type "Union[str, None, Any]"; expected "str" # NOTE on type error: Element.get() can return None, and # this will invariably fail if passed into Variable @@ -80,7 +124,7 @@ def __init__(self, source, content_type: Optional[str] = None): self.askAnswer = boolean.text.lower().strip() == "true" # type: ignore[union-attr] -def parseTerm(element): +def parseTerm(element: xml_etree.Element) -> Union[URIRef, Literal, BNode]: """rdflib object (Literal, URIRef, BNode) for the given elementtree element""" tag, text = element.tag, element.text @@ -90,7 +134,8 @@ def parseTerm(element): datatype = None lang = None if element.get("datatype", None): - datatype = URIRef(element.get("datatype")) + # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str" + datatype = URIRef(element.get("datatype")) # type: ignore[arg-type] elif element.get("{%s}lang" % XML_NAMESPACE, None): lang = element.get("{%s}lang" % XML_NAMESPACE) @@ -98,7 +143,8 @@ def parseTerm(element): return ret elif tag == RESULTS_NS_ET + "uri": - return URIRef(text) + # type error: Argument 1 to "URIRef" has incompatible type "Optional[str]"; expected "str" + return URIRef(text) # type: ignore[arg-type] elif tag == RESULTS_NS_ET + "bnode": return BNode(text) else: @@ -106,17 +152,18 @@ def parseTerm(element): class XMLResultSerializer(ResultSerializer): - def __init__(self, result): + def __init__(self, result: Result): ResultSerializer.__init__(self, result) - def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs): - + def serialize(self, stream: IO, encoding: str = "utf-8", **kwargs: Any) -> None: writer = SPARQLXMLWriter(stream, encoding) if self.result.type == "ASK": writer.write_header([]) - writer.write_ask(self.result.askAnswer) + # type error: Argument 1 to "write_ask" of "SPARQLXMLWriter" has incompatible type "Optional[bool]"; expected "bool" + writer.write_ask(self.result.askAnswer) # type: 
ignore[arg-type] else: - writer.write_header(self.result.vars) + # type error: Argument 1 to "write_header" of "SPARQLXMLWriter" has incompatible type "Optional[List[Variable]]"; expected "Sequence[Variable]" + writer.write_header(self.result.vars) # type: ignore[arg-type] writer.write_results_header() for b in self.result.bindings: writer.write_start_result() @@ -134,7 +181,7 @@ class SPARQLXMLWriter: Python saxutils-based SPARQL XML Writer """ - def __init__(self, output, encoding="utf-8"): + def __init__(self, output: IO, encoding: str = "utf-8"): writer = XMLGenerator(output, encoding) writer.startDocument() writer.startPrefixMapping("", SPARQL_XML_NAMESPACE) @@ -147,7 +194,7 @@ def __init__(self, output, encoding="utf-8"): self._encoding = encoding self._results = False - def write_header(self, allvarsL): + def write_header(self, allvarsL: Sequence[Variable]) -> None: self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "head"), "head", AttributesNSImpl({}, {}) ) @@ -161,48 +208,52 @@ def write_header(self, allvarsL): self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "variable"), "variable", - AttributesNSImpl(attr_vals, attr_qnames), + # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]" + # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]" [arg-type] + AttributesNSImpl(attr_vals, attr_qnames), # type: ignore[arg-type] ) self.writer.endElementNS((SPARQL_XML_NAMESPACE, "variable"), "variable") self.writer.endElementNS((SPARQL_XML_NAMESPACE, "head"), "head") - def write_ask(self, val): + def write_ask(self, val: bool) -> None: self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "boolean"), "boolean", AttributesNSImpl({}, {}) ) self.writer.characters(str(val).lower()) self.writer.endElementNS((SPARQL_XML_NAMESPACE, "boolean"), "boolean") - def write_results_header(self): + def 
write_results_header(self) -> None: self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "results"), "results", AttributesNSImpl({}, {}) ) self._results = True - def write_start_result(self): + def write_start_result(self) -> None: self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "result"), "result", AttributesNSImpl({}, {}) ) self._resultStarted = True - def write_end_result(self): + def write_end_result(self) -> None: assert self._resultStarted self.writer.endElementNS((SPARQL_XML_NAMESPACE, "result"), "result") self._resultStarted = False - def write_binding(self, name, val): + def write_binding(self, name: Variable, val: Identifier) -> None: assert self._resultStarted - attr_vals = { + attr_vals: Dict[Tuple[Optional[str], str], str] = { (None, "name"): str(name), } - attr_qnames = { + attr_qnames: Dict[Tuple[Optional[str], str], str] = { (None, "name"): "name", } self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "binding"), "binding", - AttributesNSImpl(attr_vals, attr_qnames), + # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]" + # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[None, str], str]"; expected "Mapping[Tuple[str, str], str]" + AttributesNSImpl(attr_vals, attr_qnames), # type: ignore[arg-type] ) if isinstance(val, URIRef): @@ -230,7 +281,9 @@ def write_binding(self, name, val): self.writer.startElementNS( (SPARQL_XML_NAMESPACE, "literal"), "literal", - AttributesNSImpl(attr_vals, attr_qnames), + # type error: Argument 1 to "AttributesNSImpl" has incompatible type "Dict[Tuple[Optional[str], str], str]"; expected "Mapping[Tuple[str, str], str]" + # type error: Argument 2 to "AttributesNSImpl" has incompatible type "Dict[Tuple[Optional[str], str], str]"; expected "Mapping[Tuple[str, str], str]" + AttributesNSImpl(attr_vals, attr_qnames), # type: ignore[arg-type] ) self.writer.characters(val) 
self.writer.endElementNS((SPARQL_XML_NAMESPACE, "literal"), "literal") @@ -240,7 +293,7 @@ def write_binding(self, name, val): self.writer.endElementNS((SPARQL_XML_NAMESPACE, "binding"), "binding") - def close(self): + def close(self) -> None: if self._results: self.writer.endElementNS((SPARQL_XML_NAMESPACE, "results"), "results") self.writer.endElementNS((SPARQL_XML_NAMESPACE, "sparql"), "sparql") diff --git a/dependencies/rdflib/plugins/sparql/sparql.py b/dependencies/rdflib/plugins/sparql/sparql.py index 760d29652..7bfe28284 100644 --- a/dependencies/rdflib/plugins/sparql/sparql.py +++ b/dependencies/rdflib/plugins/sparql/sparql.py @@ -1,18 +1,38 @@ +from __future__ import annotations + import collections import datetime import itertools import typing as t -from typing import Any, Container, Dict, Iterable, List, Optional, Tuple, Union +from collections.abc import Mapping, MutableMapping +from typing import ( + TYPE_CHECKING, + Any, + Container, + Dict, + Generator, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, +) import isodate import rdflib.plugins.sparql -from rdflib.compat import Mapping, MutableMapping from rdflib.graph import ConjunctiveGraph, Graph from rdflib.namespace import NamespaceManager from rdflib.plugins.sparql.parserutils import CompValue from rdflib.term import BNode, Identifier, Literal, Node, URIRef, Variable +if TYPE_CHECKING: + from rdflib.paths import Path + + +_AnyT = TypeVar("_AnyT") + class SPARQLError(Exception): def __init__(self, msg: Optional[str] = None): @@ -80,8 +100,8 @@ def __len__(self) -> int: d = d.outer return i - def __iter__(self): - d = self + def __iter__(self) -> Generator[str, None, None]: + d: Optional[Bindings] = self while d is not None: yield from d._d d = d.outer @@ -162,7 +182,6 @@ def __init__(self, ctx: "QueryContext", *args, **kwargs): self.ctx = ctx def __getitem__(self, key: Union[Identifier, str]) -> Identifier: - if not isinstance(key, Node): key = Variable(key) @@ -197,7 +216,7 @@ def 
prologue(self) -> Optional["Prologue"]: def forget( self, before: "QueryContext", _except: Optional[Container[Variable]] = None - ): + ) -> FrozenBindings: """ return a frozen dict only of bindings made in self since before @@ -220,14 +239,14 @@ def forget( ), ) - def remember(self, these): + def remember(self, these) -> FrozenBindings: """ return a frozen dict only of bindings in these """ return FrozenBindings(self.ctx, (x for x in self.items() if x[0] in these)) -class QueryContext(object): +class QueryContext: """ Query context - passed along when evaluating the query """ @@ -236,7 +255,7 @@ def __init__( self, graph: Optional[Graph] = None, bindings: Optional[Union[Bindings, FrozenBindings, List[Any]]] = None, - initBindings: Optional[Dict[Variable, Identifier]] = None, + initBindings: Optional[Mapping[str, Identifier]] = None, ): self.initBindings = initBindings self.bindings = Bindings(d=bindings or []) @@ -292,7 +311,18 @@ def dataset(self) -> ConjunctiveGraph: ) return self._dataset - def load(self, source: URIRef, default: bool = False, **kwargs): + def load(self, source: URIRef, default: bool = False, **kwargs: Any) -> None: + """ + Load data from the source into the query context's. + + :param source: The source to load from. + :param default: If `True`, triples from the source will be added to the + default graph, otherwise it will be loaded into a graph with + ``source`` URI as its name. + :param kwargs: Keyword arguments to pass to + :meth:`rdflib.graph.Graph.parse`. 
+ """ + def _load(graph, source): try: return graph.parse(source, format="turtle", **kwargs) @@ -320,13 +350,12 @@ def _load(graph, source): # Unsupported left operand type for + ("None") self.graph += self.dataset.get_context(source) # type: ignore[operator] else: - if default: _load(self.graph, source) else: - _load(self.dataset, source) + _load(self.dataset.get_context(source), source) - def __getitem__(self, key) -> Any: + def __getitem__(self, key: Union[str, Path]) -> Optional[Union[str, Path]]: # in SPARQL BNodes are just labels if not isinstance(key, (BNode, Variable)): return key @@ -335,7 +364,7 @@ def __getitem__(self, key) -> Any: except KeyError: return None - def get(self, key: Variable, default: Optional[Any] = None): + def get(self, key: str, default: Optional[Any] = None) -> Any: try: return self[key] except KeyError: @@ -352,7 +381,7 @@ def solution(self, vars: Optional[Iterable[Variable]] = None) -> FrozenBindings: else: return FrozenBindings(self, self.bindings.items()) - def __setitem__(self, key: Identifier, value: Identifier) -> None: + def __setitem__(self, key: str, value: str) -> None: if key in self.bindings and self.bindings[key] != value: raise AlreadyBound() @@ -384,7 +413,7 @@ class Prologue: A class for holding prefixing bindings and base URI information """ - def __init__(self): + def __init__(self) -> None: self.base: Optional[str] = None self.namespace_manager = NamespaceManager(Graph()) # ns man needs a store diff --git a/dependencies/rdflib/plugins/sparql/update.py b/dependencies/rdflib/plugins/sparql/update.py index 371d43011..5ce86f393 100644 --- a/dependencies/rdflib/plugins/sparql/update.py +++ b/dependencies/rdflib/plugins/sparql/update.py @@ -3,29 +3,38 @@ Code for carrying out Update Operations """ +from __future__ import annotations -from rdflib import Graph, Variable +from typing import TYPE_CHECKING, Iterator, Mapping, Optional, Sequence + +from rdflib.graph import Graph from rdflib.plugins.sparql.evaluate import 
evalBGP, evalPart from rdflib.plugins.sparql.evalutils import _fillTemplate, _join -from rdflib.plugins.sparql.sparql import QueryContext +from rdflib.plugins.sparql.parserutils import CompValue +from rdflib.plugins.sparql.sparql import FrozenDict, QueryContext, Update +from rdflib.term import Identifier, URIRef, Variable -def _graphOrDefault(ctx, g): +def _graphOrDefault(ctx: QueryContext, g: str) -> Optional[Graph]: if g == "DEFAULT": return ctx.graph else: return ctx.dataset.get_context(g) -def _graphAll(ctx, g): +def _graphAll(ctx: QueryContext, g: str) -> Sequence[Graph]: """ return a list of graphs """ if g == "DEFAULT": - return [ctx.graph] + # type error: List item 0 has incompatible type "Optional[Graph]"; expected "Graph" + return [ctx.graph] # type: ignore[list-item] elif g == "NAMED": return [ - c for c in ctx.dataset.contexts() if c.identifier != ctx.graph.identifier + # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" + c + for c in ctx.dataset.contexts() + if c.identifier != ctx.graph.identifier # type: ignore[union-attr] ] elif g == "ALL": return list(ctx.dataset.contexts()) @@ -33,18 +42,21 @@ def _graphAll(ctx, g): return [ctx.dataset.get_context(g)] -def evalLoad(ctx, u): +def evalLoad(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#load """ + if TYPE_CHECKING: + assert isinstance(u.iri, URIRef) + if u.graphiri: ctx.load(u.iri, default=False, publicID=u.graphiri) else: ctx.load(u.iri, default=True) -def evalCreate(ctx, u): +def evalCreate(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#create """ @@ -54,16 +66,15 @@ def evalCreate(ctx, u): raise Exception("Create not implemented!") -def evalClear(ctx, u): +def evalClear(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#clear """ - for g in _graphAll(ctx, u.graphiri): g.remove((None, None, None)) -def evalDrop(ctx, u): +def evalDrop(ctx: QueryContext, u: CompValue) -> 
None: """ http://www.w3.org/TR/sparql11-update/#drop """ @@ -74,22 +85,22 @@ def evalDrop(ctx, u): evalClear(ctx, u) -def evalInsertData(ctx, u): +def evalInsertData(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#insertData """ # add triples g = ctx.graph g += u.triples - # add quads # u.quads is a dict of graphURI=>[triples] for g in u.quads: - cg = ctx.dataset.get_context(g) + # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]" + cg = ctx.dataset.get_context(g) # type: ignore[arg-type] cg += u.quads[g] -def evalDeleteData(ctx, u): +def evalDeleteData(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#deleteData """ @@ -100,22 +111,24 @@ def evalDeleteData(ctx, u): # remove quads # u.quads is a dict of graphURI=>[triples] for g in u.quads: - cg = ctx.dataset.get_context(g) + # type error: Argument 1 to "get_context" of "ConjunctiveGraph" has incompatible type "Optional[Graph]"; expected "Union[IdentifiedNode, str, None]" + cg = ctx.dataset.get_context(g) # type: ignore[arg-type] cg -= u.quads[g] -def evalDeleteWhere(ctx, u): +def evalDeleteWhere(ctx: QueryContext, u: CompValue) -> None: """ http://www.w3.org/TR/sparql11-update/#deleteWhere """ - res = evalBGP(ctx, u.triples) + res: Iterator[FrozenDict] = evalBGP(ctx, u.triples) for g in u.quads: cg = ctx.dataset.get_context(g) c = ctx.pushGraph(cg) res = _join(res, list(evalBGP(c, u.quads[g]))) - for c in res: + # type error: Incompatible types in assignment (expression has type "FrozenBindings", variable has type "QueryContext") + for c in res: # type: ignore[assignment] g = ctx.graph g -= _fillTemplate(u.triples, c) @@ -124,16 +137,15 @@ def evalDeleteWhere(ctx, u): cg -= _fillTemplate(u.quads[g], c) -def evalModify(ctx, u): - +def evalModify(ctx: QueryContext, u: CompValue) -> None: originalctx = ctx # Using replaces the dataset for evaluating the 
where-clause + dg: Optional[Graph] if u.using: otherDefault = False for d in u.using: if d.default: - if not otherDefault: # replace current default graph dg = Graph() @@ -171,21 +183,25 @@ def evalModify(ctx, u): for c in res: dg = ctx.graph if u.delete: - dg -= _fillTemplate(u.delete.triples, c) + # type error: Unsupported left operand type for - ("None") + # type error: Unsupported operand types for - ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]") + dg -= _fillTemplate(u.delete.triples, c) # type: ignore[operator] for g, q in u.delete.quads.items(): cg = ctx.dataset.get_context(c.get(g)) cg -= _fillTemplate(q, c) if u.insert: - dg += _fillTemplate(u.insert.triples, c) + # type error: Unsupported left operand type for + ("None") + # type error: Unsupported operand types for + ("Graph" and "Generator[Tuple[Identifier, Identifier, Identifier], None, None]") + dg += _fillTemplate(u.insert.triples, c) # type: ignore[operator] for g, q in u.insert.quads.items(): cg = ctx.dataset.get_context(c.get(g)) cg += _fillTemplate(q, c) -def evalAdd(ctx, u): +def evalAdd(ctx: QueryContext, u: CompValue) -> None: """ add all triples from src to dst @@ -197,13 +213,15 @@ def evalAdd(ctx, u): srcg = _graphOrDefault(ctx, src) dstg = _graphOrDefault(ctx, dst) - if srcg.identifier == dstg.identifier: + # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" + if srcg.identifier == dstg.identifier: # type: ignore[union-attr] return - dstg += srcg + # type error: Unsupported left operand type for + ("None") + dstg += srcg # type: ignore[operator] -def evalMove(ctx, u): +def evalMove(ctx: QueryContext, u: CompValue) -> None: """ remove all triples from dst @@ -218,20 +236,25 @@ def evalMove(ctx, u): srcg = _graphOrDefault(ctx, src) dstg = _graphOrDefault(ctx, dst) - if srcg.identifier == dstg.identifier: + # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" + if srcg.identifier == dstg.identifier: # type: 
ignore[union-attr] return - dstg.remove((None, None, None)) + # type error: Item "None" of "Optional[Graph]" has no attribute "remove" + dstg.remove((None, None, None)) # type: ignore[union-attr] - dstg += srcg + # type error: Unsupported left operand type for + ("None") + dstg += srcg # type: ignore[operator] if ctx.dataset.store.graph_aware: - ctx.dataset.store.remove_graph(srcg) + # type error: Argument 1 to "remove_graph" of "Store" has incompatible type "Optional[Graph]"; expected "Graph" + ctx.dataset.store.remove_graph(srcg) # type: ignore[arg-type] else: - srcg.remove((None, None, None)) + # type error: Item "None" of "Optional[Graph]" has no attribute "remove" + srcg.remove((None, None, None)) # type: ignore[union-attr] -def evalCopy(ctx, u): +def evalCopy(ctx: QueryContext, u: CompValue) -> None: """ remove all triples from dst @@ -245,15 +268,22 @@ def evalCopy(ctx, u): srcg = _graphOrDefault(ctx, src) dstg = _graphOrDefault(ctx, dst) - if srcg.identifier == dstg.identifier: + # type error: Item "None" of "Optional[Graph]" has no attribute "remove" + if srcg.identifier == dstg.identifier: # type: ignore[union-attr] return - dstg.remove((None, None, None)) + # type error: Item "None" of "Optional[Graph]" has no attribute "remove" + dstg.remove((None, None, None)) # type: ignore[union-attr] - dstg += srcg + # type error: Unsupported left operand type for + ("None") + dstg += srcg # type: ignore[operator] -def evalUpdate(graph, update, initBindings={}): +def evalUpdate( + graph: Graph, + update: Update, + initBindings: Optional[Mapping[str, Identifier]] = None, +) -> None: """ http://www.w3.org/TR/sparql11-update/#updateLanguage @@ -271,11 +301,23 @@ def evalUpdate(graph, update, initBindings={}): This will return None on success and raise Exceptions on error + .. caution:: + + This method can access indirectly requested network endpoints, for + example, query processing will attempt to access network endpoints + specified in ``SERVICE`` directives. 
+ + When processing untrusted or potentially malicious queries, measures + should be taken to restrict network and file access. + + For information on available security measures, see the RDFLib + :doc:`Security Considerations ` + documentation. + """ for u in update.algebra: - - initBindings = dict((Variable(k), v) for k, v in initBindings.items()) + initBindings = dict((Variable(k), v) for k, v in (initBindings or {}).items()) ctx = QueryContext(graph, initBindings=initBindings) ctx.prologue = u.prologue diff --git a/dependencies/rdflib/plugins/stores/auditable.py b/dependencies/rdflib/plugins/stores/auditable.py index 8bbdcd2f5..17fa0e548 100644 --- a/dependencies/rdflib/plugins/stores/auditable.py +++ b/dependencies/rdflib/plugins/stores/auditable.py @@ -16,10 +16,25 @@ """ import threading +from typing import TYPE_CHECKING, Any, Generator, Iterator, List, Optional, Tuple -from rdflib import ConjunctiveGraph, Graph +from rdflib.graph import ConjunctiveGraph, Graph from rdflib.store import Store +if TYPE_CHECKING: + from rdflib.graph import ( + _ContextIdentifierType, + _ContextType, + _ObjectType, + _PredicateType, + _SubjectType, + _TriplePatternType, + _TripleType, + ) + from rdflib.query import Result + from rdflib.term import URIRef + + destructiveOpLocks = { # noqa: N816 "add": None, "remove": None, @@ -27,29 +42,39 @@ class AuditableStore(Store): - def __init__(self, store): + def __init__(self, store: "Store"): self.store = store self.context_aware = store.context_aware # NOTE: this store can't be formula_aware as it doesn't have enough # info to reverse the removal of a quoted statement self.formula_aware = False # store.formula_aware self.transaction_aware = True # This is only half true - self.reverseOps = [] + self.reverseOps: List[ + Tuple[ + Optional["_SubjectType"], + Optional["_PredicateType"], + Optional["_ObjectType"], + Optional["_ContextIdentifierType"], + str, + ] + ] = [] self.rollbackLock = threading.RLock() - def open(self, configuration, 
create=True): + def open(self, configuration: str, create: bool = True) -> Optional[int]: return self.store.open(configuration, create) - def close(self, commit_pending_transaction=False): + def close(self, commit_pending_transaction: bool = False) -> None: self.store.close() - def destroy(self, configuration): + def destroy(self, configuration: str) -> None: self.store.destroy(configuration) - def query(self, *args, **kw): + def query(self, *args: Any, **kw: Any) -> "Result": return self.store.query(*args, **kw) - def add(self, triple, context, quoted=False): + def add( + self, triple: "_TripleType", context: "_ContextType", quoted: bool = False + ) -> None: (s, p, o) = triple lock = destructiveOpLocks["add"] lock = lock if lock else threading.RLock() @@ -69,7 +94,9 @@ def add(self, triple, context, quoted=False): pass self.store.add((s, p, o), context, quoted) - def remove(self, spo, context=None): + def remove( + self, spo: "_TriplePatternType", context: Optional["_ContextType"] = None + ) -> None: subject, predicate, object_ = spo lock = destructiveOpLocks["remove"] lock = lock if lock else threading.RLock() @@ -84,7 +111,8 @@ def remove(self, spo, context=None): ctxId = context.identifier if context is not None else None # noqa: N806 if None in [subject, predicate, object_, context]: if ctxId: - for s, p, o in context.triples((subject, predicate, object_)): + # type error: Item "None" of "Optional[Graph]" has no attribute "triples" + for s, p, o in context.triples((subject, predicate, object_)): # type: ignore[union-attr] try: self.reverseOps.remove((s, p, o, ctxId, "remove")) except ValueError: @@ -94,9 +122,11 @@ def remove(self, spo, context=None): (subject, predicate, object_) ): try: - self.reverseOps.remove((s, p, o, ctx.identifier, "remove")) + # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" + self.reverseOps.remove((s, p, o, ctx.identifier, "remove")) # type: ignore[union-attr] except ValueError: - 
self.reverseOps.append((s, p, o, ctx.identifier, "add")) + # type error: Item "None" of "Optional[Graph]" has no attribute "identifier" + self.reverseOps.append((s, p, o, ctx.identifier, "add")) # type: ignore[union-attr] else: if not list(self.triples((subject, predicate, object_), context)): return # triple not present in store, do nothing @@ -108,7 +138,9 @@ def remove(self, spo, context=None): self.reverseOps.append((subject, predicate, object_, ctxId, "add")) self.store.remove((subject, predicate, object_), context) - def triples(self, triple, context=None): + def triples( + self, triple: "_TriplePatternType", context: Optional["_ContextType"] = None + ) -> Iterator[Tuple["_TripleType", Iterator[Optional["_ContextType"]]]]: (su, pr, ob) = triple context = ( context.__class__(self.store, context.identifier) @@ -118,7 +150,7 @@ def triples(self, triple, context=None): for (s, p, o), cg in self.store.triples((su, pr, ob), context): yield (s, p, o), cg - def __len__(self, context=None): + def __len__(self, context: Optional["_ContextType"] = None): context = ( context.__class__(self.store, context.identifier) if context is not None @@ -126,33 +158,36 @@ def __len__(self, context=None): ) return self.store.__len__(context) - def contexts(self, triple=None): + def contexts( + self, triple: Optional["_TripleType"] = None + ) -> Generator["_ContextType", None, None]: for ctx in self.store.contexts(triple): yield ctx - def bind(self, prefix, namespace, override=True): + def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: self.store.bind(prefix, namespace, override=override) - def prefix(self, namespace): + def prefix(self, namespace: "URIRef") -> Optional[str]: return self.store.prefix(namespace) - def namespace(self, prefix): + def namespace(self, prefix: str) -> Optional["URIRef"]: return self.store.namespace(prefix) - def namespaces(self): + def namespaces(self) -> Iterator[Tuple[str, "URIRef"]]: return self.store.namespaces() - def 
commit(self): + def commit(self) -> None: self.reverseOps = [] - def rollback(self): + def rollback(self) -> None: # Acquire Rollback lock and apply reverse operations in the forward # order with self.rollbackLock: for subject, predicate, obj, context, op in self.reverseOps: if op == "add": + # type error: Argument 2 to "Graph" has incompatible type "Optional[Node]"; expected "Union[IdentifiedNode, str, None]" self.store.add( - (subject, predicate, obj), Graph(self.store, context) + (subject, predicate, obj), Graph(self.store, context) # type: ignore[arg-type] ) else: self.store.remove( diff --git a/dependencies/rdflib/plugins/stores/berkeleydb.py b/dependencies/rdflib/plugins/stores/berkeleydb.py index b580b2f4f..23968f77a 100644 --- a/dependencies/rdflib/plugins/stores/berkeleydb.py +++ b/dependencies/rdflib/plugins/stores/berkeleydb.py @@ -2,13 +2,17 @@ from os import mkdir from os.path import abspath, exists from threading import Thread +from typing import TYPE_CHECKING, Any, Callable, Dict, Generator, List, Optional, Tuple from urllib.request import pathname2url from rdflib.store import NO_STORE, VALID_STORE, Store -from rdflib.term import URIRef +from rdflib.term import Identifier, Node, URIRef +if TYPE_CHECKING: + from rdflib.graph import Graph, _ContextType, _TriplePatternType, _TripleType -def bb(u): + +def bb(u: str) -> bytes: return u.encode("utf-8") @@ -34,7 +38,24 @@ def bb(u): logger = logging.getLogger(__name__) -__all__ = ["BerkeleyDB"] +__all__ = [ + "BerkeleyDB", + "_ToKeyFunc", + "_FromKeyFunc", + "_GetPrefixFunc", + "_ResultsFromKeyFunc", +] + + +_ToKeyFunc = Callable[[Tuple[bytes, bytes, bytes], bytes], bytes] +_FromKeyFunc = Callable[[bytes], Tuple[bytes, bytes, bytes, bytes]] +_GetPrefixFunc = Callable[ + [Tuple[str, str, str], Optional[str]], Generator[str, None, None] +] +_ResultsFromKeyFunc = Callable[ + [bytes, Optional[Node], Optional[Node], Optional[Node], bytes], + Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]], +] class 
BerkeleyDB(Store): @@ -47,25 +68,29 @@ class BerkeleyDB(Store): for BerkeleyDB. This store allows for quads as well as triples. See examples of use - in both the `examples.berkeleydb_example` and `test.test_store_berkeleydb` + in both the `examples.berkeleydb_example` and ``test/test_store/test_store_berkeleydb.py`` files. **NOTE on installation**: To use this store, you must have BerkeleyDB installed on your system - separately to Python (`brew install berkeley-db` on a Mac) and also have - the BerkeleyDB Python wrapper installed (`pip install berkeleydb`). + separately to Python (``brew install berkeley-db`` on a Mac) and also have + the BerkeleyDB Python wrapper installed (``pip install berkeleydb``). You may need to install BerkeleyDB Python wrapper like this: - `YES_I_HAVE_THE_RIGHT_TO_USE_THIS_BERKELEY_DB_VERSION=1 pip install berkeleydb` + ``YES_I_HAVE_THE_RIGHT_TO_USE_THIS_BERKELEY_DB_VERSION=1 pip install berkeleydb`` """ context_aware = True formula_aware = True transaction_aware = False graph_aware = True - db_env = None + db_env: "db.DBEnv" = None - def __init__(self, configuration=None, identifier=None): + def __init__( + self, + configuration: Optional[str] = None, + identifier: Optional["Identifier"] = None, + ): if not has_bsddb: raise ImportError("Unable to import berkeleydb, store is unusable.") self.__open = False @@ -73,13 +98,16 @@ def __init__(self, configuration=None, identifier=None): super(BerkeleyDB, self).__init__(configuration) self._loads = self.node_pickler.loads self._dumps = self.node_pickler.dumps + self.__indicies_info: List[Tuple[Any, _ToKeyFunc, _FromKeyFunc]] - def __get_identifier(self): + def __get_identifier(self) -> Optional["Identifier"]: return self.__identifier identifier = property(__get_identifier) - def _init_db_environment(self, homeDir, create=True): # noqa: N803 + def _init_db_environment( + self, homeDir: str, create: bool = True # noqa: N803 + ) -> "db.DBEnv": # noqa: N803 if not exists(homeDir): if create is 
True: mkdir(homeDir) @@ -94,10 +122,10 @@ def _init_db_environment(self, homeDir, create=True): # noqa: N803 db_env.open(homeDir, ENVFLAGS | db.DB_CREATE) return db_env - def is_open(self): + def is_open(self) -> bool: return self.__open - def open(self, path, create=True): + def open(self, path: str, create: bool = True) -> Optional[int]: if not has_bsddb: return NO_STORE homeDir = path # noqa: N806 @@ -127,11 +155,14 @@ def open(self, path, create=True): dbsetflags = 0 # create and open the DBs - self.__indicies = [ + self.__indicies: List["db.DB"] = [ None, ] * 3 + # NOTE on type ingore: this is because type checker does not like this + # way of initializing, using a temporary variable will solve it. + # type error: error: List item 0 has incompatible type "None"; expected "Tuple[Any, Callable[[Tuple[bytes, bytes, bytes], bytes], bytes], Callable[[bytes], Tuple[bytes, bytes, bytes, bytes]]]" self.__indicies_info = [ - None, + None, # type: ignore[list-item] ] * 3 for i in range(0, 3): index_name = to_key_func(i)( @@ -144,9 +175,11 @@ def open(self, path, create=True): self.__indicies[i] = index self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i)) - lookup = {} + lookup: Dict[ + int, Tuple["db.DB", _GetPrefixFunc, _FromKeyFunc, _ResultsFromKeyFunc] + ] = {} for i in range(0, 8): - results = [] + results: List[Tuple[Tuple[int, int], int, int]] = [] for start in range(0, 3): score = 1 len = 0 @@ -160,10 +193,15 @@ def open(self, path, create=True): results.append(((score, tie_break), start, len)) results.sort() - score, start, len = results[-1] - - def get_prefix_func(start, end): - def get_prefix(triple, context): + # NOTE on type error: this is because the variable `score` is + # reused with different type + # type error: Incompatible types in assignment (expression has type "Tuple[int, int]", variable has type "int") + score, start, len = results[-1] # type: ignore[assignment] + + def get_prefix_func(start: int, end: int) -> _GetPrefixFunc: + def 
get_prefix( + triple: Tuple[str, str, str], context: Optional[str] + ) -> Generator[str, None, None]: if context is None: yield "" else: @@ -212,7 +250,7 @@ def get_prefix(triple, context): self.__sync_thread = t return VALID_STORE - def __sync_run(self): + def __sync_run(self) -> None: from time import sleep, time try: @@ -236,7 +274,7 @@ def __sync_run(self): except Exception as e: logger.exception(e) - def sync(self): + def sync(self) -> None: if self.__open: for i in self.__indicies: i.sync() @@ -246,7 +284,7 @@ def sync(self): self.__i2k.sync() self.__k2i.sync() - def close(self, commit_pending_transaction=False): + def close(self, commit_pending_transaction: bool = False) -> None: self.__open = False self.__sync_thread.join() for i in self.__indicies: @@ -258,7 +296,13 @@ def close(self, commit_pending_transaction=False): self.__k2i.close() self.db_env.close() - def add(self, triple, context, quoted=False, txn=None): + def add( + self, + triple: "_TripleType", + context: "_ContextType", + quoted: bool = False, + txn: Optional[Any] = None, + ) -> None: """\ Add a triple to the store of triples. """ @@ -298,7 +342,13 @@ def add(self, triple, context, quoted=False, txn=None): self.__needs_sync = True - def __remove(self, spo, c, quoted=False, txn=None): + def __remove( + self, + spo: Tuple[bytes, bytes, bytes], + c: bytes, + quoted: bool = False, + txn: Optional[Any] = None, + ) -> None: s, p, o = spo cspo, cpos, cosp = self.__indicies contexts_value = cspo.get( @@ -327,7 +377,13 @@ def __remove(self, spo, c, quoted=False, txn=None): except db.DBNotFoundError: pass # TODO: is it okay to ignore these? - def remove(self, spo, context, txn=None): + # type error: Signature of "remove" incompatible with supertype "Store" + def remove( # type: ignore[override] + self, + spo: "_TriplePatternType", + context: Optional["_ContextType"], + txn: Optional[Any] = None, + ) -> None: subject, predicate, object = spo assert self.__open, "The Store must be open." 
Store.remove(self, (subject, predicate, object), context) @@ -376,7 +432,10 @@ def remove(self, spo, context, txn=None): current = None cursor.close() if key.startswith(prefix): - c, s, p, o = from_key(key) + # NOTE on type error: variables are being reused with a + # different type + # type error: Incompatible types in assignment (expression has type "bytes", variable has type "str") + c, s, p, o = from_key(key) # type: ignore[assignment] if context is None: contexts_value = index.get(key, txn=txn) or "".encode("latin-1") # remove triple from all non quoted contexts @@ -385,9 +444,15 @@ def remove(self, spo, context, txn=None): contexts.add("".encode("latin-1")) for c in contexts: for i, _to_key, _ in self.__indicies_info: - i.delete(_to_key((s, p, o), c), txn=txn) + # NOTE on type error: variables are being + # reused with a different type + # type error: Argument 1 has incompatible type "Tuple[str, str, str]"; expected "Tuple[bytes, bytes, bytes]" + # type error: Argument 2 has incompatible type "str"; expected "bytes" + i.delete(_to_key((s, p, o), c), txn=txn) # type: ignore[arg-type] else: - self.__remove((s, p, o), c, txn=txn) + # type error: Argument 1 to "__remove" of "BerkeleyDB" has incompatible type "Tuple[str, str, str]"; expected "Tuple[bytes, bytes, bytes]" + # type error: Argument 2 to "__remove" of "BerkeleyDB" has incompatible type "str"; expected "bytes" + self.__remove((s, p, o), c, txn=txn) # type: ignore[arg-type] else: break @@ -404,7 +469,16 @@ def remove(self, spo, context, txn=None): self.__needs_sync = needs_sync - def triples(self, spo, context=None, txn=None): + def triples( + self, + spo: "_TriplePatternType", + context: Optional["_ContextType"] = None, + txn: Optional[Any] = None, + ) -> Generator[ + Tuple["_TripleType", Generator[Optional["_ContextType"], None, None]], + None, + None, + ]: """A generator over all the triples matching""" assert self.__open, "The Store must be open." 
@@ -437,11 +511,14 @@ def triples(self, spo, context=None, txn=None): cursor.close() if key and key.startswith(prefix): contexts_value = index.get(key, txn=txn) - yield results_from_key(key, subject, predicate, object, contexts_value) + # type error: Incompatible types in "yield" (actual type "Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]]", expected type "Tuple[Tuple[IdentifiedNode, URIRef, Identifier], Iterator[Optional[Graph]]]") + # NOTE on type ignore: this is needed because some context is + # lost in the process of extracting triples from the database. + yield results_from_key(key, subject, predicate, object, contexts_value) # type: ignore[misc] else: break - def __len__(self, context=None): + def __len__(self, context: Optional["_ContextType"] = None) -> int: assert self.__open, "The Store must be open." if context is not None: if context == self: @@ -467,9 +544,13 @@ def __len__(self, context=None): cursor.close() return count - def bind(self, prefix, namespace, override=True): - prefix = prefix.encode("utf-8") - namespace = namespace.encode("utf-8") + def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: + # NOTE on type error: this is because the variables are reused with + # another type. 
+ # type error: Incompatible types in assignment (expression has type "bytes", variable has type "str") + prefix = prefix.encode("utf-8") # type: ignore[assignment] + # type error: Incompatible types in assignment (expression has type "bytes", variable has type "URIRef") + namespace = namespace.encode("utf-8") # type: ignore[assignment] bound_prefix = self.__prefix.get(namespace) bound_namespace = self.__namespace.get(prefix) if override: @@ -483,21 +564,27 @@ def bind(self, prefix, namespace, override=True): self.__prefix[bound_namespace or namespace] = bound_prefix or prefix self.__namespace[bound_prefix or prefix] = bound_namespace or namespace - def namespace(self, prefix): - prefix = prefix.encode("utf-8") + def namespace(self, prefix: str) -> Optional["URIRef"]: + # NOTE on type error: this is because the variable is reused with + # another type. + # type error: Incompatible types in assignment (expression has type "bytes", variable has type "str") + prefix = prefix.encode("utf-8") # type: ignore[assignment] ns = self.__namespace.get(prefix, None) if ns is not None: return URIRef(ns.decode("utf-8")) return None - def prefix(self, namespace): - namespace = namespace.encode("utf-8") + def prefix(self, namespace: "URIRef") -> Optional[str]: + # NOTE on type error: this is because the variable is reused with + # another type. 
+ # type error: Incompatible types in assignment (expression has type "bytes", variable has type "URIRef") + namespace = namespace.encode("utf-8") # type: ignore[assignment] prefix = self.__prefix.get(namespace, None) if prefix is not None: return prefix.decode("utf-8") return None - def namespaces(self): + def namespaces(self) -> Generator[Tuple[str, "URIRef"], None, None]: cursor = self.__namespace.cursor() results = [] current = cursor.first() @@ -510,20 +597,31 @@ def namespaces(self): for prefix, namespace in results: yield prefix, URIRef(namespace) - def contexts(self, triple=None): + def contexts( + self, triple: Optional["_TripleType"] = None + ) -> Generator["_ContextType", None, None]: _from_string = self._from_string _to_string = self._to_string - + # NOTE on type errors: context is lost because of how data is loaded + # from the DB. if triple: - s, p, o = triple - s = _to_string(s) - p = _to_string(p) - o = _to_string(o) + s: str + p: str + o: str + # type error: Incompatible types in assignment (expression has type "Node", variable has type "str") + s, p, o = triple # type: ignore[assignment] + # type error: Argument 1 has incompatible type "str"; expected "Node" + s = _to_string(s) # type: ignore[arg-type] + # type error: Argument 1 has incompatible type "str"; expected "Node" + p = _to_string(p) # type: ignore[arg-type] + # type error: Argument 1 has incompatible type "str"; expected "Node" + o = _to_string(o) # type: ignore[arg-type] contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o))) if contexts: for c in contexts.split("^".encode("latin-1")): if c: - yield _from_string(c) + # type error: Incompatible types in "yield" (actual type "Node", expected type "Graph") + yield _from_string(c) # type: ignore[misc] else: index = self.__contexts cursor = index.cursor() @@ -532,7 +630,8 @@ def contexts(self, triple=None): while current: key, value = current context = _from_string(key) - yield context + # type error: Incompatible types in 
"yield" (actual type "Node", expected type "Graph") + yield context # type: ignore[misc] cursor = index.cursor() try: cursor.set_range(key) @@ -542,17 +641,17 @@ def contexts(self, triple=None): current = None cursor.close() - def add_graph(self, graph): + def add_graph(self, graph: "Graph") -> None: self.__contexts.put(bb(self._to_string(graph)), b"") - def remove_graph(self, graph): + def remove_graph(self, graph: "Graph"): self.remove((None, None, None), graph) - def _from_string(self, i): + def _from_string(self, i: bytes) -> Node: k = self.__i2k.get(int(i)) return self._loads(k) - def _to_string(self, term, txn=None): + def _to_string(self, term: Node, txn: Optional[Any] = None) -> str: k = self._dumps(term) i = self.__k2i.get(k, txn=txn) if i is None: @@ -568,30 +667,42 @@ def _to_string(self, term, txn=None): i = i.decode() return i - def __lookup(self, spo, context, txn=None): + def __lookup( + self, + spo: "_TriplePatternType", + context: Optional["_ContextType"], + txn: Optional[Any] = None, + ) -> Tuple["db.DB", bytes, _FromKeyFunc, _ResultsFromKeyFunc]: subject, predicate, object = spo _to_string = self._to_string + # NOTE on type errors: this is because the same variable is used with different types. 
if context is not None: - context = _to_string(context, txn=txn) + # type error: Incompatible types in assignment (expression has type "str", variable has type "Optional[Graph]") + context = _to_string(context, txn=txn) # type: ignore[assignment] i = 0 if subject is not None: i += 1 - subject = _to_string(subject, txn=txn) + # type error: Incompatible types in assignment (expression has type "str", variable has type "Node") + subject = _to_string(subject, txn=txn) # type: ignore[assignment] if predicate is not None: i += 2 - predicate = _to_string(predicate, txn=txn) + # type error: Incompatible types in assignment (expression has type "str", variable has type "Node") + predicate = _to_string(predicate, txn=txn) # type: ignore[assignment] if object is not None: i += 4 - object = _to_string(object, txn=txn) + # type error: Incompatible types in assignment (expression has type "str", variable has type "Node") + object = _to_string(object, txn=txn) # type: ignore[assignment] index, prefix_func, from_key, results_from_key = self.__lookup_dict[i] # print (subject, predicate, object), context, prefix_func, index # #DEBUG - prefix = bb("^".join(prefix_func((subject, predicate, object), context))) + # type error: Argument 1 has incompatible type "Tuple[Node, Node, Node]"; expected "Tuple[str, str, str]" + # type error: Argument 2 has incompatible type "Optional[Graph]"; expected "Optional[str]" + prefix = bb("^".join(prefix_func((subject, predicate, object), context))) # type: ignore[arg-type] return index, prefix, from_key, results_from_key -def to_key_func(i): - def to_key(triple, context): +def to_key_func(i: int) -> _ToKeyFunc: + def to_key(triple: Tuple[bytes, bytes, bytes], context: bytes) -> bytes: "Takes a string; returns key" return "^".encode("latin-1").join( ( @@ -606,8 +717,8 @@ def to_key(triple, context): return to_key -def from_key_func(i): - def from_key(key): +def from_key_func(i: int) -> _FromKeyFunc: + def from_key(key: bytes) -> Tuple[bytes, bytes, 
bytes, bytes]: "Takes a key; returns string" parts = key.split("^".encode("latin-1")) return ( @@ -620,8 +731,16 @@ def from_key(key): return from_key -def results_from_key_func(i, from_string): - def from_key(key, subject, predicate, object, contexts_value): +def results_from_key_func( + i: int, from_string: Callable[[bytes], Node] +) -> _ResultsFromKeyFunc: + def from_key( + key: bytes, + subject: Optional[Node], + predicate: Optional[Node], + object: Optional[Node], + contexts_value: bytes, + ) -> Tuple[Tuple[Node, Node, Node], Generator[Node, None, None]]: "Takes a key and subject, predicate, object; returns tuple for yield" parts = key.split("^".encode("latin-1")) if subject is None: @@ -646,8 +765,10 @@ def from_key(key, subject, predicate, object, contexts_value): return from_key -def readable_index(i): - s, p, o = "?" * 3 +# TODO: Remove unused +def readable_index(i: int) -> str: + # type error: Unpacking a string is disallowed + s, p, o = "?" * 3 # type: ignore[misc] if i & 1: s = "s" if i & 2: diff --git a/dependencies/rdflib/plugins/stores/concurrent.py b/dependencies/rdflib/plugins/stores/concurrent.py index fd4167983..c07867958 100644 --- a/dependencies/rdflib/plugins/stores/concurrent.py +++ b/dependencies/rdflib/plugins/stores/concurrent.py @@ -1,7 +1,7 @@ from threading import Lock -class ResponsibleGenerator(object): +class ResponsibleGenerator: """A generator that will help clean up when it is done being used.""" __slots__ = ["cleanup", "gen"] @@ -20,7 +20,7 @@ def __next__(self): return next(self.gen) -class ConcurrentStore(object): +class ConcurrentStore: def __init__(self, store): self.store = store @@ -58,7 +58,7 @@ def triples(self, triple): if not (s, p, o) in pending_removes: yield s, p, o - for (s, p, o) in self.__pending_adds: + for s, p, o in self.__pending_adds: if ( (su is None or su == s) and (pr is None or pr == p) diff --git a/dependencies/rdflib/plugins/stores/memory.py b/dependencies/rdflib/plugins/stores/memory.py index 
1fd26a1b2..68f0ece50 100644 --- a/dependencies/rdflib/plugins/stores/memory.py +++ b/dependencies/rdflib/plugins/stores/memory.py @@ -1,11 +1,42 @@ # # +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Collection, + Dict, + Generator, + Iterator, + Mapping, + Optional, + Set, + Tuple, + Union, + overload, +) + from rdflib.store import Store from rdflib.util import _coalesce +if TYPE_CHECKING: + from rdflib.graph import ( + Graph, + _ContextType, + _ObjectType, + _PredicateType, + _SubjectType, + _TriplePatternType, + _TripleType, + ) + from rdflib.plugins.sparql.sparql import Query, Update + from rdflib.query import Result + from rdflib.term import Identifier, URIRef + __all__ = ["SimpleMemory", "Memory"] -ANY = None +ANY: None = None class SimpleMemory(Store): @@ -19,23 +50,38 @@ class SimpleMemory(Store): Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser """ - def __init__(self, configuration=None, identifier=None): + def __init__( + self, + configuration: Optional[str] = None, + identifier: Optional["Identifier"] = None, + ): super(SimpleMemory, self).__init__(configuration) self.identifier = identifier # indexed by [subject][predicate][object] - self.__spo = {} + self.__spo: Dict[ + "_SubjectType", Dict["_PredicateType", Dict["_ObjectType", int]] + ] = {} # indexed by [predicate][object][subject] - self.__pos = {} + self.__pos: Dict[ + "_PredicateType", Dict["_ObjectType", Dict["_SubjectType", int]] + ] = {} # indexed by [predicate][object][subject] - self.__osp = {} - - self.__namespace = {} - self.__prefix = {} - - def add(self, triple, context, quoted=False): + self.__osp: Dict[ + "_ObjectType", Dict["_SubjectType", Dict["_PredicateType", int]] + ] = {} + + self.__namespace: Dict[str, "URIRef"] = {} + self.__prefix: Dict["URIRef", str] = {} + + def add( + self, + triple: "_TripleType", + context: "_ContextType", + quoted: bool = False, + ) -> None: """\ Add a triple to the store of triples. 
""" @@ -76,13 +122,21 @@ def add(self, triple, context, quoted=False): p = sp[subject] = {} p[predicate] = 1 - def remove(self, triple_pattern, context=None): + def remove( + self, + triple_pattern: "_TriplePatternType", + context: Optional["_ContextType"] = None, + ) -> None: for (subject, predicate, object), c in list(self.triples(triple_pattern)): del self.__spo[subject][predicate][object] del self.__pos[predicate][object][subject] del self.__osp[object][subject][predicate] - def triples(self, triple_pattern, context=None): + def triples( + self, + triple_pattern: "_TriplePatternType", + context: Optional["_ContextType"] = None, + ) -> Iterator[Tuple["_TripleType", Iterator[Optional["_ContextType"]]]]: """A generator over all the triples matching""" subject, predicate, object = triple_pattern if subject != ANY: # subject is given @@ -142,19 +196,20 @@ def triples(self, triple_pattern, context=None): for o in subjectDictionary[p].keys(): yield (s, p, o), self.__contexts() - def __len__(self, context=None): + def __len__(self, context: Optional["_ContextType"] = None) -> int: # @@ optimize i = 0 for triple in self.triples((None, None, None)): i += 1 return i - def bind(self, prefix, namespace, override=True): + def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: # should be identical to `Memory.bind` bound_namespace = self.__namespace.get(prefix) bound_prefix = _coalesce( self.__prefix.get(namespace), - self.__prefix.get(bound_namespace), + # type error: error: Argument 1 to "get" of "Mapping" has incompatible type "Optional[URIRef]"; expected "URIRef" + self.__prefix.get(bound_namespace), # type: ignore[arg-type] ) if override: if bound_prefix is not None: @@ -164,32 +219,51 @@ def bind(self, prefix, namespace, override=True): self.__prefix[namespace] = prefix self.__namespace[prefix] = namespace else: - self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( - bound_prefix, prefix + # type error: Invalid index type 
"Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef" + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index] + bound_prefix, default=prefix ) - self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( - bound_namespace, namespace + # type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str" + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index] + bound_namespace, default=namespace ) - def namespace(self, prefix): + def namespace(self, prefix: str) -> Optional["URIRef"]: return self.__namespace.get(prefix, None) - def prefix(self, namespace): + def prefix(self, namespace: "URIRef") -> Optional[str]: return self.__prefix.get(namespace, None) - def namespaces(self): + def namespaces(self) -> Iterator[Tuple[str, "URIRef"]]: for prefix, namespace in self.__namespace.items(): yield prefix, namespace - def __contexts(self): - return (c for c in []) # TODO: best way to return empty generator - - def query(self, query, initNs, initBindings, queryGraph, **kwargs): # noqa: N803 + def __contexts(self) -> Generator["_ContextType", None, None]: + # TODO: best way to return empty generator + # type error: Need type annotation for "c" + return (c for c in []) # type: ignore[var-annotated] + + # type error: Missing return statement + def query( # type: ignore[return] + self, + query: Union["Query", str], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping["str", "Identifier"], # noqa: N803 + queryGraph: "str", # noqa: N803 + **kwargs: Any, + ) -> "Result": super(SimpleMemory, self).query( query, initNs, initBindings, queryGraph, **kwargs ) - def update(self, update, initNs, initBindings, queryGraph, **kwargs): # noqa: N803 + def update( + self, + update: Union["Update", str], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping["str", "Identifier"], # noqa: N803 + queryGraph: "str", # noqa: N803 + **kwargs: Any, + ) -> None: 
super(SimpleMemory, self).update( update, initNs, initBindings, queryGraph, **kwargs ) @@ -207,30 +281,45 @@ class Memory(Store): formula_aware = True graph_aware = True - def __init__(self, configuration=None, identifier=None): + def __init__( + self, + configuration: Optional[str] = None, + identifier: Optional["Identifier"] = None, + ): super(Memory, self).__init__(configuration) self.identifier = identifier # indexed by [subject][predicate][object] - self.__spo = {} + self.__spo: Dict[ + "_SubjectType", Dict["_PredicateType", Dict["_ObjectType", int]] + ] = {} # indexed by [predicate][object][subject] - self.__pos = {} + self.__pos: Dict[ + "_PredicateType", Dict["_ObjectType", Dict["_SubjectType", int]] + ] = {} # indexed by [predicate][object][subject] - self.__osp = {} - - self.__namespace = {} - self.__prefix = {} - self.__context_obj_map = {} - self.__tripleContexts = {} - self.__contextTriples = {None: set()} + self.__osp: Dict[ + "_ObjectType", Dict["_SubjectType", Dict["_PredicateType", int]] + ] = {} + + self.__namespace: Dict[str, "URIRef"] = {} + self.__prefix: Dict["URIRef", str] = {} + self.__context_obj_map: Dict[str, "Graph"] = {} + self.__tripleContexts: Dict["_TripleType", Dict[Optional[str], bool]] = {} + self.__contextTriples: Dict[Optional[str], Set["_TripleType"]] = {None: set()} # all contexts used in store (unencoded) - self.__all_contexts = set() + self.__all_contexts: Set["Graph"] = set() # default context information for triples - self.__defaultContexts = None - - def add(self, triple, context, quoted=False): + self.__defaultContexts: Optional[Dict[Optional[str], bool]] = None + + def add( + self, + triple: "_TripleType", + context: "_ContextType", + quoted: bool = False, + ) -> None: """\ Add a triple to the store of triples. 
""" @@ -287,7 +376,11 @@ def add(self, triple, context, quoted=False): p = sp[subject] = {} p[predicate] = 1 - def remove(self, triple_pattern, context=None): + def remove( + self, + triple_pattern: "_TriplePatternType", + context: Optional["_ContextType"] = None, + ) -> None: req_ctx = self.__ctx_to_str(context) for triple, c in self.triples(triple_pattern, context=context): subject, predicate, object_ = triple @@ -321,7 +414,15 @@ def remove(self, triple_pattern, context=None): # remove the whole context self.__all_contexts.remove(context) - def triples(self, triple_pattern, context=None): + def triples( + self, + triple_pattern: "_TriplePatternType", + context: Optional["_ContextType"] = None, + ) -> Generator[ + Tuple["_TripleType", Generator[Optional["_ContextType"], None, None]], + None, + None, + ]: """A generator over all the triples matching""" req_ctx = self.__ctx_to_str(context) subject, predicate, object_ = triple_pattern @@ -336,7 +437,10 @@ def triples(self, triple_pattern, context=None): # optimize "triple in graph" case (all parts given) elif subject is not None and predicate is not None and object_ is not None: - triple = triple_pattern + # type error: Incompatible types in assignment (expression has type "Tuple[Optional[IdentifiedNode], Optional[IdentifiedNode], Optional[Identifier]]", variable has type "Tuple[IdentifiedNode, IdentifiedNode, Identifier]") + # NOTE on type error: at this point, all elements of triple_pattern + # is not None, so it has the same type as triple + triple = triple_pattern # type: ignore[assignment] try: _ = self.__spo[subject][predicate][object_] if self.__triple_has_context(triple, req_ctx): @@ -418,12 +522,13 @@ def triples(self, triple_pattern, context=None): if self.__triple_has_context(triple, req_ctx): yield triple, self.__contexts(triple) - def bind(self, prefix, namespace, override=True): + def bind(self, prefix: str, namespace: "URIRef", override: bool = True) -> None: # should be identical to 
`SimpleMemory.bind` bound_namespace = self.__namespace.get(prefix) bound_prefix = _coalesce( self.__prefix.get(namespace), - self.__prefix.get(bound_namespace), + # type error: error: Argument 1 to "get" of "Mapping" has incompatible type "Optional[URIRef]"; expected "URIRef" + self.__prefix.get(bound_namespace), # type: ignore[arg-type] ) if override: if bound_prefix is not None: @@ -433,24 +538,29 @@ def bind(self, prefix, namespace, override=True): self.__prefix[namespace] = prefix self.__namespace[prefix] = namespace else: - self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( - bound_prefix, prefix + # type error: Invalid index type "Optional[URIRef]" for "Dict[URIRef, str]"; expected type "URIRef" + self.__prefix[_coalesce(bound_namespace, namespace)] = _coalesce( # type: ignore[index] + bound_prefix, default=prefix ) - self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( - bound_namespace, namespace + # type error: Invalid index type "Optional[str]" for "Dict[str, URIRef]"; expected type "str" + # type error: Incompatible types in assignment (expression has type "Optional[URIRef]", target has type "URIRef") + self.__namespace[_coalesce(bound_prefix, prefix)] = _coalesce( # type: ignore[index] + bound_namespace, default=namespace ) - def namespace(self, prefix): + def namespace(self, prefix: str) -> Optional["URIRef"]: return self.__namespace.get(prefix, None) - def prefix(self, namespace): + def prefix(self, namespace: "URIRef") -> Optional[str]: return self.__prefix.get(namespace, None) - def namespaces(self): + def namespaces(self) -> Iterator[Tuple[str, "URIRef"]]: for prefix, namespace in self.__namespace.items(): yield prefix, namespace - def contexts(self, triple=None): + def contexts( + self, triple: Optional["_TripleType"] = None + ) -> Generator["_ContextType", None, None]: if triple is None or triple == (None, None, None): return (context for context in self.__all_contexts) @@ -461,19 +571,19 @@ def contexts(self, 
triple=None): except KeyError: return (_ for _ in []) - def __len__(self, context=None): + def __len__(self, context: Optional["_ContextType"] = None) -> int: ctx = self.__ctx_to_str(context) if ctx not in self.__contextTriples: return 0 return len(self.__contextTriples[ctx]) - def add_graph(self, graph): + def add_graph(self, graph: "Graph") -> None: if not self.graph_aware: Store.add_graph(self, graph) else: self.__all_contexts.add(graph) - def remove_graph(self, graph): + def remove_graph(self, graph: "Graph") -> None: if not self.graph_aware: Store.remove_graph(self, graph) else: @@ -484,7 +594,13 @@ def remove_graph(self, graph): pass # we didn't know this graph, no problem # internal utility methods below - def __add_triple_context(self, triple, triple_exists, context, quoted): + def __add_triple_context( + self, + triple: "_TripleType", + triple_exists: bool, + context: Optional["_ContextType"], + quoted: bool, + ) -> None: """add the given context to the set of contexts for the triple""" ctx = self.__ctx_to_str(context) quoted = bool(quoted) @@ -495,9 +611,10 @@ def __add_triple_context(self, triple, triple_exists, context, quoted): except KeyError: # triple exists with default ctx info # start with a copy of the default ctx info + # type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "copy" triple_context = self.__tripleContexts[ triple - ] = self.__defaultContexts.copy() + ] = self.__defaultContexts.copy() # type: ignore[union-attr] triple_context[ctx] = quoted @@ -530,24 +647,30 @@ def __add_triple_context(self, triple, triple_exists, context, quoted): if triple_context == self.__defaultContexts: del self.__tripleContexts[triple] - def __get_context_for_triple(self, triple, skipQuoted=False): # noqa: N803 + def __get_context_for_triple( + self, triple: "_TripleType", skipQuoted: bool = False # noqa: N803 + ) -> Collection[Optional[str]]: """return a list of contexts (str) for the triple, skipping quoted contexts if 
skipQuoted==True""" ctxs = self.__tripleContexts.get(triple, self.__defaultContexts) if not skipQuoted: - return ctxs.keys() + # type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "keys" + return ctxs.keys() # type: ignore[union-attr] - return [ctx for ctx, quoted in ctxs.items() if not quoted] + # type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "items" + return [ctx for ctx, quoted in ctxs.items() if not quoted] # type: ignore[union-attr] - def __triple_has_context(self, triple, ctx): + def __triple_has_context(self, triple: "_TripleType", ctx: Optional[str]) -> bool: """return True if the triple exists in the given context""" - return ctx in self.__tripleContexts.get(triple, self.__defaultContexts) + # type error: Unsupported right operand type for in ("Optional[Dict[Optional[str], bool]]") + return ctx in self.__tripleContexts.get(triple, self.__defaultContexts) # type: ignore[operator] - def __remove_triple_context(self, triple, ctx): + def __remove_triple_context(self, triple: "_TripleType", ctx): """remove the context from the triple""" - ctxs = self.__tripleContexts.get(triple, self.__defaultContexts).copy() + # type error: Item "None" of "Optional[Dict[Optional[str], bool]]" has no attribute "copy" + ctxs = self.__tripleContexts.get(triple, self.__defaultContexts).copy() # type: ignore[union-attr] del ctxs[ctx] if ctxs == self.__defaultContexts: del self.__tripleContexts[triple] @@ -555,7 +678,15 @@ def __remove_triple_context(self, triple, ctx): self.__tripleContexts[triple] = ctxs self.__contextTriples[ctx].remove(triple) - def __ctx_to_str(self, ctx): + @overload + def __ctx_to_str(self, ctx: "_ContextType") -> str: + ... + + @overload + def __ctx_to_str(self, ctx: None) -> None: + ... 
+ + def __ctx_to_str(self, ctx: Optional["_ContextType"]) -> Optional[str]: if ctx is None: return None try: @@ -565,25 +696,46 @@ def __ctx_to_str(self, ctx): return ctx_str except AttributeError: # otherwise, ctx should be a URIRef or BNode or str - if isinstance(ctx, str): - ctx_str = "{}:{}".format(ctx.__class__.__name__, ctx) + # NOTE on type errors: This is actually never called with ctx value as str in all unit tests, so this seems like it should just not be here. + # type error: Subclass of "Graph" and "str" cannot exist: would have incompatible method signatures + if isinstance(ctx, str): # type: ignore[unreachable] + # type error: Statement is unreachable + ctx_str = "{}:{}".format(ctx.__class__.__name__, ctx) # type: ignore[unreachable] if ctx_str in self.__context_obj_map: return ctx_str self.__context_obj_map[ctx_str] = ctx return ctx_str raise RuntimeError("Cannot use that type of object as a Graph context") - def __contexts(self, triple): + def __contexts( + self, triple: "_TripleType" + ) -> Generator["_ContextType", None, None]: """return a generator for all the non-quoted contexts (dereferenced) the encoded triple appears in""" + # type error: Argument 2 to "get" of "Mapping" has incompatible type "str"; expected "Optional[Graph]" return ( - self.__context_obj_map.get(ctx_str, ctx_str) + self.__context_obj_map.get(ctx_str, ctx_str) # type: ignore[arg-type] for ctx_str in self.__get_context_for_triple(triple, skipQuoted=True) if ctx_str is not None ) - def query(self, query, initNs, initBindings, queryGraph, **kwargs): # noqa: N803 + # type error: Missing return statement + def query( # type: ignore[return] + self, + query: Union["Query", str], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping["str", "Identifier"], # noqa: N803 + queryGraph: "str", + **kwargs, + ) -> "Result": super(Memory, self).query(query, initNs, initBindings, queryGraph, **kwargs) - def update(self, update, initNs, initBindings, queryGraph, **kwargs): # noqa: 
N803 + def update( + self, + update: Union["Update", Any], + initNs: Mapping[str, Any], # noqa: N803 + initBindings: Mapping["str", "Identifier"], # noqa: N803 + queryGraph: "str", + **kwargs, + ) -> None: super(Memory, self).update(update, initNs, initBindings, queryGraph, **kwargs) diff --git a/dependencies/rdflib/plugins/stores/sparqlconnector.py b/dependencies/rdflib/plugins/stores/sparqlconnector.py index 1af3b369e..cbf7bd92a 100644 --- a/dependencies/rdflib/plugins/stores/sparqlconnector.py +++ b/dependencies/rdflib/plugins/stores/sparqlconnector.py @@ -1,4 +1,5 @@ import base64 +import copy import logging from io import BytesIO from typing import TYPE_CHECKING, Optional, Tuple @@ -6,8 +7,8 @@ from urllib.parse import urlencode from urllib.request import Request, urlopen -from rdflib import BNode from rdflib.query import Result +from rdflib.term import BNode log = logging.getLogger(__name__) @@ -29,7 +30,7 @@ class SPARQLConnectorException(Exception): # noqa: N818 } -class SPARQLConnector(object): +class SPARQLConnector: """ this class deals with nitty gritty details of talking to a SPARQL server """ @@ -48,7 +49,7 @@ def __init__( Any additional keyword arguments will be passed to to the request, and can be used to setup timesouts etc. 
""" - + self._method: str self.returnFormat = returnFormat self.query_endpoint = query_endpoint self.update_endpoint = update_endpoint @@ -66,11 +67,11 @@ def __init__( ) @property - def method(self): + def method(self) -> str: return self._method @method.setter - def method(self, method): + def method(self, method: str) -> None: if method not in ("GET", "POST", "POST_FORM"): raise SPARQLConnectorException( 'Method must be "GET", "POST", or "POST_FORM"' @@ -78,7 +79,12 @@ def method(self, method): self._method = method - def query(self, query, default_graph: str = None, named_graph: str = None): + def query( + self, + query: str, + default_graph: Optional[str] = None, + named_graph: Optional[str] = None, + ) -> "Result": if not self.query_endpoint: raise SPARQLConnectorException("Query endpoint not set!") @@ -89,7 +95,7 @@ def query(self, query, default_graph: str = None, named_graph: str = None): headers = {"Accept": _response_mime_types[self.returnFormat]} - args = dict(self.kwargs) + args = copy.deepcopy(self.kwargs) # merge params/headers dicts args.setdefault("params", {}) @@ -111,7 +117,8 @@ def query(self, query, default_graph: str = None, named_graph: str = None): ) elif self.method == "POST": args["headers"].update({"Content-Type": "application/sparql-query"}) - qsa = "?" + urlencode(params) + args["params"].update(params) + qsa = "?" 
+ urlencode(args["params"]) try: res = urlopen( Request( @@ -121,7 +128,8 @@ def query(self, query, default_graph: str = None, named_graph: str = None): ) ) except HTTPError as e: - return e.code, str(e), None + # type error: Incompatible return value type (got "Tuple[int, str, None]", expected "Result") + return e.code, str(e), None # type: ignore[return-value] elif self.method == "POST_FORM": params["query"] = query args["params"].update(params) @@ -134,7 +142,8 @@ def query(self, query, default_graph: str = None, named_graph: str = None): ) ) except HTTPError as e: - return e.code, str(e), None + # type error: Incompatible return value type (got "Tuple[int, str, None]", expected "Result") + return e.code, str(e), None # type: ignore[return-value] else: raise SPARQLConnectorException("Unknown method %s" % self.method) return Result.parse( @@ -143,10 +152,10 @@ def query(self, query, default_graph: str = None, named_graph: str = None): def update( self, - query, + query: str, default_graph: Optional[str] = None, named_graph: Optional[str] = None, - ): + ) -> None: if not self.update_endpoint: raise SPARQLConnectorException("Query endpoint not set!") @@ -160,10 +169,10 @@ def update( headers = { "Accept": _response_mime_types[self.returnFormat], - "Content-Type": "application/sparql-update", + "Content-Type": "application/sparql-update; charset=UTF-8", } - args = dict(self.kwargs) # other QSAs + args = copy.deepcopy(self.kwargs) # other QSAs args.setdefault("params", {}) args["params"].update(params) @@ -176,3 +185,6 @@ def update( self.update_endpoint + qsa, data=query.encode(), headers=args["headers"] ) ) + + +__all__ = ["SPARQLConnector", "SPARQLConnectorException"] diff --git a/dependencies/rdflib/plugins/stores/sparqlstore.py b/dependencies/rdflib/plugins/stores/sparqlstore.py index fb46badad..cfffbd768 100644 --- a/dependencies/rdflib/plugins/stores/sparqlstore.py +++ b/dependencies/rdflib/plugins/stores/sparqlstore.py @@ -7,13 +7,41 @@ """ import collections 
import re -from typing import Any, Callable, Dict, Optional, Tuple, Union - -from rdflib import BNode, Variable -from rdflib.graph import DATASET_DEFAULT_GRAPH_ID +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generator, + Iterable, + Iterator, + List, + Mapping, + Optional, + Tuple, + Union, + overload, +) + +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID, Graph from rdflib.plugins.stores.regexmatching import NATIVE_REGEX from rdflib.store import Store -from rdflib.term import Node +from rdflib.term import BNode, Identifier, Node, URIRef, Variable + +if TYPE_CHECKING: + import typing_extensions as te + from rdflib.graph import ( + _TripleType, + _ContextType, + _QuadType, + _TriplePatternType, + _SubjectType, + _PredicateType, + _ObjectType, + _ContextIdentifierType, + ) + from rdflib.plugins.sparql.sparql import Query, Update + from rdflib.query import Result, ResultRow from .sparqlconnector import SPARQLConnector @@ -24,16 +52,17 @@ BNODE_IDENT_PATTERN = re.compile(r"(?P