Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OGC Features queryables bugfix + cleaner curie generation #284

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions prez/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
JSONMediaType,
GeoJSONMediaType,
)
from prez.exceptions.model_exceptions import NoEndpointNodeshapeException
from prez.exceptions.model_exceptions import NoEndpointNodeshapeException, URINotFoundException
from prez.models.query_params import QueryParams
from prez.reference_data.prez_ns import ALTREXT, ONT, EP, OGCE, OGCFEAT
from prez.repositories import PyoxigraphRepo, RemoteSparqlRepo, OxrdflibRepo, Repo
Expand Down Expand Up @@ -489,7 +489,22 @@ async def get_endpoint_uri(
async def get_ogc_features_path_params(
request: Request,
):
return request.path_params
collection_id = request.path_params.get("collectionId")
feature_id = request.path_params.get("featureId")
path_params = {}
if feature_id:
try:
feature_uri = await get_uri_for_curie_id(feature_id)
except ValueError:
raise URINotFoundException(curie=feature_id)
path_params["feature_uri"] = feature_uri
if collection_id:
try:
collection_uri = await get_uri_for_curie_id(collection_id)
except ValueError:
raise URINotFoundException(curie=collection_id)
path_params["collection_uri"] = collection_uri
return path_params


async def get_ogc_features_mediatype(
Expand Down
7 changes: 5 additions & 2 deletions prez/exceptions/model_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ class URINotFoundException(Exception):
Raised when a URI is not found in the triplestore.
"""

def __init__(self, uri: URIRef):
self.message = f"URI {uri} not found at endpoint {settings.sparql_endpoint}."
def __init__(self, uri: URIRef = None, curie: str = None):
if uri:
self.message = f"URI \"{uri}\" not found at endpoint {settings.sparql_endpoint}."
if curie:
self.message = f"URI for curie \"{curie}\" not found at endpoint {settings.sparql_endpoint}."
super().__init__(self.message)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@ ex:Object

ex:QueryablesGlobal
a sh:NodeShape ;
sh:targetClass prez:Queryable ;
sh:targetClass geo:Feature ;
ont:hierarchyLevel 1 ;
.

ex:QueryablesLocal
a sh:NodeShape ;
sh:targetClass prez:Queryable ;
sh:targetClass geo:Feature ;
ont:hierarchyLevel 2 ;
.
2 changes: 1 addition & 1 deletion prez/reference_data/profiles/ogc_features.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
@prefix shext: <http://example.com/shacl-extension#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

prez:OGCFeaturesProfile a prof:Profile ;
prez:OGCFeaturesProfile a prof:Profile , prez:IndexProfile ;
dcterms:description "A system profile for OGC Features conformant API" ;
dcterms:identifier "ogcfeat"^^xsd:token ;
dcterms:title "OGC Features Profile" ;
Expand Down
5 changes: 2 additions & 3 deletions prez/reference_data/profiles/ogc_records_profile.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ PREFIX shext: <http://example.com/shacl-extension#>


prez:OGCRecordsProfile
a prof:Profile ;
a prof:Profile , prez:IndexProfile ;
dcterms:identifier "ogc"^^xsd:token ;
dcterms:description "A system profile for OGC Records conformant API" ;
dcterms:title "OGC Profile" ;
Expand Down Expand Up @@ -57,8 +57,7 @@ prez:OGCRecordsProfile
sh:targetClass
dcat:Catalog,
dcat:Resource,
skos:Concept
,
skos:Concept ,
skos:Collection,
rdf:Resource ;
altr-ext:hasDefaultProfile prez:OGCItemProfile
Expand Down
2 changes: 1 addition & 1 deletion prez/reference_data/profiles/prez_default_profiles.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>


<https://prez.dev/profile/prez>
a prof:Profile ;
a prof:Profile , prez:IndexProfile ;
dcterms:identifier "prez"^^xsd:token ;
dcterms:description "A profile for the Prez Linked Data API" ;
dcterms:title "Prez profile" ;
Expand Down
4 changes: 2 additions & 2 deletions prez/routers/ogc_features_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ async def listings_with_feature_collection(
system_repo,
cql_parser,
query_params,
**path_params,
path_params,
)
except Exception as e:
raise e
Expand Down Expand Up @@ -202,7 +202,7 @@ async def objects(
url,
data_repo,
system_repo,
**path_params,
path_params,
)
except Exception as e:
raise e
Expand Down
5 changes: 4 additions & 1 deletion prez/services/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ async def process_uncached_terms(
rdf_queries=[annotations_query], tabular_queries=[]
)

all_results = context_results[0] + repo_results[0] + system_results[0]
all_results = Graph()
all_results += context_results[0]
all_results += repo_results[0]
all_results += system_results[0]

# Initialize subjects_map with each term having an empty set to start with
subjects_map = {term: set() for term in terms}
Expand Down
24 changes: 22 additions & 2 deletions prez/services/curie_functions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import re
from urllib.parse import urlparse

from aiocache import caches
Expand Down Expand Up @@ -31,6 +32,19 @@ def namespace_registered(namespace):
return False


def valid_prefix(prefix: str) -> bool:
    """Return True if *prefix* is a valid Turtle ``PN_PREFIX``.

    Implements the grammar production from
    https://www.w3.org/TR/turtle/#grammar-production-PN_PREFIX :

        PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?

    The whole string must satisfy the production: it has to start with a
    PN_CHARS_BASE character, may contain PN_CHARS or literal dots in the
    middle, and must not end with a dot. The empty string is not a valid
    PN_PREFIX.
    """
    PN_CHARS_BASE = r"([A-Z]|[a-z]|[\u00C0-\u00D6]|[\u00D8-\u00F6]|[\u00F8-\u02FF]|[\u0370-\u037D]|[\u037F-\u1FFF]|[\u200C-\u200D]|[\u2070-\u218F]|[\u2C00-\u2FEF]|[\u3001-\uD7FF]|[\uF900-\uFDCF]|[\uFDF0-\uFFFD]|[\U00010000-\U000EFFFF])"
    PN_CHARS_U = rf"({PN_CHARS_BASE}|_)"
    PN_CHARS = rf"({PN_CHARS_U}|-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])"
    # The dot must be escaped: an unescaped "." would match *any* character,
    # letting spaces, punctuation, etc. through in the middle of the prefix.
    PN_PREFIX = rf"({PN_CHARS_BASE}(({PN_CHARS}|\.)*{PN_CHARS})?)"
    # fullmatch (not match): re.match only anchors at the start, so a prefix
    # with a valid first character followed by garbage would be accepted.
    return re.fullmatch(PN_PREFIX, prefix) is not None


def generate_new_prefix(uri):
"""
Generates a new prefix for a uri
Expand All @@ -52,8 +66,11 @@ def generate_new_prefix(uri):
return
# otherwise, remove vowels to reduce length
proposed_prefix = "".join(
[c for c in to_generate_prefix_from if c not in "aeiou"]
[c for c in to_generate_prefix_from if c not in "aeiou!@#$%^&*()_+-=,."]
)
if not valid_prefix(proposed_prefix):
# if we still can't get a nice prefix. use an ugly but valid one using a hash of the IRI
proposed_prefix = f"ns{hash(to_generate_prefix_from)}"
if not prefix_registered(proposed_prefix):
prefix_graph.bind(proposed_prefix, ns)
return
Expand Down Expand Up @@ -95,6 +112,9 @@ async def get_uri_for_curie_id(curie_id: str):
else:
separator = settings.curie_separator
curie = curie_id.replace(separator, ":")
uri = prefix_graph.namespace_manager.expand_curie(curie)
try:
uri = prefix_graph.namespace_manager.expand_curie(curie)
except ValueError:
raise
await curie_cache.set(curie_id, uri)
return uri
33 changes: 5 additions & 28 deletions prez/services/generate_profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,12 @@ async def create_profiles_graph(repo) -> Graph:
profiles_graph_cache.parse(f)
log.info("Prez default profiles loaded")
remote_profiles_query = """
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX prof: <http://www.w3.org/ns/dx/prof/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

CONSTRUCT {?s ?p ?o .
?o ?p2 ?o2 .
?o2 ?p3 ?o3 .
?class ?cp ?co}
WHERE {?s a prof:Profile ;
?p ?o
OPTIONAL {?o ?p2 ?o2
FILTER(ISBLANK(?o))
OPTIONAL {?o2 ?p3 ?o3
FILTER(ISBLANK(?o2))}
}
OPTIONAL {
?class rdfs:subClassOf dcat:Resource ;
?cp ?co .
}
OPTIONAL {
?class rdfs:subClassOf geo:Feature ;
?cp ?co .
}
OPTIONAL {
?class rdfs:subClassOf skos:Concept ;
?cp ?co .
}
PREFIX prez: <https://prez.dev/>

DESCRIBE ?prof {
VALUES ?prof_class { prez:ListingProfile prez:ObjectProfile prez:IndexProfile }
?prof a ?prof_class
}
"""
g, _ = await repo.send_queries([remote_profiles_query], [])
Expand Down
8 changes: 4 additions & 4 deletions prez/services/listings.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,11 @@ async def ogc_features_listing_function(
system_repo,
cql_parser,
query_params,
**path_params,
path_params,
):
count_query = None
count = 0
collectionId = path_params.get("collectionId")
collection_uri = path_params.get("collection_uri")
subselect_kwargs = merge_listing_query_grammar_inputs(
endpoint_nodeshape=endpoint_nodeshape,
cql_parser=cql_parser,
Expand Down Expand Up @@ -199,6 +199,7 @@ async def ogc_features_listing_function(
TriplesSameSubjectPath.from_spo(*innser_select_triple)
)
subselect_kwargs["inner_select_vars"] = [queryable_var]
subselect_kwargs["limit"] = 100
construct_triple = (
queryable_var,
IRI(value=RDF.type),
Expand All @@ -211,7 +212,7 @@ async def ogc_features_listing_function(
**subselect_kwargs,
).to_string()
queries.append(query)
elif not collectionId: # list Feature Collections
elif not collection_uri: # list Feature Collections
query = PrezQueryConstructor(
construct_tss_list=construct_tss_list,
profile_triples=profile_nodeshape.tssp_list,
Expand Down Expand Up @@ -240,7 +241,6 @@ async def ogc_features_listing_function(

# Features listing requires CBD of the Feature Collection as well; reuse items profile to get all props/bns to
# depth two.
collection_uri = await get_uri_for_curie_id(collectionId)
gpnt = GraphPatternNotTriples(
content=Bind(
expression=Expression.from_primary_expression(
Expand Down
23 changes: 9 additions & 14 deletions prez/services/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from fastapi.responses import PlainTextResponse, RedirectResponse
from rdf2geojson import convert
from rdflib import RDF, URIRef, RDFS
from rdflib import RDF, URIRef
from rdflib.namespace import GEO
from sparql_grammar_pydantic import TriplesSameSubject, IRI, Var, TriplesSameSubjectPath

Expand Down Expand Up @@ -129,25 +129,20 @@ async def ogc_features_object_function(
url,
data_repo,
system_repo,
**path_params,
path_params,
):
collectionId = path_params.get("collectionId")
featureId = path_params.get("featureId")
if featureId:
feature_uri = await get_uri_for_curie_id(featureId)
else:
feature_uri = None
collection_uri = await get_uri_for_curie_id(collectionId)
collection_uri = path_params.get("collection_uri")
feature_uri = path_params.get("feature_uri")
if template_query:
if featureId:
focus_uri = await get_uri_for_curie_id(featureId)
if feature_uri:
focus_uri = feature_uri
else:
focus_uri = collection_uri
query = template_query.replace(
"VALUES ?focusNode { UNDEF }", f"VALUES ?focusNode {{ {focus_uri.n3()} }}"
)
else:
if featureId is None: # feature collection
if feature_uri is None: # feature collection
collection_iri = IRI(value=collection_uri)
construct_tss_list = None
tssp_list = [
Expand All @@ -156,7 +151,6 @@ async def ogc_features_object_function(
)
]
else: # feature
feature_uri = await get_uri_for_curie_id(featureId)
feature_iri = IRI(value=feature_uri)
triples = [
(feature_iri, Var(value="prop"), Var(value="val")),
Expand All @@ -180,14 +174,15 @@ async def ogc_features_object_function(
item_graph, _ = await data_repo.send_queries([query], [])
if len(item_graph) == 0:
uri = feature_uri if feature_uri else collection_uri
raise URINotFoundException(uri)
raise URINotFoundException(uri=uri)
annotations_graph = await return_annotated_rdf(item_graph, data_repo, system_repo)
log.debug(f"Query time: {time.time() - query_start_time}")

link_headers = None
if selected_mediatype == "application/sparql-query":
content = io.BytesIO(query.encode("utf-8"))
elif selected_mediatype == "application/json":
collectionId = get_curie_id_for_uri(collection_uri)
collection = create_collection_json(
collectionId, collection_uri, annotations_graph, url
)
Expand Down
10 changes: 6 additions & 4 deletions prez/services/query_generation/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ class CountQuery(ConstructQuery):
}
WHERE {
{
SELECT (COUNT(DISTINCT ?focus_node) AS ?count)
SELECT (COUNT(?focus_node) AS ?count)
WHERE {
SELECT ?focus_node
SELECT DISTINCT ?focus_node
WHERE {
<<< original where clause >>>
} LIMIT 101
Expand All @@ -64,7 +64,10 @@ def __init__(self, original_subselect: SubSelect):
limit = settings.listing_count_limit
limit_plus_one = limit + 1
inner_ss = SubSelect(
select_clause=SelectClause(variables_or_all=[Var(value="focus_node")]),
select_clause=SelectClause(
variables_or_all=[Var(value="focus_node")],
distinct=True,
),
where_clause=original_subselect.where_clause,
solution_modifier=SolutionModifier(
limit_offset=LimitOffsetClauses(
Expand All @@ -78,7 +81,6 @@ def __init__(self, original_subselect: SubSelect):
content=BuiltInCall(
other_expressions=Aggregate(
function_name="COUNT",
distinct=True,
expression=Expression.from_primary_expression(
PrimaryExpression(content=Var(value="focus_node"))
),
Expand Down
Loading