Merge pull request #155 from RDFLib/edmond/fix/alt-prof

Drop-down profile implementation

edmondchuc authored Oct 11, 2023
2 parents 570701e + 41de555 commit 50500e5
Showing 20 changed files with 805 additions and 41 deletions.
1 change: 1 addition & 0 deletions .github/workflows/on_pr_to_main.yaml
@@ -66,3 +66,4 @@ jobs:
cd ../identifier && poetry run pytest
cd ../object && poetry run pytest
cd ../caching && poetry run pytest
cd ../dd_profile && poetry run pytest
1 change: 1 addition & 0 deletions .github/workflows/on_push_to_feature.yaml
@@ -64,3 +64,4 @@ jobs:
cd ../identifier && poetry run pytest
cd ../object && poetry run pytest
cd ../caching && poetry run pytest
cd ../dd_profile && poetry run pytest
6 changes: 6 additions & 0 deletions README-Dev.md
@@ -79,6 +79,8 @@ using the properties listed below.

## High Level Sequence: `/object` endpoint

### Prez UI or similar human-actionable client

Prez provides an `/object` endpoint that supplies any information known about a given URI. If an annotated
mediatype is requested, Prez will additionally provide all system links for endpoints which can render the object. The
high-level sequence for this endpoint is as follows:
@@ -91,6 +93,10 @@ these endpoints, specifying any variables that need to be substituted (such as p
to construct the system links.
5. Return the response

### Machine requests

Machine requests made to `/object` use the provided media type and profile to return an appropriate response from one of the subsystems.
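
For example, a machine client might fetch Turtle for a known IRI directly. This is a minimal sketch, assuming a locally
running Prez at `http://localhost:8000`, an illustrative object IRI, that `/object` takes the target IRI via a `uri`
query parameter, and that the `_mediatype` query string parameter is available alongside the `Accept` header:

```python
# Minimal sketch of a machine request to the /object endpoint.
# The base URL and object IRI below are placeholders; adjust for a real deployment.
import httpx

PREZ_BASE = "http://localhost:8000"
OBJECT_IRI = "http://example.com/vocab/concept-1"

response = httpx.get(
    f"{PREZ_BASE}/object",
    params={"uri": OBJECT_IRI, "_mediatype": "text/turtle"},
    headers={"Accept": "text/turtle"},
)
response.raise_for_status()
print(response.text)  # the object's RDF in the negotiated mediatype
```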

## High Level Sequence: listing and individual object endpoints

Prez uses the following logic to determine what information to return, based on a profile, and in which mediatype to return it.
3 changes: 2 additions & 1 deletion prez/app.py
@@ -115,8 +115,9 @@ async def app_startup():
setup_logger(settings)
log = logging.getLogger("prez")
log.info("Starting up")
await add_prefixes_to_prefix_graph()

await healthcheck_sparql_endpoints()
await add_prefixes_to_prefix_graph()
await get_all_search_methods()
await create_profiles_graph()
await create_endpoints_graph()
25 changes: 25 additions & 0 deletions prez/reference_data/profiles/vocprez_default_profiles.ttl
@@ -154,6 +154,31 @@ prez:VocPrezProfile
prez:VocPrezCollectionList ,
skos:ConceptScheme ,
skos:Collection ;
altr-ext:hasLabelPredicate skos:prefLabel ;
altr-ext:hasNodeShape [
a sh:NodeShape ;
sh:targetClass skos:ConceptScheme ;
altr-ext:childToFocus skos:inScheme ;
altr-ext:relativeProperties skos:broader ;
] ;
altr-ext:hasNodeShape [
a sh:NodeShape ;
sh:targetClass skos:Collection ;
altr-ext:focusToChild skos:member ;
altr-ext:relativeProperties skos:definition ;
] ;
altr-ext:hasNodeShape [
a sh:NodeShape ;
sh:targetClass prez:SchemesList ;
altr-ext:containerClass skos:ConceptScheme ;
altr-ext:relativeProperties skos:definition, dcterms:publisher, reg:status ;
] ;
altr-ext:hasNodeShape [
a sh:NodeShape ;
sh:targetClass prez:VocPrezCollectionList ;
altr-ext:containerClass skos:Collection ;
altr-ext:relativeProperties skos:definition, dcterms:publisher, reg:status ;
] ;
altr-ext:hasDefaultResourceFormat "application/json" ;
altr-ext:hasResourceFormat
"application/json" ,
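The node shapes above are what the new drop-down renderers read: each listing class (for example `prez:SchemesList`) declares an `altr-ext:containerClass` whose instances become the rows of the drop-down, plus the `altr-ext:relativeProperties` to include per row. A minimal sketch of reading that configuration with rdflib (file path as per this commit; the `ALTREXT` namespace is taken from `prez.reference_data.prez_ns`):

```python
# Sketch: read the drop-down configuration for the SchemesList listing class.
from rdflib import Graph, Namespace
from rdflib.namespace import SH

from prez.reference_data.prez_ns import ALTREXT

PREZ = Namespace("https://prez.dev/")

profiles = Graph().parse("prez/reference_data/profiles/vocprez_default_profiles.ttl")

# Locate the node shape whose sh:targetClass is the listing class,
# then read the class whose instances populate the drop-down.
node_shape = profiles.value(predicate=SH.targetClass, object=PREZ.SchemesList)
container_class = profiles.value(node_shape, ALTREXT.containerClass)
print(container_class)  # expected: http://www.w3.org/2004/02/skos/core#ConceptScheme
```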
17 changes: 17 additions & 0 deletions prez/renderers/csv_renderer.py
@@ -0,0 +1,17 @@
import io
import csv


def render_csv_dropdown(rows: list[dict]) -> io.StringIO:
    """Serialise drop-down rows (dicts sharing the same keys) to an in-memory CSV stream."""
    stream = io.StringIO()
headers = list(rows[0].keys())
writer = csv.DictWriter(
stream, fieldnames=headers, quotechar='"', quoting=csv.QUOTE_MINIMAL
)
writer.writeheader()

for row in rows:
writer.writerow(row)

stream.seek(0)
return stream
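A quick usage illustration of the new CSV renderer (rows invented for the example; in Prez they come from the JSON drop-down renderer's `@graph`):

```python
# Illustration only: render two made-up drop-down rows as CSV.
from prez.renderers.csv_renderer import render_csv_dropdown

rows = [
    {"iri": "http://example.com/vocab/concept-1", "prefLabel": "Concept 1"},
    {"iri": "http://example.com/vocab/concept-2", "prefLabel": "Concept 2"},
]

stream = render_csv_dropdown(rows)
print(stream.read())
# iri,prefLabel
# http://example.com/vocab/concept-1,Concept 1
# http://example.com/vocab/concept-2,Concept 2
```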
129 changes: 129 additions & 0 deletions prez/renderers/json_renderer.py
@@ -0,0 +1,129 @@
from itertools import chain

from rdflib import Graph, URIRef, RDF, SH, Literal
from rdflib.term import Node

from prez.cache import profiles_graph_cache
from prez.reference_data.prez_ns import ALTREXT
from prez.sparql.objects_listings import get_listing_predicates


class NotFoundError(Exception):
...


def _get_resource_iri(graph: Graph, profile_graph: Graph, profile: URIRef) -> Node:
target_classes = profile_graph.objects(profile, ALTREXT.constrainsClass)
for target_class in target_classes:
iri = graph.value(predicate=RDF.type, object=target_class)
if iri is not None:
return iri

raise NotFoundError(
f"No resource IRI found based on the constrained classes defined in {profile}."
)


def _get_label_predicates(profile_graph: Graph, profile: URIRef) -> list[Node]:
return list(profile_graph.objects(profile, ALTREXT.hasLabelPredicate))


def _get_child_iris(
graph: Graph,
iri: Node,
child_to_focus_predicates: list[Node],
parent_to_focus_predicates: list[Node],
focus_to_child_predicates: list[Node],
) -> list[Node]:
children = []
for predicate in child_to_focus_predicates:
child_iris = list(graph.subjects(predicate, iri))
if child_iris:
children += child_iris

for predicate in parent_to_focus_predicates:
child_iris = list(graph.objects(iri, predicate))
if child_iris:
children += child_iris

for predicate in focus_to_child_predicates:
child_iris = list(graph.objects(iri, predicate))
if child_iris:
children += child_iris

return children


def create_graph_item(
iri: str, predicates: list[Node], graph: Graph, context: dict
) -> tuple[dict, dict]:
item = {"iri": iri}
for predicate in predicates:
values = list(graph.objects(URIRef(iri), predicate))
predicate_localname = str(predicate).split("#")[-1].split("/")[-1]
item[str(predicate_localname)] = str(values[0]) if values else None
context[predicate_localname] = str(predicate)

return item, context


async def render_json_dropdown(
graph: Graph,
profile: URIRef,
selected_class: URIRef,
) -> dict:
    """Build the drop-down JSON-LD document (context + items) for the given graph, profile and selected class."""
    profile_graph = profiles_graph_cache.cbd(profile)

iri = _get_resource_iri(graph, profile_graph, profile)

items = []
context = {
"iri": "@id",
}

(
child_to_focus_predicates,
parent_to_focus,
focus_to_child_predicates,
focus_to_parent_predicates,
relative_predicates,
) = get_listing_predicates(profile, selected_class)

if (
not child_to_focus_predicates
and not focus_to_parent_predicates
and not focus_to_child_predicates
):
# This is a listing view, e.g. /v/vocab.
node_shape = profile_graph.value(
predicate=SH.targetClass, object=selected_class
)
container_class = profile_graph.value(node_shape, ALTREXT.containerClass)
if container_class is None:
raise NotFoundError(
f"No container class found for resource {iri} in profile {profile}."
)

for resource in graph.subjects(RDF.type, container_class):
relative_predicates += _get_label_predicates(profile_graph, profile)
item, context = create_graph_item(
str(resource), relative_predicates, graph, context
)
items.append(item)
else:
relative_predicates += _get_label_predicates(profile_graph, profile)

child_iris = _get_child_iris(
graph,
iri,
child_to_focus_predicates,
focus_to_parent_predicates,
focus_to_child_predicates,
)
for child_iri in child_iris:
item, context = create_graph_item(
str(child_iri), relative_predicates, graph, context
)
items.append(item)

return {"@context": context, "@graph": sorted(items, key=lambda x: x["iri"])}
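For reference, `create_graph_item` builds a single drop-down row and its JSON-LD context entry; a toy, self-contained example (IRI and label invented for illustration):

```python
# Toy illustration of create_graph_item with invented data.
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import SKOS

from prez.renderers.json_renderer import create_graph_item

g = Graph()
concept = URIRef("http://example.com/vocab/concept-1")
g.add((concept, SKOS.prefLabel, Literal("Concept 1")))

item, context = create_graph_item(str(concept), [SKOS.prefLabel], g, {"iri": "@id"})
# item    == {"iri": "http://example.com/vocab/concept-1", "prefLabel": "Concept 1"}
# context == {"iri": "@id", "prefLabel": "http://www.w3.org/2004/02/skos/core#prefLabel"}
```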
80 changes: 63 additions & 17 deletions prez/renderers/renderer.py
@@ -1,21 +1,27 @@
import io
import json
import logging
from typing import Optional

from connegp import RDF_MEDIATYPES, RDF_SERIALIZER_TYPES_MAP
from fastapi import status
from fastapi.exceptions import HTTPException
from fastapi.responses import StreamingResponse
from pydantic.types import List
from rdflib import Graph, URIRef, Namespace
from rdflib import Graph, URIRef, Namespace, RDF
from starlette.requests import Request
from starlette.responses import Response

from prez.models.profiles_and_mediatypes import ProfilesMediatypesInfo
from prez.models.profiles_item import ProfileItem
from prez.renderers.csv_renderer import render_csv_dropdown
from prez.services.curie_functions import get_curie_id_for_uri
from prez.sparql.methods import send_queries, rdf_query_to_graph
from prez.sparql.objects_listings import (
generate_item_construct,
get_annotation_properties,
)
from prez.renderers.json_renderer import render_json_dropdown, NotFoundError

log = logging.getLogger(__name__)

@@ -25,34 +31,82 @@ async def return_from_queries(
mediatype,
profile,
profile_headers,
selected_class: URIRef,
predicates_for_link_addition: dict = None,
):
"""
Executes SPARQL queries, loads these to RDFLib Graphs, and calls the "return_from_graph" function to return the
content
"""
graph, _ = await send_queries(queries)
return await return_from_graph(graph, mediatype, profile, profile_headers)
return await return_from_graph(
graph,
mediatype,
profile,
profile_headers,
selected_class,
)


async def return_from_graph(
graph,
mediatype,
profile,
profile_headers,
selected_class: URIRef,
):
profile_headers["Content-Disposition"] = "inline"

if str(mediatype) in RDF_MEDIATYPES:
return await return_rdf(graph, mediatype, profile_headers)

# elif mediatype == "xml":
# ...
elif profile == URIRef("https://w3id.org/profile/dd"):
graph = await return_annotated_rdf(
graph,
profile,
)

try:
# TODO: Currently, data is generated in memory, instead of in a streaming manner.
# Not possible to do a streaming response yet since we are reading the RDF
# data into an in-memory graph.
jsonld_data = await render_json_dropdown(graph, profile, selected_class)

if str(mediatype) == "text/csv":
iri = graph.value(None, RDF.type, selected_class)
if iri:
filename = get_curie_id_for_uri(URIRef(str(iri)))
else:
filename = selected_class.split("#")[-1].split("/")[-1]
stream = render_csv_dropdown(jsonld_data["@graph"])
response = StreamingResponse(stream, media_type=mediatype)
response.headers[
"Content-Disposition"
] = f"attachment;filename={filename}.csv"
return response

# application/json
stream = io.StringIO(json.dumps(jsonld_data))
return StreamingResponse(stream, media_type=mediatype)

except NotFoundError as err:
raise HTTPException(status.HTTP_404_NOT_FOUND, str(err))

else:
if "anot+" in mediatype:
return await return_annotated_rdf(
graph, profile_headers, profile, mediatype
non_anot_mediatype = mediatype.replace("anot+", "")
graph = await return_annotated_rdf(graph, profile)
content = io.BytesIO(
graph.serialize(format=non_anot_mediatype, encoding="utf-8")
)
return StreamingResponse(
content=content, media_type=non_anot_mediatype, headers=profile_headers
)

raise HTTPException(
status.HTTP_400_BAD_REQUEST, f"Unsupported mediatype: {mediatype}."
)


async def return_rdf(graph, mediatype, profile_headers):
RDF_SERIALIZER_TYPES_MAP["text/anot+turtle"] = "turtle"
@@ -83,14 +137,10 @@ async def get_annotations_graph(profile, graph, cache):

async def return_annotated_rdf(
graph: Graph,
profile_headers,
profile,
mediatype="text/anot+turtle",
):
) -> Graph:
from prez.cache import tbox_cache

non_anot_mediatype = mediatype.replace("anot+", "")

cache = tbox_cache
queries_for_uncached, annotations_graph = await get_annotation_properties(graph)
anots_from_triplestore, _ = await send_queries([queries_for_uncached])
Expand All @@ -108,12 +158,7 @@ async def return_annotated_rdf(
previous_triples_count = len(graph)

graph.bind("prez", "https://prez.dev/")
obj = io.BytesIO(graph.serialize(format=non_anot_mediatype, encoding="utf-8"))

# TODO move responses to router and return graph here
return StreamingResponse(
content=obj, media_type=non_anot_mediatype, headers=profile_headers
)
return graph


async def return_profiles(
@@ -144,4 +189,5 @@ async def return_profiles(
prof_and_mt_info.mediatype,
prof_and_mt_info.profile,
prof_and_mt_info.profile_headers,
prof_and_mt_info.selected_class,
)
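With these changes, `return_from_graph` routes requests for the `https://w3id.org/profile/dd` profile to the JSON and CSV drop-down renderers. A rough client-side sketch of exercising it (assuming a locally running Prez, that `/v/vocab` is the vocab listing endpoint, and that the `_profile` and `_mediatype` query string parameters are honoured):

```python
# Sketch: request the vocab listing as a CSV drop-down via the dd profile.
# Base URL and endpoint path are assumptions for a local deployment.
import httpx

PREZ_BASE = "http://localhost:8000"

response = httpx.get(
    f"{PREZ_BASE}/v/vocab",
    params={
        "_profile": "https://w3id.org/profile/dd",  # drop-down profile
        "_mediatype": "text/csv",  # or "application/json"
    },
)
response.raise_for_status()
print(response.headers.get("Content-Disposition"))  # attachment;filename=<...>.csv
print(response.text)
```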