From 257408938bcb226b812cecda2a3923737994d993 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 4 Dec 2019 12:38:00 -0600 Subject: [PATCH 01/10] add samples for further elab --- wa_examples/html_wa.json | 32 ++++++++++++++++++++++++++++++++ wa_examples/text_wa.json | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 wa_examples/html_wa.json create mode 100644 wa_examples/text_wa.json diff --git a/wa_examples/html_wa.json b/wa_examples/html_wa.json new file mode 100644 index 0000000..0dd148e --- /dev/null +++ b/wa_examples/html_wa.json @@ -0,0 +1,32 @@ +{ + "id": "/wa/translation-alignment/QWxpZ25tZW50Q2h1bmtOb2RlOjE=", + "type": "Annotation", + "target": [ + "urn:cts:greekLit:tlg0012.tlg001.msA:1.1-1.7", + { + "type": "SpecificResource", + "source": { + "id": "https://rosetest.library.jhu.edu/rosademo/iiif/homer/VA/VA012RN-0013/canvas", + "type": "Canvas" + }, + "selector": { + "type": "ImageApiSelector", + "region": "pct:6.11,22.52,46.75,15.01" + } + } + ], + "body": [ + { + "type": "TextualBody", + "value": "", + "format": "text/html", + "language": "el" + }, + { + "type": "TextualBody", + "value": "", + "format": "text/html", + "language": "en" + } + ] +} \ No newline at end of file diff --git a/wa_examples/text_wa.json b/wa_examples/text_wa.json new file mode 100644 index 0000000..94fc564 --- /dev/null +++ b/wa_examples/text_wa.json @@ -0,0 +1,32 @@ +{ + "id": "/wa/translation-alignment/QWxpZ25tZW50Q2h1bmtOb2RlOjE=", + "type": "Annotation", + "target": [ + "urn:cts:greekLit:tlg0012.tlg001.msA:1.1-1.7", + { + "type": "SpecificResource", + "source": { + "id": "https://rosetest.library.jhu.edu/rosademo/iiif/homer/VA/VA012RN-0013/canvas", + "type": "Canvas" + }, + "selector": { + "type": "ImageApiSelector", + "region": "pct:6.11,22.52,46.75,15.01" + } + } + ], + "body": [ + { + "type": "TextualBody", + "value": "1.1) μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος\n1.2) οὐλομένην, ἣ μυρί᾽ Ἀχαιοῖς ἄλγε᾽ ἔθηκε,\n1.3) πολλὰς δ᾽ ἰφθίμους 
ψυχὰς Ἄϊδι προΐαψεν\n1.4) ἡρώων, αὐτοὺς δὲ ἑλώρια τεῦχε κύνεσσιν\n1.5) οἰωνοῖσί τε πᾶσι, Διὸς δ᾽ ἐτελείετο βουλή,\n1.6) ἐξ οὗ δὴ τὰ πρῶτα διαστήτην ἐρίσαντε\n1.7) Ἀτρεΐδης τε ἄναξ ἀνδρῶν καὶ δῖος Ἀχιλλεύς.", + "format": "text/plain", + "language": "el" + }, + { + "type": "TextualBody", + "value": "1.1-1.7) The wrath sing, goddess, of Peleus' son, Achilles, that destructive wrath which brought countless woes upon the Achaeans, and sent forth to Hades many valiant souls of heroes, and made them themselves spoil for dogs and every bird; thus the plan of Zeus came to fulfillment, from the time when 1 first they parted in strife Atreus' son, king of men, and brilliant Achilles.", + "format": "text/plain", + "language": "en" + } + ] +} \ No newline at end of file From e8f8ad554896d69dfa9af15a25937e0e3cb55250 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 4 Dec 2019 15:15:00 -0600 Subject: [PATCH 02/10] add an Image target --- wa_examples/text_wa_canvas_image.json | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 wa_examples/text_wa_canvas_image.json diff --git a/wa_examples/text_wa_canvas_image.json b/wa_examples/text_wa_canvas_image.json new file mode 100644 index 0000000..bc31c9e --- /dev/null +++ b/wa_examples/text_wa_canvas_image.json @@ -0,0 +1,43 @@ +{ + "id": "/wa/translation-alignment/QWxpZ25tZW50Q2h1bmtOb2RlOjE=", + "type": "Annotation", + "target": [ + "urn:cts:greekLit:tlg0012.tlg001.msA:1.1-1.7", + { + "type": "SpecificResource", + "source": { + "id": "https://rosetest.library.jhu.edu/rosademo/iiif/homer/VA/VA012RN-0013/canvas", + "type": "Canvas" + }, + "selector": { + "type": "FragmentSelector", + "value": "xywh=percent:6,23,47,15" + } + }, + { + "type": "SpecificResource", + "source": { + "id": "https://image.library.jhu.edu/iiif/homer%2FVA%2FVA012RN-0013", + "type": "Image" + }, + "selector": { + "type": "ImageApiSelector", + "region": "pct:6.11,22.52,46.75,15.01" + } + } + ], + "body": [ + { + "type":
"TextualBody", + "value": "1.1) μῆνιν ἄειδε θεὰ Πηληϊάδεω Ἀχιλῆος\n1.2) οὐλομένην, ἣ μυρί᾽ Ἀχαιοῖς ἄλγε᾽ ἔθηκε,\n1.3) πολλὰς δ᾽ ἰφθίμους ψυχὰς Ἄϊδι προΐαψεν\n1.4) ἡρώων, αὐτοὺς δὲ ἑλώρια τεῦχε κύνεσσιν\n1.5) οἰωνοῖσί τε πᾶσι, Διὸς δ᾽ ἐτελείετο βουλή,\n1.6) ἐξ οὗ δὴ τὰ πρῶτα διαστήτην ἐρίσαντε\n1.7) Ἀτρεΐδης τε ἄναξ ἀνδρῶν καὶ δῖος Ἀχιλλεύς.", + "format": "text/plain", + "language": "el" + }, + { + "type": "TextualBody", + "value": "1.1-1.7) The wrath sing, goddess, of Peleus' son, Achilles, that destructive wrath which brought countless woes upon the Achaeans, and sent forth to Hades many valiant souls of heroes, and made them themselves spoil for dogs and every bird; thus the plan of Zeus came to fulfillment, from the time when 1 first they parted in strife Atreus' son, king of men, and brilliant Achilles.", + "format": "text/plain", + "language": "en" + } + ] +} From 682e2db1c24cfec718a961c5a6958365a70865f7 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 4 Dec 2019 17:09:07 -0600 Subject: [PATCH 03/10] backport wac and wa from delarose and adapt --- hmt_cite_atlas/iiif.py | 70 ++++++++ hmt_cite_atlas/library/shortcuts.py | 32 ++++ hmt_cite_atlas/settings.py | 3 + hmt_cite_atlas/urls.py | 3 +- hmt_cite_atlas/web_annotation/__init__.py | 0 hmt_cite_atlas/web_annotation/apps.py | 5 + hmt_cite_atlas/web_annotation/shims.py | 63 +++++++ hmt_cite_atlas/web_annotation/urls.py | 26 +++ hmt_cite_atlas/web_annotation/utils.py | 201 ++++++++++++++++++++++ hmt_cite_atlas/web_annotation/views.py | 99 +++++++++++ 10 files changed, 501 insertions(+), 1 deletion(-) create mode 100644 hmt_cite_atlas/iiif.py create mode 100644 hmt_cite_atlas/library/shortcuts.py create mode 100644 hmt_cite_atlas/web_annotation/__init__.py create mode 100644 hmt_cite_atlas/web_annotation/apps.py create mode 100644 hmt_cite_atlas/web_annotation/shims.py create mode 100644 hmt_cite_atlas/web_annotation/urls.py create mode 100644 hmt_cite_atlas/web_annotation/utils.py create mode 100644 
hmt_cite_atlas/web_annotation/views.py diff --git a/hmt_cite_atlas/iiif.py b/hmt_cite_atlas/iiif.py new file mode 100644 index 0000000..f075962 --- /dev/null +++ b/hmt_cite_atlas/iiif.py @@ -0,0 +1,70 @@ +from posixpath import join as urljoin +from urllib.parse import quote_plus, unquote + + +class IIIFResolver: + BASE_URL = "https://image.library.jhu.edu/iiif/" + # @@@ figure out what this actually is in IIIF spec terms + CANVAS_BASE_URL = "https://rosetest.library.jhu.edu/rosademo/iiif3/" + COLLETION_SUBDIR = "homer/VA" + iruri_kwargs = { + "region": "full", + "size": "full", + "rotation": "0", + "quality": "default", + "format": "jpg", + } + + def __init__(self, urn): + """ + IIIFResolver("urn:cite2:hmt:vaimg.2017a:VA012VN_0514") + """ + self.urn = urn + + @property + def munged_image_path(self): + image_part = self.urn.rsplit(":", maxsplit=1).pop() + return image_part.replace("_", "-") + + @property + def iiif_image_id(self): + path = urljoin(self.COLLETION_SUBDIR, self.munged_image_path) + return quote_plus(path) + + @property + def identifier(self): + return urljoin(self.BASE_URL, self.iiif_image_id) + + @property + def info_url(self): + info_path = "image.json" + return urljoin(self.identifier, info_path) + + def build_image_request_url(self, **kwargs): + iruri_kwargs = {} + iruri_kwargs.update(self.iruri_kwargs) + iruri_kwargs.update(**kwargs) + return urljoin( + self.identifier, + "{region}/{size}/{rotation}/{quality}.{format}".format(**iruri_kwargs), + ) + + @property + def image_url(self): + return self.build_image_request_url() + + @property + def canvas_url(self): + path = unquote(self.iiif_image_id) + return urljoin(self.CANVAS_BASE_URL, path, "canvas") + + def get_region_by_pct(self, dimensions): + percentages = ",".join( + [ + f'{dimensions["x"]:.2f}', + f'{dimensions["y"]:.2f}', + f'{dimensions["w"]:.2f}', + f'{dimensions["h"]:.2f}', + ] + ) + return f"pct:{percentages}" diff --git a/hmt_cite_atlas/library/shortcuts.py 
b/hmt_cite_atlas/library/shortcuts.py new file mode 100644 index 0000000..13e19aa --- /dev/null +++ b/hmt_cite_atlas/library/shortcuts.py @@ -0,0 +1,32 @@ +from .models import CITEDatum, CTSCatalog, CTSDatum + + +CITATION_SCHEME_SCHOLION = "scholion" + + +def get_lines_for_folio(folio_urn): + """ + get_lines_for_folio("urn:cite2:hmt:msA.v1:12r") + """ + try: + folio = CITEDatum.objects.get(urn=folio_urn) + except CITEDatum.DoesNotExist as e: + print(f'Could not resolve folio [urn="{folio_urn}""]') + raise e + + # @@@ this will break in SQLite for the time being, but not in the future: + # https://code.djangoproject.com/ticket/12990 + folio_cite_datum = CITEDatum.objects.filter( + fields__contains={"urn:cite2:hmt:va_dse.v1.surface:": folio.urn} + ) + + catalog_obj = CTSCatalog.objects.exclude( + citation_scheme__contains=[CITATION_SCHEME_SCHOLION] + ).get() + book_line_urn = catalog_obj.urn + + # @@@ might be a way we can do some db-level "icontains" against `urn:cite2:hmt:va_dse.v1.passage:` + line_cite_datum = folio_cite_datum.filter(fields__icontains=book_line_urn) + # @@@ might be a way we can use a subquery against the values in `urn:cite2:hmt:va_dse.v1.passage:` + line_urns = [l.fields["urn:cite2:hmt:va_dse.v1.passage:"] for l in line_cite_datum] + return CTSDatum.objects.filter(urn__in=line_urns) diff --git a/hmt_cite_atlas/settings.py b/hmt_cite_atlas/settings.py index 262e876..5d185e8 100644 --- a/hmt_cite_atlas/settings.py +++ b/hmt_cite_atlas/settings.py @@ -133,6 +133,7 @@ # Project "hmt_cite_atlas", "hmt_cite_atlas.library", + "hmt_cite_atlas.web_annotation", ] if DEBUG: @@ -176,3 +177,5 @@ "SCHEMA": "hmt_cite_atlas.schema.schema", "RELAY_CONNECTION_MAX_LIMIT": None, } + +DEFAULT_HTTP_CACHE_DURATION = 60 * 60 * 24 * 365 # one year diff --git a/hmt_cite_atlas/urls.py b/hmt_cite_atlas/urls.py index 0f05287..3469f8a 100644 --- a/hmt_cite_atlas/urls.py +++ b/hmt_cite_atlas/urls.py @@ -1,5 +1,5 @@ from django.contrib import admin -from django.urls import 
path +from django.urls import include, path from graphene_django.views import GraphQLView @@ -7,4 +7,5 @@ urlpatterns = [ path("admin/", admin.site.urls), path("graphql/", GraphQLView.as_view(graphiql=True)), + path("wa/", include("hmt_cite_atlas.web_annotation.urls")), ] diff --git a/hmt_cite_atlas/web_annotation/__init__.py b/hmt_cite_atlas/web_annotation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hmt_cite_atlas/web_annotation/apps.py b/hmt_cite_atlas/web_annotation/apps.py new file mode 100644 index 0000000..9ebf6f2 --- /dev/null +++ b/hmt_cite_atlas/web_annotation/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class WebAnnotationConfig(AppConfig): + name = "hmt_cite_atlas.web_annotation" diff --git a/hmt_cite_atlas/web_annotation/shims.py b/hmt_cite_atlas/web_annotation/shims.py new file mode 100644 index 0000000..a939570 --- /dev/null +++ b/hmt_cite_atlas/web_annotation/shims.py @@ -0,0 +1,63 @@ +from django.utils.functional import cached_property + +import requests + +from ..library.shortcuts import get_lines_for_folio + + +class AlignmentsShim: + """ + Shim to allow us to retrieve alignment data from explorehomer; + eventually, we'll likely want to write out bounding box info as standoff annotation + and ship to explorehomer directly.
+ """ + + GRAPHQL_ENDPOINT = "https://explorehomer-atlas-dev.herokuapp.com/graphql/" + + def __init__(self, folio_urn): + self.folio_urn = folio_urn + + @cached_property + def folio_lines(self): + return get_lines_for_folio(self.folio_urn) + + @cached_property + def line_urns(self): + return [l.urn for l in self.folio_lines] + + def get_ref(self): + first = self.line_urns[0].rsplit(":", maxsplit=1)[1] + last = self.line_urns[-1].rsplit(":", maxsplit=1)[1] + if first == last: + return first + return f"{first}-{last}" + + def get_alignment_data(self, idx=None, fields=None): + if fields is None: + fields = ["idx", "items", "citation"] + ref = self.get_ref() + # @@@ hardcoded version urn + # @@@ add the ability to get a count from an edge + predicate = f'version_Urn:"urn:cts:greekLit:tlg0012.tlg001.perseus-grc2" reference:"{ref}"' + if idx: + predicate = f"{predicate} idx: {idx}" + resp = requests.post( + self.GRAPHQL_ENDPOINT, + json={ + "query": """ + { + alignmentChunks(%s) { + edges { + node { + %s + } + } + } + }""" + % (predicate, "\n".join(fields)) + }, + ) + data = [] + for edge in resp.json()["data"]["alignmentChunks"]["edges"]: + data.append(edge["node"]) + return data diff --git a/hmt_cite_atlas/web_annotation/urls.py b/hmt_cite_atlas/web_annotation/urls.py new file mode 100644 index 0000000..bf894be --- /dev/null +++ b/hmt_cite_atlas/web_annotation/urls.py @@ -0,0 +1,26 @@ +from django.urls import path + +from .views import ( + serve_wa, + serve_web_annotation_collection, + serve_web_annotation_page +) + + +urlpatterns = [ + path( + "<urn>/translation-alignment/collection/<format>/", + serve_web_annotation_collection, + name="serve_web_annotation_collection", + ), + path( + "<urn>/translation-alignment/collection/<format>/<int:zero_page_number>/", + serve_web_annotation_page, + name="serve_web_annotation_page", + ), + path( + "<urn>/translation-alignment/<int:idx>/<format>/", + serve_wa, + name="serve_web_annotation", + ), +] diff --git a/hmt_cite_atlas/web_annotation/utils.py b/hmt_cite_atlas/web_annotation/utils.py new file
mode 100644 index 0000000..0ae4a19 --- /dev/null +++ b/hmt_cite_atlas/web_annotation/utils.py @@ -0,0 +1,201 @@ +from django.db.models import Q +from django.utils.functional import cached_property + +from ..iiif import IIIFResolver +from ..library.models import CITEDatum + + +def as_zero_based(int_val): + """ + https://www.w3.org/TR/annotation-model/#model-35 + The relative position of the first Annotation in the items list, relative to the Annotation Collection. The first entry in the first page is considered to be entry 0. + Each Page should have exactly 1 startIndex, and must not have more than 1. The value must be an xsd:nonNegativeInteger. + + JHU seems to be using zero-based pagination too, so we're matching that. + """ + return int_val - 1 + + +def map_dimensions_to_integers(dimensions): + """ + FragmentSelector requires percentages expressed as integers. + + https://www.w3.org/TR/media-frags/#naming-space + """ + int_dimensions = {} + for k, v in dimensions.items(): + int_dimensions[k] = round(v) + return int_dimensions + + +class WebAnnotationGenerator: + def __init__(self, folio_urn, alignment): + self.urn = folio_urn + self.alignment = alignment + self.idx = alignment["idx"] + + @cached_property + def folio_image_urn(self): + datum = CITEDatum.objects.get(urn=self.urn) + return datum.fields["urn:cite2:hmt:msA.v1.image:"] + + @property + def greek_lines(self): + return self.alignment["items"][0] + + @property + def english_lines(self): + return self.alignment["items"][1] + + def as_text(self, lines): + return "\n".join([f"{l[0]}) {l[1]}" for l in lines]) + + def as_html(self, lines): + # @@@ this could be rendered via Django if we need fancier HTML + return "<ul>" + "".join([f"<li>{l[0]}) {l[1]}</li>" for l in lines]) + "</ul>
" + + @property + def alignment_urn(self): + # @@@ what if we have multiple alignments covering a single line? + # @@@ we can use the idx, but no too helpful downstream + version_urn = "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:" + return f'{version_urn}{self.alignment["citation"]}' + + def get_urn_coordinates(self, urns): + # @@@ support a single URN + predicate = Q() + for urn in urns: + predicate.add( + Q(fields__contains={"urn:cite2:hmt:va_dse.v1.passage:": urn}), Q.OR + ) + results = CITEDatum.objects.filter(predicate).order_by("pk") + coordinates = [] + for result in results: + if result.fields["urn:cite2:hmt:va_dse.v1.surface:"] != self.urn: + # @@@ validates that the URNs are found within the current folio + continue + + _, roi = result.fields["urn:cite2:hmt:va_dse.v1.imageroi:"].rsplit( + "@", maxsplit=1 + ) + coords = [float(part) for part in roi.split(",")] + coordinates.append(coords) + return coordinates + + def get_bounding_box_dimensions(self, coords): + dimensions = {} + y_coords = [] + for x, y, w, h in coords: + dimensions["x"] = min(dimensions.get("x", 100.0), x * 100) + dimensions["y"] = min(dimensions.get("y", 100.0), y * 100) + dimensions["w"] = max(dimensions.get("w", 0.0), w * 100) + y_coords.append(y * 100) + + dimensions["h"] = y_coords[-1] - y_coords[0] + h * 100 + return dimensions + + @cached_property + def common_obj(self): + cite_version_urn = "urn:cts:greekLit:tlg0012.tlg001.msA:" + urns = [] + # @@@ this is a giant hack, would be better to resolve the citation ref + for ref, _, _ in self.greek_lines: + urns.append(f"{cite_version_urn}{ref}") + urn_coordinates = self.get_urn_coordinates(urns) + precise_bb_dimensions = self.get_bounding_box_dimensions(urn_coordinates) + bb_dimensions = map_dimensions_to_integers(precise_bb_dimensions) + + dimensions_str = ",".join( + [ + str(bb_dimensions["x"]), + str(bb_dimensions["y"]), + str(bb_dimensions["w"]), + str(bb_dimensions["h"]), + ] + ) + fragment_selector_val = 
f"xywh=percent:{dimensions_str}" + + image_urn = self.folio_image_urn + iiif_obj = IIIFResolver(image_urn) + image_api_selector_region = iiif_obj.get_region_by_pct(precise_bb_dimensions) + + return { + "@context": "http://www.w3.org/ns/anno.jsonld", + "type": "Annotation", + "target": [ + self.alignment_urn, + { + "type": "SpecificResource", + "source": {"id": f"{iiif_obj.canvas_url}", "type": "Canvas"}, + "selector": { + "type": "FragmentSelector", + "value": fragment_selector_val, + }, + }, + { + "type": "SpecificResource", + "source": {"id": f"{iiif_obj.identifier}", "type": "Image"}, + "selector": { + "type": "ImageApiSelector", + "region": image_api_selector_region, + }, + }, + iiif_obj.build_image_request_url(region=image_api_selector_region), + ], + } + + def get_textual_bodies(self, body_format): + bodies = [ + {"type": "TextualBody", "language": "el"}, + {"type": "TextualBody", "language": "en"}, + ] + if body_format == "text": + for body, lines in zip(bodies, [self.greek_lines, self.english_lines]): + body["format"] = "text/plain" + body["value"] = self.as_text(lines) + elif body_format == "html": + for body, lines in zip(bodies, [self.greek_lines, self.english_lines]): + body["format"] = "text/html" + body["value"] = self.as_html(lines) + return bodies + + @property + def text_obj(self): + obj = { + "body": self.get_textual_bodies("text"), + "id": f"/wa/{self.urn}/translation-alignment/{self.idx}/text/", + } + obj.update(self.common_obj) + return obj + + @property + def html_obj(self): + obj = { + "body": self.get_textual_bodies("html"), + "id": f"/wa/{self.urn}/translation-alignment/{self.idx}/html/", + } + obj.update(self.common_obj) + return obj + + +class WebAnnotationCollectionGenerator: + def __init__(self, urn, alignments, format): + self.alignments = alignments + self.format = format + self.urn = urn + self.item_list = [] + + def append_to_item_list(self, data): + # strip @context key + data.pop("@context", None) + self.item_list.append(data) + +
@property + def items(self): + for alignment in self.alignments: + wa = WebAnnotationGenerator(self.urn, alignment) + if self.format == "html": + self.append_to_item_list(wa.html_obj) + elif self.format == "text": + self.append_to_item_list(wa.text_obj) + return self.item_list diff --git a/hmt_cite_atlas/web_annotation/views.py b/hmt_cite_atlas/web_annotation/views.py new file mode 100644 index 0000000..6d22bc6 --- /dev/null +++ b/hmt_cite_atlas/web_annotation/views.py @@ -0,0 +1,99 @@ +from django.conf import settings +from django.core.paginator import EmptyPage, Paginator +from django.http import Http404, JsonResponse +from django.shortcuts import get_object_or_404 +from django.urls import reverse_lazy +from django.views.decorators.cache import cache_page + +from ..library.models import CITEDatum +from .shims import AlignmentsShim +from .utils import ( + WebAnnotationCollectionGenerator, + WebAnnotationGenerator, + as_zero_based +) + + +PAGE_SIZE = 10 + + +@cache_page(settings.DEFAULT_HTTP_CACHE_DURATION) +def serve_wa(request, urn, idx, format): + # @@@ query alignments from Postgres + alignment_by_idx = None + alignments = AlignmentsShim(urn).get_alignment_data() + for alignment in alignments: + if alignment["idx"] == idx: + alignment_by_idx = alignment + break + if not alignment_by_idx: + raise Http404 + + wa = WebAnnotationGenerator(urn, alignment) + if format == "text": + return JsonResponse(data=wa.text_obj) + elif format == "html": + return JsonResponse(data=wa.html_obj) + else: + raise Http404 + + +@cache_page(settings.DEFAULT_HTTP_CACHE_DURATION) +def serve_web_annotation_collection(request, urn, format): + get_object_or_404(CITEDatum, **{"urn": urn}) + # @@@ query alignments from Postgres + alignments = AlignmentsShim(urn).get_alignment_data(fields=["idx"]) + paginator = Paginator(alignments, per_page=PAGE_SIZE) + data = { + "@context": "http://www.w3.org/ns/anno.jsonld", + "id": reverse_lazy("serve_web_annotation_collection", args=[urn, format]), + 
"type": "AnnotationCollection", + "label": f"Translation Alignments for {urn}", + "total": paginator.count, + "first": reverse_lazy( + "serve_web_annotation_page", + args=[urn, format, as_zero_based(paginator.page_range[0])], + ), + "last": reverse_lazy( + "serve_web_annotation_page", + args=[urn, format, as_zero_based(paginator.page_range[-1])], + ), + } + return JsonResponse(data) + + +@cache_page(settings.DEFAULT_HTTP_CACHE_DURATION) +def serve_web_annotation_page(request, urn, format, zero_page_number): + get_object_or_404(CITEDatum, **{"urn": urn}) + + # @@@ query alignments from Postgres + alignments = AlignmentsShim(urn).get_alignment_data() + + page_number = zero_page_number + 1 + paginator = Paginator(alignments, per_page=PAGE_SIZE) + try: + page = paginator.page(page_number) + except EmptyPage: + raise Http404 + collection = WebAnnotationCollectionGenerator(urn, page.object_list, format) + data = { + "@context": "http://www.w3.org/ns/anno.jsonld", + "id": reverse_lazy( + "serve_web_annotation_page", args=[urn, format, as_zero_based(page_number)] + ), + "type": "AnnotationPage", + "partOf": reverse_lazy("serve_web_annotation_collection", args=[urn, format]), + "startIndex": as_zero_based(page.start_index()), + "items": collection.items, + } + if page.has_previous(): + data["prev"] = reverse_lazy( + "serve_web_annotation_page", + args=[urn, format, as_zero_based(page.previous_page_number())], + ) + if page.has_next(): + data["next"] = reverse_lazy( + "serve_web_annotation_page", + args=[urn, format, as_zero_based(page.next_page_number())], + ) + return JsonResponse(data) From f7a36ec6ba3fc45bbab577cb870f52d4f681de23 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 4 Dec 2019 17:21:33 -0600 Subject: [PATCH 04/10] sort and add requests --- requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7a987a5..b3d9f50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ 
case-conversion==2.1.0 -django-cors-headers==3.1.0 dj-database-url==0.5.0 -django==2.2.6 +django-cors-headers==3.1.0 django-filter==2.2.0 +django==2.2.6 graphene-django==2.5.0 gunicorn==19.9.0 psycopg2-binary==2.8.3 +requests==2.22.0 whitenoise==4.1.3 From 72ef6e3fad7890be7bed45be1b42132be4695bfd Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Thu, 5 Dec 2019 09:55:49 -0600 Subject: [PATCH 05/10] prefer absolute URLs --- fixtures/sites.json | 4 ++-- hmt_cite_atlas/settings.py | 1 + hmt_cite_atlas/web_annotation/shortcuts.py | 11 +++++++++ hmt_cite_atlas/web_annotation/utils.py | 27 ++++++++++++++-------- hmt_cite_atlas/web_annotation/views.py | 26 ++++++++++++++------- 5 files changed, 49 insertions(+), 20 deletions(-) create mode 100644 hmt_cite_atlas/web_annotation/shortcuts.py diff --git a/fixtures/sites.json b/fixtures/sites.json index bda24f8..9f50d68 100644 --- a/fixtures/sites.json +++ b/fixtures/sites.json @@ -11,8 +11,8 @@ "pk": 2, "model": "sites.site", "fields": { - "domain": "example.com", - "name": "example.com" + "domain": "explorehomer-atlas-dev3.herokuapp.com", + "name": "Explore Homer ATLAS [dev3]" } } ] diff --git a/hmt_cite_atlas/settings.py b/hmt_cite_atlas/settings.py index 5d185e8..edf0da0 100644 --- a/hmt_cite_atlas/settings.py +++ b/hmt_cite_atlas/settings.py @@ -179,3 +179,4 @@ } DEFAULT_HTTP_CACHE_DURATION = 60 * 60 * 24 * 365 # one year +DEFAULT_HTTP_PROTOCOL = os.environ.get("DEFAULT_HTTP_PROTOCOL", "http") diff --git a/hmt_cite_atlas/web_annotation/shortcuts.py b/hmt_cite_atlas/web_annotation/shortcuts.py new file mode 100644 index 0000000..791700e --- /dev/null +++ b/hmt_cite_atlas/web_annotation/shortcuts.py @@ -0,0 +1,11 @@ +from django.conf import settings +from django.contrib.sites.models import Site + + +CURRENT_SITE = Site.objects.get_current() + + +def build_absolute_url(url): + return "{scheme}://{host}{url}".format( + scheme=settings.DEFAULT_HTTP_PROTOCOL, host=CURRENT_SITE.domain, url=url + ) diff --git 
a/hmt_cite_atlas/web_annotation/utils.py b/hmt_cite_atlas/web_annotation/utils.py index 0ae4a19..4d9f3eb 100644 --- a/hmt_cite_atlas/web_annotation/utils.py +++ b/hmt_cite_atlas/web_annotation/utils.py @@ -1,8 +1,10 @@ from django.db.models import Q +from django.urls import reverse_lazy from django.utils.functional import cached_property from ..iiif import IIIFResolver from ..library.models import CITEDatum +from .shortcuts import build_absolute_url def as_zero_based(int_val): @@ -159,23 +161,28 @@ def get_textual_bodies(self, body_format): body["value"] = self.as_html(lines) return bodies - @property - def text_obj(self): + def get_absolute_url(self, body_format): + url = reverse_lazy( + "serve_web_annotation", + kwargs={"urn": self.urn, "idx": self.idx, "format": body_format}, + ) + return build_absolute_url(url) + + def get_object_for_body_format(self, body_format): obj = { - "body": self.get_textual_bodies("text"), - "id": f"/wa/{self.urn}/translation-alignment/{self.idx}/text/", + "body": self.get_textual_bodies(body_format), + "id": self.get_absolute_url(body_format), } obj.update(self.common_obj) return obj + @property + def text_obj(self): + return self.get_object_for_body_format("text") + @property def html_obj(self): - obj = { - "body": self.get_textual_bodies("html"), - "id": f"/wa/{self.urn}/translation-alignment/{self.idx}/html/", - } - obj.update(self.common_obj) - return obj + return self.get_object_for_body_format("html") class WebAnnotationCollectionGenerator: diff --git a/hmt_cite_atlas/web_annotation/views.py b/hmt_cite_atlas/web_annotation/views.py index 6d22bc6..3540c8f 100644 --- a/hmt_cite_atlas/web_annotation/views.py +++ b/hmt_cite_atlas/web_annotation/views.py @@ -7,6 +7,7 @@ from ..library.models import CITEDatum from .shims import AlignmentsShim +from .shortcuts import build_absolute_url from .utils import ( WebAnnotationCollectionGenerator, WebAnnotationGenerator, @@ -44,12 +45,8 @@ def serve_web_annotation_collection(request, urn, 
format): # @@@ query alignments from Postgres alignments = AlignmentsShim(urn).get_alignment_data(fields=["idx"]) paginator = Paginator(alignments, per_page=PAGE_SIZE) - data = { - "@context": "http://www.w3.org/ns/anno.jsonld", + urls = { "id": reverse_lazy("serve_web_annotation_collection", args=[urn, format]), - "type": "AnnotationCollection", - "label": f"Translation Alignments for {urn}", - "total": paginator.count, "first": reverse_lazy( "serve_web_annotation_page", args=[urn, format, as_zero_based(paginator.page_range[0])], @@ -59,6 +56,15 @@ def serve_web_annotation_collection(request, urn, format): args=[urn, format, as_zero_based(paginator.page_range[-1])], ), } + data = { + "@context": "http://www.w3.org/ns/anno.jsonld", + "id": build_absolute_url(urls["id"]), + "type": "AnnotationCollection", + "label": f"Translation Alignments for {urn}", + "total": paginator.count, + "first": build_absolute_url(urls["first"]), + "last": build_absolute_url(urls["last"]), + } return JsonResponse(data) @@ -76,13 +82,17 @@ def serve_web_annotation_page(request, urn, format, zero_page_number): except EmptyPage: raise Http404 collection = WebAnnotationCollectionGenerator(urn, page.object_list, format) - data = { - "@context": "http://www.w3.org/ns/anno.jsonld", + urls = { "id": reverse_lazy( "serve_web_annotation_page", args=[urn, format, as_zero_based(page_number)] ), + "part_of": reverse_lazy("serve_web_annotation_collection", args=[urn, format]), + } + data = { + "@context": "http://www.w3.org/ns/anno.jsonld", + "id": build_absolute_url(urls["id"]), "type": "AnnotationPage", - "partOf": reverse_lazy("serve_web_annotation_collection", args=[urn, format]), + "partOf": build_absolute_url(urls["part_of"]), "startIndex": as_zero_based(page.start_index()), "items": collection.items, } From a2f7ba6b2b099bb52bf5f46efe360ddd861f7dac Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Mon, 16 Mar 2020 10:02:17 -0500 Subject: [PATCH 06/10] override endpoint --- 
hmt_cite_atlas/web_annotation/shims.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hmt_cite_atlas/web_annotation/shims.py b/hmt_cite_atlas/web_annotation/shims.py index a939570..6778ab4 100644 --- a/hmt_cite_atlas/web_annotation/shims.py +++ b/hmt_cite_atlas/web_annotation/shims.py @@ -1,3 +1,5 @@ +import os + from django.utils.functional import cached_property import requests @@ -12,7 +14,7 @@ class AlignmentsShim: and ship to explorehomer directly. """ - GRAPHQL_ENDPOINT = "https://explorehomer-atlas-dev.herokuapp.com/graphql/" + GRAPHQL_ENDPOINT = os.environ.get("ATLAS_GRAPHQL_ENDPOINT", "https://explorehomer-feature-tr-vlxhmh.herokuapp.com/graphql/") def __init__(self, folio_urn): self.folio_urn = folio_urn From b8f252eed98b528699cf19f88725fd839d4af7da Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Thu, 19 Mar 2020 12:20:33 -0500 Subject: [PATCH 07/10] update for new reference format --- hmt_cite_atlas/web_annotation/shims.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmt_cite_atlas/web_annotation/shims.py b/hmt_cite_atlas/web_annotation/shims.py index 6778ab4..ff0c5ed 100644 --- a/hmt_cite_atlas/web_annotation/shims.py +++ b/hmt_cite_atlas/web_annotation/shims.py @@ -40,7 +40,8 @@ def get_alignment_data(self, idx=None, fields=None): ref = self.get_ref() # @@@ hardcoded version urn # @@@ add the ability to get a count from an edge - predicate = f'version_Urn:"urn:cts:greekLit:tlg0012.tlg001.perseus-grc2" reference:"{ref}"' + reference = f"urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:{ref}" + predicate = f'reference:"{reference}"' if idx: predicate = f"{predicate} idx: {idx}" resp = requests.post( From ed683961985525a1aebcb53f08695d6e90b73b01 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Thu, 19 Mar 2020 12:24:40 -0500 Subject: [PATCH 08/10] update endpoint --- hmt_cite_atlas/web_annotation/shims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmt_cite_atlas/web_annotation/shims.py 
b/hmt_cite_atlas/web_annotation/shims.py index ff0c5ed..3b8a464 100644 --- a/hmt_cite_atlas/web_annotation/shims.py +++ b/hmt_cite_atlas/web_annotation/shims.py @@ -14,7 +14,7 @@ class AlignmentsShim: and ship to explorehomer directly. """ - GRAPHQL_ENDPOINT = os.environ.get("ATLAS_GRAPHQL_ENDPOINT", "https://explorehomer-feature-tr-vlxhmh.herokuapp.com/graphql/") + GRAPHQL_ENDPOINT = os.environ.get("ATLAS_GRAPHQL_ENDPOINT", "https://explorehomer-atlas-dev.herokuapp.com/graphql/") def __init__(self, folio_urn): self.folio_urn = folio_urn From a008bc21095fd3346272108f057214cad6baaa33 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Fri, 3 Apr 2020 15:25:21 -0500 Subject: [PATCH 09/10] update to textAlignmentChunks --- hmt_cite_atlas/web_annotation/shims.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmt_cite_atlas/web_annotation/shims.py b/hmt_cite_atlas/web_annotation/shims.py index 3b8a464..2208ec2 100644 --- a/hmt_cite_atlas/web_annotation/shims.py +++ b/hmt_cite_atlas/web_annotation/shims.py @@ -49,7 +49,7 @@ def get_alignment_data(self, idx=None, fields=None): json={ "query": """ { - alignmentChunks(%s) { + textAlignmentChunks(%s) { edges { node { %s @@ -61,6 +61,6 @@ def get_alignment_data(self, idx=None, fields=None): }, ) data = [] - for edge in resp.json()["data"]["alignmentChunks"]["edges"]: + for edge in resp.json()["data"]["textAlignmentChunks"]["edges"]: data.append(edge["node"]) return data From c65c428ffcc08a250b43bef1003d9dd9ed8e6365 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Fri, 3 Apr 2020 15:28:44 -0500 Subject: [PATCH 10/10] prefer absolute URLs for next / prev --- hmt_cite_atlas/web_annotation/views.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hmt_cite_atlas/web_annotation/views.py b/hmt_cite_atlas/web_annotation/views.py index 3540c8f..7b7c846 100644 --- a/hmt_cite_atlas/web_annotation/views.py +++ b/hmt_cite_atlas/web_annotation/views.py @@ -97,13 +97,15 @@ def 
serve_web_annotation_page(request, urn, format, zero_page_number): "items": collection.items, } if page.has_previous(): - data["prev"] = reverse_lazy( + prev_url = reverse_lazy( "serve_web_annotation_page", args=[urn, format, as_zero_based(page.previous_page_number())], ) + data["prev"] = build_absolute_url(prev_url) if page.has_next(): - data["next"] = reverse_lazy( + next_url = reverse_lazy( "serve_web_annotation_page", args=[urn, format, as_zero_based(page.next_page_number())], ) + data["next"] = build_absolute_url(next_url) return JsonResponse(data)