Merge pull request #35 from scaife-viewer/feature/image-annotations

Merge #30 into develop
scaife-viewer · Apr 23, 2020 · c927364 · c927364
2 parents 5709c57 + 65d85d2
commit c927364
Show file tree

Hide file tree

Showing 15 changed files with 611 additions and 67 deletions.
diff --git a/bin/post_compile b/bin/post_compile
@@ -1,2 +1,3 @@
 #!/bin/bash
 python manage.py prepare_db
+python manage.py loaddata fixtures/sites.json
diff --git a/fixtures/sites.json b/fixtures/sites.json
@@ -14,5 +14,13 @@
             "domain": "example.com",
             "name": "example.com"
         }
+    },
+    {
+        "pk": 3,
+        "model": "sites.site",
+        "fields": {
+            "domain": "aniop-atlas-staging.eldarion.com",
+            "name": "AniOp ATLAS [staging]"
+        }
     }
 ]
diff --git a/readhomer_atlas/iiif.py b/readhomer_atlas/iiif.py
@@ -0,0 +1,70 @@
+from posixpath import join as urljoin
+from urllib.parse import quote_plus, unquote
+
+
+class IIIFResolver:
+    """
+    Build IIIF URLs for an image identified by a CITE2 URN.
+
+    The last ":"-delimited component of the URN is taken as the image name
+    (with "_" replaced by "-"), joined to `COLLECTION_SUBDIR` and
+    percent-encoded to form the image identifier appended to `BASE_URL`.
+    """
+
+    BASE_URL = "https://image.library.jhu.edu/iiif/"
+    # @@@ figure out what this actually is in IIIF spec terms
+    CANVAS_BASE_URL = "https://rosetest.library.jhu.edu/rosademo/iiif3/"
+    COLLECTION_SUBDIR = "homer/VA"
+    # Original (misspelled) name, kept as an alias so existing references
+    # to it keep working.
+    COLLETION_SUBDIR = COLLECTION_SUBDIR
+    # Default components of an image request URL; individual values can be
+    # overridden per call via `build_image_request_url(**kwargs)`.
+    iruri_kwargs = {
+        "region": "full",
+        "size": "full",
+        "rotation": "0",
+        "quality": "default",
+        "format": "jpg",
+    }
+
+    def __init__(self, urn):
+        """
+        IIIFResolver("urn:cite2:hmt:vaimg.2017a:VA012VN_0514")
+        """
+        self.urn = urn
+
+    @property
+    def munged_image_path(self):
+        """Image name: the URN's last component with "_" replaced by "-"."""
+        image_part = self.urn.rsplit(":", maxsplit=1).pop()
+        return image_part.replace("_", "-")
+
+    @property
+    def iiif_image_id(self):
+        """Percent-encoded "<COLLECTION_SUBDIR>/<image name>" ("/" becomes %2F)."""
+        path = urljoin(self.COLLECTION_SUBDIR, self.munged_image_path)
+        return quote_plus(path)
+
+    @property
+    def identifier(self):
+        """`BASE_URL` followed by the encoded image identifier."""
+        return urljoin(self.BASE_URL, self.iiif_image_id)
+
+    @property
+    def info_url(self):
+        """URL of the image's JSON description document."""
+        # NOTE(review): the IIIF Image API names this document "info.json";
+        # confirm that "image.json" is what the image server expects.
+        info_path = "image.json"
+        return urljoin(self.identifier, info_path)
+
+    def build_image_request_url(self, **kwargs):
+        """
+        Return the image request URL for this image.
+
+        Keyword arguments (`region`, `size`, `rotation`, `quality`,
+        `format`) override the class-level defaults in `iruri_kwargs`.
+        """
+        # Merge into a fresh dict so the shared class-level defaults are
+        # never mutated.
+        params = {**self.iruri_kwargs, **kwargs}
+        return urljoin(
+            self.identifier,
+            "{region}/{size}/{rotation}/{quality}.{format}".format(**params),
+        )
+
+    @property
+    def image_url(self):
+        """Image request URL built from the default parameters only."""
+        return self.build_image_request_url()
+
+    @property
+    def canvas_url(self):
+        """`CANVAS_BASE_URL` + the decoded image identifier + "canvas"."""
+        path = unquote(self.iiif_image_id)
+        return urljoin(self.CANVAS_BASE_URL, path, "canvas")
+
+    def get_region_by_pct(self, dimensions):
+        """
+        Return a "pct:x,y,w,h" region string.
+
+        `dimensions` is a mapping with numeric "x", "y", "w" and "h" values;
+        each is formatted with two decimal places.
+        """
+        percentages = ",".join(
+            f"{dimensions[key]:.2f}" for key in ("x", "y", "w", "h")
+        )
+        return f"pct:{percentages}"
diff --git a/readhomer_atlas/library/models.py b/readhomer_atlas/library/models.py
@@ -95,7 +95,7 @@ def resolve_references(self):
 
         if delta_urns:
             print(
-                f'Could not resolve all references, probably to bad data in the CEX file [urn="{self.urn}" unresolved_urns="{",".join(delta_urns)}"]'
+                f'Could not resolve all references, probably due to bad data in the CEX file [urn="{self.urn}" unresolved_urns="{",".join(delta_urns)}"]'
             )
         self.text_parts.set(reference_objs)
 

diff --git a/readhomer_atlas/library/schema.py b/readhomer_atlas/library/schema.py
@@ -1,4 +1,4 @@
-from django.db.models import Max, Min, Q
+from django.db.models import Q
 
 import django_filters
 from graphene import Connection, Field, ObjectType, String, relay
@@ -18,31 +18,18 @@
     Token,
 )
 from .urn import URN
-from .utils import get_chunker
+from .utils import (
+    extract_version_urn_and_ref,
+    filter_via_ref_predicate,
+    get_chunker,
+    get_textparts_from_passage_reference,
+)
 
 
 # @@@ alias Node because relay.Node is quite different
 TextPart = Node
 
 
-def extract_version_urn_and_ref(value):
-    dirty_version_urn, ref = value.rsplit(":", maxsplit=1)
-    # Restore the trailing ":".
-    version_urn = f"{dirty_version_urn}:"
-    return version_urn, ref
-
-
-def filter_via_ref_predicate(instance, queryset, predicate):
-    # We need a sequential identifier to do the range unless there is something
-    # else we can do with siblings / slicing within treebeard. Using `path`
-    # might work too, but having `idx` also allows us to do simple integer math
-    # as-needed.
-    if queryset.exists():
-        subquery = queryset.filter(predicate).aggregate(min=Min("idx"), max=Max("idx"))
-        queryset = queryset.filter(idx__gte=subquery["min"], idx__lte=subquery["max"])
-    return queryset
-
-
 class LimitedConnectionField(DjangoFilterConnectionField):
     """
     Ensures that queries without `first` or `last` return up to
@@ -191,7 +178,7 @@ def reference_filter(self, queryset, name, value):
             urn__startswith=version_urn,
             depth=len(start.split(".")) + 1,
         )
-        return filter_via_ref_predicate(self, queryset, predicate)
+        return filter_via_ref_predicate(queryset, predicate)
 
     class Meta:
         model = TextPart
@@ -219,52 +206,10 @@ def _add_passage_to_context(self, reference):
 
         self.request.passage["version"] = version
 
-    def _build_predicate(self, queryset, ref, max_rank):
-        predicate = Q()
-        if not ref:
-            # @@@ get all the text parts in the work; do we want to support this
-            # or should we just return the first text part?
-            start = queryset.first().ref
-            end = queryset.last().ref
-        else:
-            try:
-                start, end = ref.split("-")
-            except ValueError:
-                start = end = ref
-
-        # @@@ still need to validate reference based on the depth
-        # start_book, start_line = instance._resolve_ref(start)
-        # end_book, end_line = instance._resolve_ref(end)
-        # the validation might be done through treebeard; for now
-        # going to avoid the queries at this time
-        if start:
-            if len(start.split(".")) == max_rank:
-                condition = Q(ref=start)
-            else:
-                condition = Q(ref__istartswith=f"{start}.")
-            predicate.add(condition, Q.OR)
-        if end:
-            if len(end.split(".")) == max_rank:
-                condition = Q(ref=end)
-            else:
-                condition = Q(ref__istartswith=f"{end}.")
-            predicate.add(condition, Q.OR)
-        if not start or not end:
-            raise ValueError(f"Invalid reference: {ref}")
-
-        return predicate
-
     def get_lowest_textparts_queryset(self, value):
         self._add_passage_to_context(value)
         version = self.request.passage["version"]
-        citation_scheme = version.metadata["citation_scheme"]
-        max_depth = version.get_descendants().last().depth
-
-        max_rank = len(citation_scheme)
-        queryset = version.get_descendants().filter(depth=max_depth)
-        _, ref = value.rsplit(":", maxsplit=1)
-        predicate = self._build_predicate(queryset, ref, max_rank)
-        return filter_via_ref_predicate(self, queryset, predicate)
+        return get_textparts_from_passage_reference(value, version=version)
 
 
 class PassageTextPartFilterSet(TextPartsReferenceFilterMixin, django_filters.FilterSet):

diff --git a/readhomer_atlas/library/utils.py b/readhomer_atlas/library/utils.py
@@ -1,5 +1,5 @@
 from django.conf import settings
-from django.db.models import Max, Min
+from django.db.models import Max, Min, Q
 from django.utils.functional import cached_property
 
 
@@ -88,3 +88,68 @@ def get_chunker(queryset, start_idx, chunk_length, **kwargs):
     if chunk_length < settings.ATLAS_CONFIG["IN_MEMORY_PASSAGE_CHUNK_MAX"]:
         return InMemorySiblingChunker(queryset, start_idx, chunk_length, **kwargs)
     return SQLSiblingChunker(queryset, start_idx, chunk_length, **kwargs)
+
+
+def extract_version_urn_and_ref(value):
+    """
+    Split a passage reference on its last ":" into `(version_urn, ref)`.
+
+    The returned version URN keeps its trailing ":".
+    """
+    urn_without_colon, ref = value.rsplit(":", maxsplit=1)
+    return urn_without_colon + ":", ref
+
+
+def build_textpart_predicate(queryset, ref, max_rank):
+    """
+    Build a `Q` predicate matching the text parts at both ends of `ref`.
+
+    `ref` is either a single reference or a "start-end" range. When it is
+    empty, the `ref` values of the first and last objects in `queryset` are
+    used instead. An endpoint with exactly `max_rank` "."-separated
+    components is matched exactly; any other endpoint is matched as a
+    case-insensitive "<endpoint>." prefix.
+
+    Raises `ValueError` when either endpoint is empty.
+    """
+
+    def endpoint_condition(endpoint):
+        if len(endpoint.split(".")) == max_rank:
+            return Q(ref=endpoint)
+        return Q(ref__istartswith=f"{endpoint}.")
+
+    if ref:
+        # Anything that does not split into exactly two pieces on "-" is
+        # treated as a single reference.
+        pieces = ref.split("-")
+        if len(pieces) == 2:
+            start, end = pieces
+        else:
+            start = end = ref
+    else:
+        # @@@ get all the text parts in the work; do we want to support this
+        # or should we just return the first text part?
+        start = queryset.first().ref
+        end = queryset.last().ref
+
+    # @@@ still need to validate reference based on the depth
+    # start_book, start_line = instance._resolve_ref(start)
+    # end_book, end_line = instance._resolve_ref(end)
+    # the validation might be done through treebeard; for now
+    # going to avoid the queries at this time
+    predicate = Q()
+    for endpoint in (start, end):
+        if endpoint:
+            predicate.add(endpoint_condition(endpoint), Q.OR)
+    if not (start and end):
+        raise ValueError(f"Invalid reference: {ref}")
+
+    return predicate
+
+
+def filter_via_ref_predicate(queryset, predicate):
+    """
+    Narrow `queryset` to the contiguous `idx` range spanned by `predicate`.
+
+    The smallest and largest `idx` among the rows matching `predicate` are
+    looked up, and every row of `queryset` whose `idx` lies between them
+    (inclusive) is returned. An empty `queryset` is returned unchanged; an
+    empty queryset is also returned when nothing matches `predicate`.
+    """
+    # We need a sequential identifier to do the range unless there is something
+    # else we can do with siblings / slicing within treebeard. Using `path`
+    # might work too, but having `idx` also allows us to do simple integer math
+    # as-needed.
+    if not queryset.exists():
+        return queryset
+
+    bounds = queryset.filter(predicate).aggregate(min=Min("idx"), max=Max("idx"))
+    if bounds["min"] is None or bounds["max"] is None:
+        # Nothing matched: Min/Max aggregate to None, which cannot be used
+        # as the value of a gte/lte lookup.
+        return queryset.none()
+    return queryset.filter(idx__gte=bounds["min"], idx__lte=bounds["max"])
+
+
+def get_textparts_from_passage_reference(passage_reference, version):
+    """
+    Return the text parts of `version` covered by `passage_reference`.
+
+    Only descendants at the depth of the last descendant of `version` are
+    considered (presumably the lowest citation level). The reference after
+    the final ":" of `passage_reference` selects the range, and the length
+    of the version's "citation_scheme" metadata is the maximum rank.
+    """
+    scheme = version.metadata["citation_scheme"]
+    leaf_depth = version.get_descendants().last().depth
+    max_rank = len(scheme)
+
+    leaf_textparts = version.get_descendants().filter(depth=leaf_depth)
+    _version_urn, ref = extract_version_urn_and_ref(passage_reference)
+    return filter_via_ref_predicate(
+        leaf_textparts, build_textpart_predicate(leaf_textparts, ref, max_rank)
+    )
diff --git a/readhomer_atlas/settings.py b/readhomer_atlas/settings.py
@@ -133,6 +133,7 @@
     "readhomer_atlas",
     "readhomer_atlas.library",
     "readhomer_atlas.tocs",
+    "readhomer_atlas.web_annotation",
 ]
 
 ADMIN_URL = "admin:index"
@@ -183,3 +184,7 @@
 )
 
 NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+
+# @@@ review
+DEFAULT_HTTP_CACHE_DURATION = 60 * 60 * 24 * 365  # one year
+DEFAULT_HTTP_PROTOCOL = os.environ.get("DEFAULT_HTTP_PROTOCOL", "http")
diff --git a/readhomer_atlas/urls.py b/readhomer_atlas/urls.py
@@ -1,4 +1,4 @@
-from django.urls import path
+from django.urls import include, path
 from django.views.decorators.csrf import csrf_exempt
 
 from django.contrib import admin
@@ -13,4 +13,5 @@
     path("graphql/", csrf_exempt(GraphQLView.as_view(graphiql=True))),
     path("tocs/<filename>", serve_toc, name="serve_toc"),
     path("tocs/", tocs_index, name="tocs_index"),
+    path("wa/", include("readhomer_atlas.web_annotation.urls")),
 ]
diff --git a/readhomer_atlas/web_annotation/__init__.py b/readhomer_atlas/web_annotation/__init__.py
diff --git a/readhomer_atlas/web_annotation/apps.py b/readhomer_atlas/web_annotation/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class WebAnnotationConfig(AppConfig):
+    """App configuration for the `readhomer_atlas.web_annotation` app."""
+
+    # `name` must be the full dotted Python path to the application package;
+    # the derived app label ("web_annotation") is unchanged.
+    name = "readhomer_atlas.web_annotation"
diff --git a/readhomer_atlas/web_annotation/shims.py b/readhomer_atlas/web_annotation/shims.py
@@ -0,0 +1,61 @@
+from django.db.models import Q
+from django.utils.functional import cached_property
+
+from ..library.models import Node, TextAlignmentChunk
+from ..library.utils import (
+    extract_version_urn_and_ref,
+    get_textparts_from_passage_reference,
+)
+from .utils import preferred_folio_urn
+
+
+class AlignmentsShim:
+    """
+    Shim to allow us to retrieve alignment data indirectly from the database.
+
+    Eventually, we'll likely want to write out bounding box info as standoff
+    annotation and ship to explorehomer directly.
+    """
+
+    def __init__(self, folio_urn):
+        self.folio_urn = preferred_folio_urn(folio_urn)
+
+    @cached_property
+    def folio_lines(self):
+        """Queryset of "line" nodes whose URN starts with the folio URN."""
+        return Node.objects.filter(urn__startswith=self.folio_urn).filter(kind="line")
+
+    @cached_property
+    def line_urns(self):
+        """URNs of `folio_lines`, in queryset order."""
+        return [line.urn for line in self.folio_lines]
+
+    def get_ref(self):
+        """
+        Return the passage reference spanned by the folio's lines.
+
+        The reference part of the first and last line URNs is taken (the
+        text after the last ":"), its leading "."-separated component is
+        dropped, and the result is either a single reference or a
+        "first-last" range.
+
+        Raises `IndexError` when the folio has no lines.
+        """
+        if not self.line_urns:
+            # Same exception type the bare index lookups below would raise,
+            # but with a message that identifies the folio.
+            raise IndexError(f"No lines were found for {self.folio_urn}")
+
+        def strip_folio(urn):
+            ref = urn.rsplit(":", maxsplit=1)[1]
+            # @@@ strip folios
+            return ref.split(".", maxsplit=1)[1]
+
+        first = strip_folio(self.line_urns[0])
+        last = strip_folio(self.line_urns[-1])
+        if first == last:
+            return first
+        return f"{first}-{last}"
+
+    def get_alignment_data(self, idx=None, fields=None):
+        """
+        Return alignment chunks that start or end within this folio's passage.
+
+        `fields` names the `TextAlignmentChunk` values to return and
+        defaults to "idx", "items" and "citation". `idx` is accepted but
+        currently unused. The result is a list of dicts.
+
+        Raises `Exception` when the (hard-coded) version URN has no `Node`.
+        """
+        if fields is None:
+            fields = ["idx", "items", "citation"]
+
+        ref = self.get_ref()
+        version_urn = "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:"
+        passage_reference = f"{version_urn}{ref}"
+
+        # @@@ add as a Node manager method
+        version_urn, ref = extract_version_urn_and_ref(passage_reference)
+        try:
+            version = Node.objects.get(urn=version_urn)
+        except Node.DoesNotExist as err:
+            # Chain the original lookup failure so it stays in the traceback.
+            raise Exception(f"{version_urn} was not found.") from err
+
+        textparts_queryset = get_textparts_from_passage_reference(
+            passage_reference, version
+        )
+        alignments = TextAlignmentChunk.objects.filter(
+            Q(start__in=textparts_queryset) | Q(end__in=textparts_queryset)
+        ).values(*fields)
+        return list(alignments)
diff --git a/readhomer_atlas/web_annotation/shortcuts.py b/readhomer_atlas/web_annotation/shortcuts.py
@@ -0,0 +1,12 @@
+from django.conf import settings
+
+from django.contrib.sites.models import Site
+
+
+def build_absolute_url(url):
+    """
+    Prefix `url` with the configured scheme and the current site's domain.
+
+    The scheme comes from `settings.DEFAULT_HTTP_PROTOCOL`; the host is the
+    `domain` of the current `Site`.
+    """
+    # get_current should cache:
+    # https://docs.djangoproject.com/en/2.2/ref/contrib/sites/#caching-the-current-site-object
+    host = Site.objects.get_current().domain
+    return "{scheme}://{host}{url}".format(
+        scheme=settings.DEFAULT_HTTP_PROTOCOL, host=host, url=url
+    )