Skip to content

Commit

Permalink
Merge pull request #35 from scaife-viewer/feature/image-annotations
Browse files Browse the repository at this point in the history
Merge #30 into develop
  • Loading branch information
jacobwegner authored Apr 23, 2020
2 parents 5709c57 + 65d85d2 commit c927364
Show file tree
Hide file tree
Showing 15 changed files with 611 additions and 67 deletions.
1 change: 1 addition & 0 deletions bin/post_compile
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
#!/bin/bash
# Runs the project's Django management commands, in order.
# NOTE(review): presumably invoked by the deployment build as a post-compile
# hook (inferred from the file name) -- confirm.

# Project-specific command; see the `prepare_db` management command for what
# it sets up.
python manage.py prepare_db
# Load the django.contrib.sites records defined in fixtures/sites.json.
python manage.py loaddata fixtures/sites.json
8 changes: 8 additions & 0 deletions fixtures/sites.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,13 @@
"domain": "example.com",
"name": "example.com"
}
},
{
"pk": 3,
"model": "sites.site",
"fields": {
"domain": "aniop-atlas-staging.eldarion.com",
"name": "AniOp ATLAS [staging]"
}
}
]
70 changes: 70 additions & 0 deletions readhomer_atlas/iiif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from posixpath import join as urljoin
from urllib.parse import quote_plus, unquote


class IIIFResolver:
    """
    Derive IIIF image, info and canvas URLs from an image URN.

    The last colon-delimited component of the URN is used as the image
    name (with "_" replaced by "-") beneath `COLLETION_SUBDIR`.
    """

    BASE_URL = "https://image.library.jhu.edu/iiif/"
    # @@@ figure out what this actually is in IIIF spec terms
    CANVAS_BASE_URL = "https://rosetest.library.jhu.edu/rosademo/iiif3/"
    COLLETION_SUBDIR = "homer/VA"
    # Default values substituted into the image request URL template; any of
    # them can be overridden per call via `build_image_request_url(**kwargs)`.
    iruri_kwargs = {
        "region": "full",
        "size": "full",
        "rotation": "0",
        "quality": "default",
        "format": "jpg",
    }

    def __init__(self, urn):
        """
        IIIFResolver("urn:cite2:hmt:vaimg.2017a:VA012VN_0514")
        """
        self.urn = urn

    @property
    def munged_image_path(self):
        """Last URN component with underscores swapped for hyphens."""
        last_component = self.urn.rsplit(":", 1)[-1]
        return last_component.replace("_", "-")

    @property
    def iiif_image_id(self):
        """URL-quoted "<subdir>/<image>" identifier (slashes are escaped)."""
        return quote_plus(urljoin(self.COLLETION_SUBDIR, self.munged_image_path))

    @property
    def identifier(self):
        """Base URL of the image on the image server."""
        return urljoin(self.BASE_URL, self.iiif_image_id)

    @property
    def info_url(self):
        """URL of the image's JSON description document."""
        return urljoin(self.identifier, "image.json")

    def build_image_request_url(self, **kwargs):
        """
        Build an image request URL.

        Keyword arguments (`region`, `size`, `rotation`, `quality`,
        `format`) override the class-level defaults in `iruri_kwargs`.
        """
        params = {**self.iruri_kwargs, **kwargs}
        template = "{region}/{size}/{rotation}/{quality}.{format}"
        return urljoin(self.identifier, template.format(**params))

    @property
    def image_url(self):
        """Image request URL built with the default parameters."""
        return self.build_image_request_url()

    @property
    def canvas_url(self):
        """Canvas URL, built from the unquoted image identifier."""
        unquoted_id = unquote(self.iiif_image_id)
        return urljoin(self.CANVAS_BASE_URL, unquoted_id, "canvas")

    def get_region_by_pct(self, dimensions):
        """
        Format a `pct:x,y,w,h` region string from a mapping with the keys
        "x", "y", "w" and "h"; each value is rendered with two decimals.
        """
        formatted = (f"{dimensions[key]:.2f}" for key in ("x", "y", "w", "h"))
        return "pct:" + ",".join(formatted)
2 changes: 1 addition & 1 deletion readhomer_atlas/library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def resolve_references(self):

if delta_urns:
print(
f'Could not resolve all references, probably to bad data in the CEX file [urn="{self.urn}" unresolved_urns="{",".join(delta_urns)}"]'
f'Could not resolve all references, probably due to bad data in the CEX file [urn="{self.urn}" unresolved_urns="{",".join(delta_urns)}"]'
)
self.text_parts.set(reference_objs)

Expand Down
73 changes: 9 additions & 64 deletions readhomer_atlas/library/schema.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from django.db.models import Max, Min, Q
from django.db.models import Q

import django_filters
from graphene import Connection, Field, ObjectType, String, relay
Expand All @@ -18,31 +18,18 @@
Token,
)
from .urn import URN
from .utils import get_chunker
from .utils import (
extract_version_urn_and_ref,
filter_via_ref_predicate,
get_chunker,
get_textparts_from_passage_reference,
)


# @@@ alias Node because relay.Node is quite different
TextPart = Node


def extract_version_urn_and_ref(value):
    """
    Split a passage reference at its last ":" into `(version_urn, ref)`.

    The returned version URN keeps its trailing ":". Raises `ValueError`
    when `value` contains no ":" at all.
    """
    urn_without_colon, ref = value.rsplit(":", 1)
    # rsplit consumed the separator, so put the trailing ":" back.
    return urn_without_colon + ":", ref


def filter_via_ref_predicate(instance, queryset, predicate):
    """
    Narrow `queryset` to the contiguous `idx` range spanned by the rows
    that match `predicate`.

    Args:
        instance: Unused; kept so existing callers keep working.
        queryset: Queryset of rows carrying a sequential `idx` field.
        predicate: `Q` object selecting the boundary rows of the range.

    Returns:
        `queryset` unchanged when it is empty, an empty queryset when no
        row matches `predicate`, otherwise the rows whose `idx` lies
        between the smallest and largest matching `idx` (inclusive).
    """
    # We need a sequential identifier to do the range unless there is something
    # else we can do with siblings / slicing within treebeard. Using `path`
    # might work too, but having `idx` also allows us to do simple integer math
    # as-needed.
    if not queryset.exists():
        return queryset
    bounds = queryset.filter(predicate).aggregate(min=Min("idx"), max=Max("idx"))
    if bounds["min"] is None or bounds["max"] is None:
        # No row matched: the aggregates are None, and filtering on
        # `idx__gte=None` would raise ValueError instead of returning
        # an empty result.
        return queryset.none()
    return queryset.filter(idx__gte=bounds["min"], idx__lte=bounds["max"])


class LimitedConnectionField(DjangoFilterConnectionField):
"""
Ensures that queries without `first` or `last` return up to
Expand Down Expand Up @@ -191,7 +178,7 @@ def reference_filter(self, queryset, name, value):
urn__startswith=version_urn,
depth=len(start.split(".")) + 1,
)
return filter_via_ref_predicate(self, queryset, predicate)
return filter_via_ref_predicate(queryset, predicate)

class Meta:
model = TextPart
Expand Down Expand Up @@ -219,52 +206,10 @@ def _add_passage_to_context(self, reference):

self.request.passage["version"] = version

def _build_predicate(self, queryset, ref, max_rank):
    """
    Build a `Q` predicate OR-ing together the start and end of `ref`.

    `ref` is either a single reference or a "start-end" range; when it is
    empty, the `ref` values of the first and last rows of `queryset` are
    used. A boundary with exactly `max_rank` dot-separated components is
    matched exactly; a shallower boundary matches every ref beginning
    with "<boundary>.".

    Raises `ValueError` when either boundary ends up empty.
    """
    if ref:
        pieces = ref.split("-")
        if len(pieces) == 2:
            start, end = pieces
        else:
            # Not exactly "start-end": treat the whole value as one reference.
            start = end = ref
    else:
        # @@@ get all the text parts in the work; do we want to support this
        # or should we just return the first text part?
        start = queryset.first().ref
        end = queryset.last().ref

    # @@@ still need to validate reference based on the depth
    # start_book, start_line = instance._resolve_ref(start)
    # end_book, end_line = instance._resolve_ref(end)
    # the validation might be done through treebeard; for now
    # going to avoid the queries at this time
    predicate = Q()
    for boundary in (start, end):
        if not boundary:
            continue
        if len(boundary.split(".")) == max_rank:
            condition = Q(ref=boundary)
        else:
            condition = Q(ref__istartswith=f"{boundary}.")
        predicate.add(condition, Q.OR)

    if not (start and end):
        raise ValueError(f"Invalid reference: {ref}")
    return predicate

def get_lowest_textparts_queryset(self, value):
self._add_passage_to_context(value)
version = self.request.passage["version"]
citation_scheme = version.metadata["citation_scheme"]
max_depth = version.get_descendants().last().depth

max_rank = len(citation_scheme)
queryset = version.get_descendants().filter(depth=max_depth)
_, ref = value.rsplit(":", maxsplit=1)
predicate = self._build_predicate(queryset, ref, max_rank)
return filter_via_ref_predicate(self, queryset, predicate)
return get_textparts_from_passage_reference(value, version=version)


class PassageTextPartFilterSet(TextPartsReferenceFilterMixin, django_filters.FilterSet):
Expand Down
67 changes: 66 additions & 1 deletion readhomer_atlas/library/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from django.conf import settings
from django.db.models import Max, Min
from django.db.models import Max, Min, Q
from django.utils.functional import cached_property


Expand Down Expand Up @@ -88,3 +88,68 @@ def get_chunker(queryset, start_idx, chunk_length, **kwargs):
if chunk_length < settings.ATLAS_CONFIG["IN_MEMORY_PASSAGE_CHUNK_MAX"]:
return InMemorySiblingChunker(queryset, start_idx, chunk_length, **kwargs)
return SQLSiblingChunker(queryset, start_idx, chunk_length, **kwargs)


def extract_version_urn_and_ref(value):
    """
    Separate a passage reference into its version URN and its ref.

    The split happens at the last ":" in `value`; the version URN is
    returned with that ":" restored. A `value` without any ":" raises
    `ValueError`.
    """
    head, ref = value.rsplit(":", 1)
    return f"{head}:", ref


def build_textpart_predicate(queryset, ref, max_rank):
    """
    Build a `Q` predicate selecting the boundary text parts of `ref`.

    `ref` may be a single reference or a "start-end" range. When `ref` is
    empty, the boundaries default to the `ref` of the first and last rows
    of `queryset`. Each boundary is matched exactly when it has
    `max_rank` dot-separated components, otherwise as a "<boundary>."
    prefix; the two conditions are OR-ed together.

    Raises `ValueError` when either boundary is empty.
    """
    if ref:
        pieces = ref.split("-")
        if len(pieces) == 2:
            start, end = pieces
        else:
            # Anything other than exactly one "-" is a single reference.
            start = end = ref
    else:
        # @@@ get all the text parts in the work; do we want to support this
        # or should we just return the first text part?
        start = queryset.first().ref
        end = queryset.last().ref

    # @@@ still need to validate reference based on the depth
    # start_book, start_line = instance._resolve_ref(start)
    # end_book, end_line = instance._resolve_ref(end)
    # the validation might be done through treebeard; for now
    # going to avoid the queries at this time
    predicate = Q()
    for boundary in (start, end):
        if not boundary:
            continue
        if len(boundary.split(".")) == max_rank:
            condition = Q(ref=boundary)
        else:
            condition = Q(ref__istartswith=f"{boundary}.")
        predicate.add(condition, Q.OR)

    if not (start and end):
        raise ValueError(f"Invalid reference: {ref}")
    return predicate


def filter_via_ref_predicate(queryset, predicate):
    """
    Narrow `queryset` to the contiguous `idx` range spanned by the rows
    that match `predicate`.

    Args:
        queryset: Queryset of rows carrying a sequential `idx` field.
        predicate: `Q` object selecting the boundary rows of the range.

    Returns:
        `queryset` unchanged when it is empty, an empty queryset when no
        row matches `predicate`, otherwise the rows whose `idx` lies
        between the smallest and largest matching `idx` (inclusive).
    """
    # We need a sequential identifier to do the range unless there is something
    # else we can do with siblings / slicing within treebeard. Using `path`
    # might work too, but having `idx` also allows us to do simple integer math
    # as-needed.
    if not queryset.exists():
        return queryset
    bounds = queryset.filter(predicate).aggregate(min=Min("idx"), max=Max("idx"))
    if bounds["min"] is None or bounds["max"] is None:
        # No row matched: the aggregates are None, and filtering on
        # `idx__gte=None` would raise ValueError instead of returning
        # an empty result.
        return queryset.none()
    return queryset.filter(idx__gte=bounds["min"], idx__lte=bounds["max"])


def get_textparts_from_passage_reference(passage_reference, version):
    """
    Return the deepest-level text parts of `version` covered by
    `passage_reference`.

    The ref is whatever follows the last ":" in `passage_reference`; it is
    resolved against the descendants of `version` sitting at the depth of
    the last descendant, using the length of the version's
    `citation_scheme` metadata as the full reference rank.
    """
    scheme = version.metadata["citation_scheme"]
    deepest = version.get_descendants().last().depth
    full_rank = len(scheme)

    lowest_textparts = version.get_descendants().filter(depth=deepest)
    _version_part, ref = passage_reference.rsplit(":", maxsplit=1)
    return filter_via_ref_predicate(
        lowest_textparts,
        build_textpart_predicate(lowest_textparts, ref, full_rank),
    )
5 changes: 5 additions & 0 deletions readhomer_atlas/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
"readhomer_atlas",
"readhomer_atlas.library",
"readhomer_atlas.tocs",
"readhomer_atlas.web_annotation",
]

ADMIN_URL = "admin:index"
Expand Down Expand Up @@ -183,3 +184,7 @@
)

NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# @@@ review
# NOTE(review): presumably a duration in seconds (60 * 60 * 24 * 365) -- confirm
# against wherever this setting is consumed.
DEFAULT_HTTP_CACHE_DURATION = 60 * 60 * 24 * 365 # one year
# URL scheme taken from the DEFAULT_HTTP_PROTOCOL environment variable,
# falling back to "http" when the variable is not set.
DEFAULT_HTTP_PROTOCOL = os.environ.get("DEFAULT_HTTP_PROTOCOL", "http")
3 changes: 2 additions & 1 deletion readhomer_atlas/urls.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from django.urls import path
from django.urls import include, path
from django.views.decorators.csrf import csrf_exempt

from django.contrib import admin
Expand All @@ -13,4 +13,5 @@
path("graphql/", csrf_exempt(GraphQLView.as_view(graphiql=True))),
path("tocs/<filename>", serve_toc, name="serve_toc"),
path("tocs/", tocs_index, name="tocs_index"),
path("wa/", include("readhomer_atlas.web_annotation.urls")),
]
Empty file.
5 changes: 5 additions & 0 deletions readhomer_atlas/web_annotation/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class WebAnnotationConfig(AppConfig):
    """Application configuration for the web annotation app."""

    # Django requires `name` to be the full dotted Python path to the
    # application package. The app lives at `readhomer_atlas.web_annotation`
    # (that is how it is listed in INSTALLED_APPS), so a bare
    # "web_annotation" cannot be imported when this config is used.
    name = "readhomer_atlas.web_annotation"
61 changes: 61 additions & 0 deletions readhomer_atlas/web_annotation/shims.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from django.db.models import Q
from django.utils.functional import cached_property

from ..library.models import Node, TextAlignmentChunk
from ..library.utils import (
extract_version_urn_and_ref,
get_textparts_from_passage_reference,
)
from .utils import preferred_folio_urn


class AlignmentsShim:
    """
    Shim for retrieving alignment data indirectly from the database.

    Eventually we'll likely want to write out bounding box info as standoff
    annotation and ship it to explorehomer directly.
    """

    def __init__(self, folio_urn):
        self.folio_urn = preferred_folio_urn(folio_urn)

    @cached_property
    def folio_lines(self):
        """Nodes of kind "line" whose URN starts with the folio URN."""
        under_folio = Node.objects.filter(urn__startswith=self.folio_urn)
        return under_folio.filter(kind="line")

    @cached_property
    def line_urns(self):
        """URNs of the folio's line nodes, in queryset order."""
        return [line.urn for line in self.folio_lines]

    def get_ref(self):
        """
        Return the passage ref covered by the folio's lines.

        The ref of the first and last line URN is taken (the part after
        the last ":"), its leading dot-separated component is dropped, and
        the two are joined as "first-last" (or returned alone when equal).
        """
        bounds = []
        for urn in (self.line_urns[0], self.line_urns[-1]):
            passage = urn.rsplit(":", maxsplit=1)[1]
            # @@@ strip folios
            bounds.append(passage.split(".", maxsplit=1)[1])
        first, last = bounds
        return first if first == last else f"{first}-{last}"

    def get_alignment_data(self, idx=None, fields=None):
        """
        Return alignment chunks touching the folio's passage, as a list of
        dicts restricted to `fields` (default: idx, items, citation).

        `idx` is accepted but currently unused. Raises `Exception` when
        the version node cannot be found.
        """
        selected_fields = ["idx", "items", "citation"] if fields is None else fields

        passage_reference = (
            "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:" + self.get_ref()
        )

        # @@@ add as a Node manager method
        version_urn, _ref = extract_version_urn_and_ref(passage_reference)
        try:
            version = Node.objects.get(urn=version_urn)
        except Node.DoesNotExist:
            raise Exception(f"{version_urn} was not found.")

        textparts = get_textparts_from_passage_reference(passage_reference, version)
        # An alignment chunk is relevant when it starts or ends in the passage.
        touches_passage = Q(start__in=textparts) | Q(end__in=textparts)
        matches = TextAlignmentChunk.objects.filter(touches_passage)
        return list(matches.values(*selected_fields))
12 changes: 12 additions & 0 deletions readhomer_atlas/web_annotation/shortcuts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from django.conf import settings

from django.contrib.sites.models import Site


def build_absolute_url(url):
    """
    Prefix `url` with the configured scheme and the current site's domain.

    The scheme comes from `settings.DEFAULT_HTTP_PROTOCOL`; the host is the
    `domain` of the current `Site`.
    """
    # get_current should cache:
    # https://docs.djangoproject.com/en/2.2/ref/contrib/sites/#caching-the-current-site-object
    site = Site.objects.get_current()
    scheme = settings.DEFAULT_HTTP_PROTOCOL
    host = site.domain
    return f"{scheme}://{host}{url}"
Loading

0 comments on commit c927364

Please sign in to comment.