Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

For discussion only: translation alignment web annotations #2

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fixtures/sites.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
"pk": 2,
"model": "sites.site",
"fields": {
"domain": "example.com",
"name": "example.com"
"domain": "explorehomer-atlas-dev3.herokuapp.com",
"name": "Explore Homer ATLAS [dev3]"
}
}
]
70 changes: 70 additions & 0 deletions hmt_cite_atlas/iiif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from posixpath import join as urljoin
from urllib.parse import quote_plus, unquote


class IIIFResolver:
    """
    Derive image-server and canvas URLs from a CITE2 image URN.

    All URLs are assembled by joining path segments onto the class-level
    base URLs with ``urljoin`` (``posixpath.join``).
    """

    BASE_URL = "https://image.library.jhu.edu/iiif/"
    # @@@ figure out what this actually is in IIIF spec terms
    CANVAS_BASE_URL = "https://rosetest.library.jhu.edu/rosademo/iiif3/"
    COLLETION_SUBDIR = "homer/VA"
    # Default segments of an image request URL; callers may override any of
    # them through build_image_request_url(**kwargs).
    iruri_kwargs = {
        "region": "full",
        "size": "full",
        "rotation": "0",
        "quality": "default",
        "format": "jpg",
    }

    def __init__(self, urn):
        """
        Keep the URN that every derived URL is computed from, e.g.

        IIIFResolver("urn:cite2:hmt:vaimg.2017a:VA012VN_0514")
        """
        self.urn = urn

    @property
    def munged_image_path(self):
        """Text after the URN's last colon, with "_" replaced by "-"."""
        object_id = self.urn.rsplit(":", maxsplit=1)[-1]
        return object_id.replace("_", "-")

    @property
    def iiif_image_id(self):
        """Collection subdirectory plus image path, percent-encoded ("/" included)."""
        return quote_plus(urljoin(self.COLLETION_SUBDIR, self.munged_image_path))

    @property
    def identifier(self):
        """BASE_URL followed by the encoded image id."""
        return urljoin(self.BASE_URL, self.iiif_image_id)

    @property
    def info_url(self):
        """The identifier URL followed by "image.json"."""
        return urljoin(self.identifier, "image.json")

    def build_image_request_url(self, **kwargs):
        """
        Return an image URL of the form
        ``<identifier>/<region>/<size>/<rotation>/<quality>.<format>``.

        Keyword arguments override the matching defaults in ``iruri_kwargs``;
        the class-level defaults themselves are never modified.
        """
        params = {**self.iruri_kwargs, **kwargs}
        base = self.identifier
        tail = "{region}/{size}/{rotation}/{quality}.{format}".format(**params)
        return urljoin(base, tail)

    @property
    def image_url(self):
        """Image request URL built from the defaults only."""
        return self.build_image_request_url()

    @property
    def canvas_url(self):
        """CANVAS_BASE_URL + the *unencoded* image id + "canvas"."""
        decoded_id = unquote(self.iiif_image_id)
        return urljoin(self.CANVAS_BASE_URL, decoded_id, "canvas")

    def get_region_by_pct(self, dimensions):
        """
        Format a ``pct:x,y,w,h`` region string.

        ``dimensions`` is a mapping with "x", "y", "w" and "h" entries; each
        value is rendered with two decimal places.
        """
        formatted = [f"{dimensions[key]:.2f}" for key in ("x", "y", "w", "h")]
        return "pct:" + ",".join(formatted)
32 changes: 32 additions & 0 deletions hmt_cite_atlas/library/shortcuts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from .models import CITEDatum, CTSCatalog, CTSDatum


# Citation-scheme label; get_lines_for_folio() excludes every CTSCatalog
# whose citation_scheme contains it.
CITATION_SCHEME_SCHOLION = "scholion"


def get_lines_for_folio(folio_urn):
    """
    Return the CTSDatum queryset for the text lines indexed to a folio.

    get_lines_for_folio("urn:cite2:hmt:msA.v1:12r")

    Raises CITEDatum.DoesNotExist when no CITEDatum has ``folio_urn``.
    The CTSCatalog lookup uses ``.get()``, so it also raises unless exactly
    one catalog entry remains after the scholion scheme is excluded.
    """
    surface_key = "urn:cite2:hmt:va_dse.v1.surface:"
    passage_key = "urn:cite2:hmt:va_dse.v1.passage:"

    try:
        folio = CITEDatum.objects.get(urn=folio_urn)
    except CITEDatum.DoesNotExist:
        print(f'Could not resolve folio [urn="{folio_urn}"]')
        # Bare raise keeps the original traceback intact.
        raise

    # @@@ this will break in SQLite for the time being, but not in the future:
    # https://code.djangoproject.com/ticket/12990
    # Records whose fields map the surface key to this folio's URN.
    folio_cite_datum = CITEDatum.objects.filter(
        fields__contains={surface_key: folio.urn}
    )

    catalog_obj = CTSCatalog.objects.exclude(
        citation_scheme__contains=[CITATION_SCHEME_SCHOLION]
    ).get()
    book_line_urn = catalog_obj.urn

    # @@@ might be a way we can do some db-level "icontains" against `urn:cite2:hmt:va_dse.v1.passage:`
    line_cite_datum = folio_cite_datum.filter(fields__icontains=book_line_urn)
    # @@@ might be a way we can use a subquery against the values in `urn:cite2:hmt:va_dse.v1.passage:`
    line_urns = [datum.fields[passage_key] for datum in line_cite_datum]
    return CTSDatum.objects.filter(urn__in=line_urns)
4 changes: 4 additions & 0 deletions hmt_cite_atlas/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@
# Project
"hmt_cite_atlas",
"hmt_cite_atlas.library",
"hmt_cite_atlas.web_annotation",
]

if DEBUG:
Expand Down Expand Up @@ -176,3 +177,6 @@
"SCHEMA": "hmt_cite_atlas.schema.schema",
"RELAY_CONNECTION_MAX_LIMIT": None,
}

# Cache lifetime in seconds.
# NOTE(review): no consumer is visible in this diff; presumably used by the
# web_annotation views -- confirm.
DEFAULT_HTTP_CACHE_DURATION = 60 * 60 * 24 * 365  # one year
# URL scheme used by web_annotation.shortcuts.build_absolute_url(); read from
# the DEFAULT_HTTP_PROTOCOL environment variable, falling back to "http".
DEFAULT_HTTP_PROTOCOL = os.environ.get("DEFAULT_HTTP_PROTOCOL", "http")
3 changes: 2 additions & 1 deletion hmt_cite_atlas/urls.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from django.contrib import admin
from django.urls import path
from django.urls import include, path

from graphene_django.views import GraphQLView


# Project-level URL routes.
urlpatterns = [
    # Django admin site.
    path("admin/", admin.site.urls),
    # GraphQL endpoint, served with the GraphiQL interface enabled.
    path("graphql/", GraphQLView.as_view(graphiql=True)),
    # Web annotation routes, mounted under the "wa/" prefix.
    path("wa/", include("hmt_cite_atlas.web_annotation.urls")),
]
Empty file.
5 changes: 5 additions & 0 deletions hmt_cite_atlas/web_annotation/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class WebAnnotationConfig(AppConfig):
    """Django application configuration for the web annotation app."""

    # Must be the full dotted Python path of the application package, i.e. the
    # same string listed in INSTALLED_APPS. The app label is still derived
    # from the last component ("web_annotation").
    name = "hmt_cite_atlas.web_annotation"
66 changes: 66 additions & 0 deletions hmt_cite_atlas/web_annotation/shims.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os

from django.utils.functional import cached_property

import requests

from ..library.shortcuts import get_lines_for_folio


class AlignmentsShim:
    """
    Shim to allow us to retrieve alignment data from explorehomer;
    eventually, we'll likely want to write out bounding box info as standoff annotation
    and ship to explorehomer directly.
    """

    # GraphQL endpoint that is queried for alignment chunks; can be overridden
    # with the ATLAS_GRAPHQL_ENDPOINT environment variable.
    GRAPHQL_ENDPOINT = os.environ.get(
        "ATLAS_GRAPHQL_ENDPOINT",
        "https://explorehomer-atlas-dev.herokuapp.com/graphql/",
    )

    def __init__(self, folio_urn):
        """Store the URN of the folio whose lines and alignments are looked up."""
        self.folio_urn = folio_urn

    @cached_property
    def folio_lines(self):
        """Result of get_lines_for_folio() for this folio (computed once)."""
        return get_lines_for_folio(self.folio_urn)

    @cached_property
    def line_urns(self):
        """List with the ``urn`` of every entry in ``folio_lines`` (computed once)."""
        return [line.urn for line in self.folio_lines]

    def get_ref(self):
        """
        Return the passage reference covered by the folio's lines.

        The reference part of a URN is whatever follows its last colon.
        When the first and last line share the same reference that value is
        returned on its own, otherwise ``"<first>-<last>"``.

        Raises IndexError when the folio has no lines.
        """
        first = self.line_urns[0].rsplit(":", maxsplit=1)[1]
        last = self.line_urns[-1].rsplit(":", maxsplit=1)[1]
        if first == last:
            return first
        return f"{first}-{last}"

    @staticmethod
    def _build_predicate(reference, idx=None):
        """Build the argument string for the ``textAlignmentChunks`` field."""
        predicate = f'reference:"{reference}"'
        # Compare with None explicitly: 0 is a valid index and must not be
        # dropped the way a plain truthiness check would drop it.
        if idx is not None:
            predicate = f"{predicate} idx: {idx}"
        return predicate

    def get_alignment_data(self, idx=None, fields=None):
        """
        Fetch alignment chunks for the folio's passage range.

        ``idx`` optionally restricts the query to a single chunk index
        (0 included); ``fields`` lists the node fields to request and
        defaults to ``["idx", "items", "citation"]``.

        Returns the list of ``node`` dicts found under
        ``data.textAlignmentChunks.edges`` in the JSON response.
        """
        if fields is None:
            fields = ["idx", "items", "citation"]
        ref = self.get_ref()
        # @@@ hardcoded version urn
        # @@@ add the ability to get a count from an edge
        reference = f"urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:{ref}"
        predicate = self._build_predicate(reference, idx)
        # NOTE(review): no timeout is passed, so this call waits for as long
        # as the remote server takes to answer.
        resp = requests.post(
            self.GRAPHQL_ENDPOINT,
            json={
                "query": """
{
textAlignmentChunks(%s) {
edges {
node {
%s
}
}
}
}"""
                % (predicate, "\n".join(fields))
            },
        )
        edges = resp.json()["data"]["textAlignmentChunks"]["edges"]
        return [edge["node"] for edge in edges]
11 changes: 11 additions & 0 deletions hmt_cite_atlas/web_annotation/shortcuts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from django.conf import settings
from django.contrib.sites.models import Site
from django.utils.functional import SimpleLazyObject


# Resolved lazily on first attribute access: looking the site up eagerly would
# run a database query while this module is being imported (e.g. before the
# sites table exists or before fixtures are loaded).
CURRENT_SITE = SimpleLazyObject(Site.objects.get_current)


def build_absolute_url(url):
    """
    Return ``url`` prefixed with the configured scheme and the current site's domain.

    The scheme comes from ``settings.DEFAULT_HTTP_PROTOCOL`` and the host from
    ``CURRENT_SITE.domain``. ``url`` is appended verbatim, so it is expected
    to already start with "/".
    """
    return "{scheme}://{host}{url}".format(
        scheme=settings.DEFAULT_HTTP_PROTOCOL, host=CURRENT_SITE.domain, url=url
    )
26 changes: 26 additions & 0 deletions hmt_cite_atlas/web_annotation/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from django.urls import path

from .views import (
serve_wa,
serve_web_annotation_collection,
serve_web_annotation_page
)


# Routes for translation-alignment web annotations. <urn> and <format> use
# the default path converter; the numeric segments are parsed as integers.
urlpatterns = [
    # Annotation collection for a URN in the requested format.
    path(
        "<urn>/translation-alignment/collection/<format>/",
        serve_web_annotation_collection,
        name="serve_web_annotation_collection",
    ),
    # One page of the collection, selected by zero_page_number.
    path(
        "<urn>/translation-alignment/collection/<format>/<int:zero_page_number>/",
        serve_web_annotation_page,
        name="serve_web_annotation_page",
    ),
    # A single annotation, selected by idx.
    path(
        "<urn>/translation-alignment/<int:idx>/<format>/",
        serve_wa,
        name="serve_web_annotation",
    ),
]
Loading