Skip to content

Commit

Permalink
removed reliance on browser session, replaced uuid with sha1 hash (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
keighrim committed Aug 27, 2023
1 parent aeb7f41 commit c86e1e2
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 54 deletions.
38 changes: 18 additions & 20 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import hashlib
import os
import secrets
import sys
from pathlib import Path
from threading import Thread

import shortuuid
from flask import request, render_template, flash, send_from_directory, session, redirect
from flask import request, render_template, flash, send_from_directory, redirect
from mmif.serialize import Mmif

import cache
from cache import set_last_access, cleanup
from utils import app, render_ocr, get_media, prep_annotations, prepare_ocr_visualization
from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization


@app.route('/')
Expand Down Expand Up @@ -82,38 +81,37 @@ def send_js(path):
return send_from_directory("uv", path)


def render_mmif(mmif_str):
def render_mmif(mmif_str, viz_id):
mmif = Mmif(mmif_str)
media = get_media(mmif)
annotations = prep_annotations(mmif)
media = documents_to_htmls(mmif, viz_id)
annotations = prep_annotations(mmif, viz_id)
return render_template('player.html',
media=media, annotations=annotations)


def upload_file(file):
def upload_file(in_mmif):
# Save file locally
uuid = shortuuid.uuid()
session["mmif_id"] = uuid
app.logger.debug(uuid)
path = Path(app.root_path) / 'static' / 'tmp' / uuid
os.makedirs(path)
in_mmif_bytes = in_mmif.read()
in_mmif_str = in_mmif_bytes.decode('utf-8')
viz_id = hashlib.sha1(in_mmif_bytes).hexdigest()
app.logger.debug(viz_id)
path = cache.get_cache_path() / viz_id
os.makedirs(path, exist_ok=True)
set_last_access(path)
file.save(os.path.join(path, "file.mmif"))
with open(os.path.join(path, "file.mmif")) as fh:
mmif_str = fh.read()
html_page = render_mmif(mmif_str)
with open(path / 'file.mmif', 'w') as in_mmif_file:
in_mmif_file.write(in_mmif_str)
html_page = render_mmif(in_mmif_str, viz_id)
with open(os.path.join(path, "index.html"), "w") as f:
f.write(html_page)

# Perform cleanup
t = Thread(target=cleanup)
t.daemon = True
t.run()

agent = request.headers.get('User-Agent')
if 'curl' in agent.lower():
return f"Visualization ID is {uuid}\nYou can access the visualized file at /display/{uuid}\n"
return redirect(f"/display/{uuid}", code=301)
return f"Visualization ID is {viz_id}\nYou can access the visualized file at /display/{viz_id}\n"
return redirect(f"/display/{viz_id}", code=301)


if __name__ == '__main__':
Expand Down
6 changes: 5 additions & 1 deletion cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@


def get_cache_path():
return pathlib.Path(app.root_path) / "static" / "tmp"
return pathlib.Path(app.static_folder) / "tmp"


def get_cache_relpath(full_path):
    """Return ``full_path`` as text with the static-folder prefix cut off.

    The first ``len(app.static_folder)`` characters of ``str(full_path)``
    are dropped and the remainder is returned unchanged.

    NOTE(review): the prefix is removed by length only, not by comparison,
    so this presumably expects ``full_path`` to lie under
    ``app.static_folder`` -- confirm against callers.
    """
    prefix_length = len(app.static_folder)
    path_text = str(full_path)
    return path_text[prefix_length:]


def set_last_access(path):
Expand Down
12 changes: 6 additions & 6 deletions iiif_utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
import datetime
import json
import os
import pathlib
import tempfile
from typing import Dict

import mmif
from flask import url_for, session
from flask import url_for
from mmif import AnnotationTypes, DocumentTypes, Mmif
import datetime

import cache


def generate_iiif_manifest(in_mmif: mmif.Mmif):
def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id):
iiif_json = {
"@context": "http://iiif.io/api/presentation/2/context.json",
"id": "http://0.0.0.0:5000/mmif_example_manifest.json",
Expand All @@ -29,7 +29,7 @@ def generate_iiif_manifest(in_mmif: mmif.Mmif):
}
add_canvas_from_documents(in_mmif, iiif_json)
add_structure_from_timeframe(in_mmif, iiif_json)
return save_manifest(iiif_json)
return save_manifest(iiif_json, viz_id)


def add_canvas_from_documents(in_mmif, iiif_json):
Expand Down Expand Up @@ -106,7 +106,7 @@ def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict):
iiif_json["structures"].append(view_range)


def save_manifest(iiif_json: Dict) -> str:
def save_manifest(iiif_json: Dict, viz_id) -> str:
# generate a iiif manifest and save output file
manifest = tempfile.NamedTemporaryFile(
'w', dir=str(cache.get_cache_path() / viz_id), suffix='.json', delete=False)
Expand Down
47 changes: 20 additions & 27 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from datetime import timedelta
from io import StringIO

from flask import Flask, session
from flask import Flask
from lapps.discriminators import Uri
from mmif import DocumentTypes
from mmif.serialize.annotation import Text
Expand All @@ -19,8 +19,11 @@
app.secret_key = 'your_secret_key_here'


def get_alignments(alignment_view):
vtt_file = tempfile.NamedTemporaryFile('w', dir=str(cache.get_cache_path() / session['mmif_id']), suffix='.vtt', delete=False)
def asr_alignments_to_vtt(alignment_view, viz_id):
vtt_filename = cache.get_cache_path() / viz_id / f"{alignment_view.id.replace(':', '-')}.vtt"
if vtt_filename.exists():
return str(vtt_filename)
vtt_file = open(vtt_filename, 'w')
vtt_file.write("WEBVTT\n\n")
annotations = alignment_view.annotations
timeframe_at_type = [at_type for at_type in alignment_view.metadata.contains if at_type.shortname == "TimeFrame"][0]
Expand Down Expand Up @@ -51,7 +54,7 @@ def get_alignments(alignment_view):
vtt_file.write(f'{vtt_start} --> {vtt_end}\n{" ".join(texts)}\n\n')
vtt_start = None
texts = []
return vtt_file
return vtt_file.name


def build_alignment(alignment, token_idx, timeframe_idx):
Expand All @@ -66,7 +69,7 @@ def build_alignment(alignment, token_idx, timeframe_idx):
return start, end, text


def get_media(mmif):
def documents_to_htmls(mmif, viz_id):
# Returns a list of tuples, one for each element in the documents list of
# the MMIF object, following the order in that list. Each tuple has four
# elements: document type, document identifier, document path and the HTML
Expand All @@ -80,16 +83,16 @@ def get_media(mmif):
elif document.at_type == DocumentTypes.VideoDocument:
fa_views = get_alignment_views(mmif)
fa_view = fa_views[0] if fa_views else None
html = html_video(doc_path, fa_view)
html = html_video(viz_id, doc_path, fa_view)
elif document.at_type == DocumentTypes.AudioDocument:
html = html_audio(doc_path)
elif document.at_type == DocumentTypes.ImageDocument:
boxes = get_boxes(mmif)
html = html_img(doc_path, boxes)
media.append((document.at_type, document.id, doc_path, html))
manifest_filename = generate_iiif_manifest(mmif)
manifest_filename = generate_iiif_manifest(mmif, viz_id)
man = os.path.basename(manifest_filename)
temp = render_template("uv_player.html", manifest=man, mmif_id=session["mmif_id"])
temp = render_template("uv_player.html", manifest=man, mmif_id=viz_id)
media.append(('UV', "", "", temp))
return media

Expand All @@ -116,7 +119,7 @@ def get_boxes(mmif):
return boxes


def prep_annotations(mmif):
def prep_annotations(mmif, viz_id):
"""Prepare annotations from the views, and return a list of pairs of tabname
and tab content. The first tab is always the full MMIF pretty print."""
tabs = [("Info", "<pre>" + create_info(mmif) + "</pre>"),
Expand All @@ -127,7 +130,7 @@ def prep_annotations(mmif):
# stuff; it does a loop but for now we assume there is just one file with
# alignments (generated by Kaldi)
for fa_view in get_alignment_views(mmif):
vtt_file = view_to_vtt(fa_view)
vtt_file = asr_alignments_to_vtt(fa_view, viz_id)
tabs.append(("WebVTT", '<pre>' + open(vtt_file).read() + '</pre>'))
ner_views = get_ner_views(mmif)
use_id = True if len(ner_views) > 1 else False
Expand All @@ -144,7 +147,7 @@ def prep_annotations(mmif):
if not ocr_view.annotations:
continue
tabname = "Frames-%s" % ocr_view.id
visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=session["mmif_id"])
visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=viz_id)
tabs.append((tabname, visualization))
return tabs

Expand Down Expand Up @@ -217,19 +220,16 @@ def get_alignment_views(mmif):

# Render Media as HTML ------------

def html_video(vpath, vtt_srcview=None):
def html_video(viz_id, vpath, vtt_srcview=None):
vpath = url2posix(vpath)
html = StringIO()
html.write('<video id="vid" controls>\n')
html.write('<video id="vid" controls crossorigin="anonymous" >\n')
html.write(f' <source src=\"{vpath}\">\n')
if vtt_srcview is not None:
vtt_path = view_to_vtt(vtt_srcview)
src = "/" + os.sep.join(vtt_path.split(os.sep)[-2:])
# use only basename because "static" directory is mapped to '' route by
# `static_url_path` param
# src = os.path.basename(vtt_path)

html.write(f' <track kind="subtitles" srclang="en" src="{src}" label="English" default>\n')
vtt_path = asr_alignments_to_vtt(vtt_srcview, viz_id)
src = cache.get_cache_relpath(vtt_path)
app.logger.debug(f"VTT path: {vtt_path}")
html.write(f' <track kind="captions" srclang="en" src="{src}" label="transcript" default/>\n')
html.write("</video>\n")
return html.getvalue()

Expand Down Expand Up @@ -300,13 +300,6 @@ def get_ner_views(mmif):
return [v for v in mmif.views if Uri.NE in v.metadata.contains]


def view_to_vtt(alignment_view):
    """Write the alignments of a view to a VTT-style file.

    Delegates the writing to ``get_alignments`` and returns the ``name``
    attribute of the file object it hands back.
    """
    return get_alignments(alignment_view).name


def create_ner_visualization(mmif, view):
metadata = view.metadata.contains.get(Uri.NE)
try:
Expand Down

0 comments on commit c86e1e2

Please sign in to comment.