From c86e1e2f09736b86d1a78a7dd24d720b7cb4c3bf Mon Sep 17 00:00:00 2001 From: Keigh Rim Date: Sun, 27 Aug 2023 15:50:17 -0400 Subject: [PATCH] removed reliance on browser session, replaced uuid with sha1 hash (#26) --- app.py | 38 ++++++++++++++++++-------------------- cache.py | 6 +++++- iiif_utils.py | 12 ++++++------ utils.py | 47 ++++++++++++++++++++--------------------------- 4 files changed, 49 insertions(+), 54 deletions(-) diff --git a/app.py b/app.py index 48c2ad0..5e47e18 100644 --- a/app.py +++ b/app.py @@ -1,16 +1,15 @@ +import hashlib import os import secrets import sys -from pathlib import Path from threading import Thread -import shortuuid -from flask import request, render_template, flash, send_from_directory, session, redirect +from flask import request, render_template, flash, send_from_directory, redirect from mmif.serialize import Mmif import cache from cache import set_last_access, cleanup -from utils import app, render_ocr, get_media, prep_annotations, prepare_ocr_visualization +from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization @app.route('/') @@ -82,29 +81,28 @@ def send_js(path): return send_from_directory("uv", path) -def render_mmif(mmif_str): +def render_mmif(mmif_str, viz_id): mmif = Mmif(mmif_str) - media = get_media(mmif) - annotations = prep_annotations(mmif) + media = documents_to_htmls(mmif, viz_id) + annotations = prep_annotations(mmif, viz_id) return render_template('player.html', media=media, annotations=annotations) -def upload_file(file): +def upload_file(in_mmif): # Save file locally - uuid = shortuuid.uuid() - session["mmif_id"] = uuid - app.logger.debug(uuid) - path = Path(app.root_path) / 'static' / 'tmp' / uuid - os.makedirs(path) + in_mmif_bytes = in_mmif.read() + in_mmif_str = in_mmif_bytes.decode('utf-8') + viz_id = hashlib.sha1(in_mmif_bytes).hexdigest() + app.logger.debug(viz_id) + path = cache.get_cache_path() / viz_id + os.makedirs(path, exist_ok=True) set_last_access(path) - file.save(os.path.join(path, "file.mmif")) - with open(os.path.join(path, "file.mmif")) as fh: - mmif_str = fh.read() - html_page = render_mmif(mmif_str) + with open(path / 'file.mmif', 'w') as in_mmif_file: + in_mmif_file.write(in_mmif_str) + html_page = render_mmif(in_mmif_str, viz_id) with open(os.path.join(path, "index.html"), "w") as f: f.write(html_page) - # Perform cleanup t = Thread(target=cleanup) t.daemon = True @@ -112,8 +110,8 @@ def upload_file(file): agent = request.headers.get('User-Agent') if 'curl' in agent.lower(): - return f"Visualization ID is {uuid}\nYou can access the visualized file at /display/{uuid}\n" - return redirect(f"/display/{uuid}", code=301) + return f"Visualization ID is {viz_id}\nYou can access the visualized file at /display/{viz_id}\n" + return redirect(f"/display/{viz_id}", code=301) if __name__ == '__main__': diff --git a/cache.py b/cache.py index 951604a..4892893 100644 --- a/cache.py +++ b/cache.py @@ -10,7 +10,11 @@ def get_cache_path(): - return pathlib.Path(app.root_path) / "static" / "tmp" + return pathlib.Path(app.static_folder) / "tmp" + + +def get_cache_relpath(full_path): + return str(full_path)[len(app.static_folder):] def set_last_access(path): diff --git a/iiif_utils.py b/iiif_utils.py index 5e603e9..7be9eb8 100644 --- a/iiif_utils.py +++ b/iiif_utils.py @@ -1,17 +1,17 @@ +import datetime import json import os -import pathlib import tempfile from typing import Dict import mmif -from flask import url_for, session +from flask import url_for from mmif import AnnotationTypes, DocumentTypes, Mmif -import datetime + import cache -def generate_iiif_manifest(in_mmif: mmif.Mmif): +def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id): iiif_json = { "@context": "http://iiif.io/api/presentation/2/context.json", "id": "http://0.0.0.0:5000/mmif_example_manifest.json", @@ -29,7 +29,7 @@ def generate_iiif_manifest(in_mmif: mmif.Mmif): } add_canvas_from_documents(in_mmif, iiif_json) add_structure_from_timeframe(in_mmif, iiif_json) - return save_manifest(iiif_json) + return save_manifest(iiif_json, viz_id) def add_canvas_from_documents(in_mmif, iiif_json): @@ -106,7 +106,7 @@ def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict): iiif_json["structures"].append(view_range) -def save_manifest(iiif_json: Dict) -> str: +def save_manifest(iiif_json: Dict, viz_id) -> str: # generate a iiif manifest and save output file manifest = tempfile.NamedTemporaryFile( 'w', dir=str(cache.get_cache_path() / viz_id), suffix='.json', delete=False) diff --git a/utils.py b/utils.py index 7c49598..ff18ddc 100644 --- a/utils.py +++ b/utils.py @@ -2,7 +2,7 @@ from datetime import timedelta from io import StringIO -from flask import Flask, session +from flask import Flask from lapps.discriminators import Uri from mmif import DocumentTypes from mmif.serialize.annotation import Text @@ -19,8 +19,11 @@ app.secret_key = 'your_secret_key_here' -def get_alignments(alignment_view): - vtt_file = tempfile.NamedTemporaryFile('w', dir=str(cache.get_cache_path() / session['mmif_id']), suffix='.vtt', delete=False) +def asr_alignments_to_vtt(alignment_view, viz_id): + vtt_filename = cache.get_cache_path() / viz_id / f"{alignment_view.id.replace(':', '-')}.vtt" + if vtt_filename.exists(): + return str(vtt_filename) + vtt_file = open(vtt_filename, 'w') vtt_file.write("WEBVTT\n\n") annotations = alignment_view.annotations timeframe_at_type = [at_type for at_type in alignment_view.metadata.contains if at_type.shortname == "TimeFrame"][0] @@ -51,7 +54,7 @@ def get_alignments(alignment_view): vtt_file.write(f'{vtt_start} --> {vtt_end}\n{" ".join(texts)}\n\n') vtt_start = None texts = [] - return vtt_file + return vtt_file.name def build_alignment(alignment, token_idx, timeframe_idx): @@ -66,7 +69,7 @@ def build_alignment(alignment, token_idx, timeframe_idx): return start, end, text -def get_media(mmif): +def documents_to_htmls(mmif, viz_id): # Returns a list of tuples, one for each element in the documents list of # the MMIF object, following the order in that list. Each tuple has four # elements: document type, document identifier, document path and the HTML @@ -80,16 +83,16 @@ def get_media(mmif): elif document.at_type == DocumentTypes.VideoDocument: fa_views = get_alignment_views(mmif) fa_view = fa_views[0] if fa_views else None - html = html_video(doc_path, fa_view) + html = html_video(viz_id, doc_path, fa_view) elif document.at_type == DocumentTypes.AudioDocument: html = html_audio(doc_path) elif document.at_type == DocumentTypes.ImageDocument: boxes = get_boxes(mmif) html = html_img(doc_path, boxes) media.append((document.at_type, document.id, doc_path, html)) - manifest_filename = generate_iiif_manifest(mmif) + manifest_filename = generate_iiif_manifest(mmif, viz_id) man = os.path.basename(manifest_filename) - temp = render_template("uv_player.html", manifest=man, mmif_id=session["mmif_id"]) + temp = render_template("uv_player.html", manifest=man, mmif_id=viz_id) media.append(('UV', "", "", temp)) return media @@ -116,7 +119,7 @@ def get_boxes(mmif): return boxes -def prep_annotations(mmif): +def prep_annotations(mmif, viz_id): """Prepare annotations from the views, and return a list of pairs of tabname and tab content. The first tab is alway the full MMIF pretty print.""" tabs = [("Info", "
" + create_info(mmif) + "
"), @@ -127,7 +130,7 @@ def prep_annotations(mmif): # stuff; it does a loop but for now we assume there is just one file with # alignments (generated by Kaldi) for fa_view in get_alignment_views(mmif): - vtt_file = view_to_vtt(fa_view) + vtt_file = asr_alignments_to_vtt(fa_view, viz_id) tabs.append(("WebVTT", '
' + open(vtt_file).read() + '
')) ner_views = get_ner_views(mmif) use_id = True if len(ner_views) > 1 else False @@ -144,7 +147,7 @@ def prep_annotations(mmif): if not ocr_view.annotations: continue tabname = "Frames-%s" % ocr_view.id - visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=session["mmif_id"]) + visualization = render_template("pre-ocr.html", view_id=ocr_view.id, tabname=tabname, mmif_id=viz_id) tabs.append((tabname, visualization)) return tabs @@ -217,19 +220,16 @@ def get_alignment_views(mmif): # Remder Media as HTML ------------ -def html_video(vpath, vtt_srcview=None): +def html_video(viz_id, vpath, vtt_srcview=None): vpath = url2posix(vpath) html = StringIO() - html.write('