From 48c62f60e21e8326643f1040775390f132ae28a0 Mon Sep 17 00:00:00 2001 From: Hayden McCormick Date: Thu, 3 Aug 2023 16:07:54 -0400 Subject: [PATCH 01/13] MMIF uploads now generate static /display URL for direct GET access --- app.py | 37 +++++++++++++++++++++++++++---------- iiif_utils.py | 4 ++-- ocr.py | 32 +++++++++++++++++++++----------- templates/image.html | 3 +++ templates/ocr.html | 5 +++-- templates/pre-ocr.html | 3 ++- templates/uv_player.html | 2 +- utils.py | 22 ++++++++++++---------- 8 files changed, 71 insertions(+), 37 deletions(-) diff --git a/app.py b/app.py index be37df7..0f11524 100644 --- a/app.py +++ b/app.py @@ -4,8 +4,9 @@ import secrets import json import html +import uuid -from flask import request, render_template, flash, redirect, send_from_directory, session +from flask import request, render_template, flash, redirect, send_from_directory, session, redirect from werkzeug.utils import secure_filename from mmif.serialize import Mmif @@ -20,10 +21,10 @@ def index(): def ocr(): try: data = dict(request.json) - mmif_str = open(session["mmif_file"]).read() + mmif_str = open(os.path.join("/app", "static", data["mmif_id"], "file.mmif")).read() mmif = Mmif(mmif_str) ocr_view = mmif.get_view_by_id(data["view_id"]) - return prepare_ocr_visualization(mmif, ocr_view) + return prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"]) except Exception as e: return f'

{e}' @@ -32,7 +33,7 @@ def ocr(): def ocrpage(): data = request.json try: - return (render_ocr(data['vid_path'], data["view_id"], data["page_number"])) + return (render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"])) except Exception as e: return f'

Unexpected error of type {type(e)}: {e}' @@ -56,14 +57,30 @@ def upload(): flash('WARNING: no file was selected') return redirect(request.url) if file: - filename = secure_filename(file.filename) - file.save(os.path.join('temp', filename)) - with open("temp/" + filename) as fh: - session["mmif_file"] = fh.name + # filename = secure_filename(file.filename) + id = str(uuid.uuid4()) + session["mmif_id"] = id + path = os.path.join("/app", "static", id) + os.makedirs(path) + + file.save(os.path.join(path, "file.mmif")) + with open(os.path.join(path, "file.mmif")) as fh: mmif_str = fh.read() - return render_mmif(mmif_str) + html_page = render_mmif(mmif_str) + file.save(os.path.join(path, "index.html")) + with open(os.path.join(path, "index.html"), "w") as f: + f.write(html_page) + return redirect(f"/display/{id}", code=302) + return render_template('upload.html') +@app.route('/display/') +def display(id): + print ("THE ID IS " + id) + path = os.path.join("/app", "static", id) + with open(os.path.join(path, "index.html")) as f: + html_file = f.read() + return html_file @app.route('/uv/') def send_js(path): @@ -75,7 +92,7 @@ def render_mmif(mmif_str): media = get_media(mmif) annotations = prep_annotations(mmif) return render_template('player.html', - mmif=mmif, media=media, annotations=annotations) + media=media, annotations=annotations) if __name__ == '__main__': diff --git a/iiif_utils.py b/iiif_utils.py index 47a19b7..237f63c 100644 --- a/iiif_utils.py +++ b/iiif_utils.py @@ -5,7 +5,7 @@ from typing import Dict import mmif -from flask import url_for +from flask import url_for, session from mmif import AnnotationTypes, DocumentTypes, Mmif import datetime @@ -107,7 +107,7 @@ def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict): def save_manifest(iiif_json: Dict) -> str: # generate a iiif manifest and save output file - manifest = tempfile.NamedTemporaryFile('w', dir=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix='.json', delete=False) + manifest = tempfile.NamedTemporaryFile('w', dir=str(pathlib.Path(__file__).parent /'static'/session["mmif_id"]), suffix='.json', delete=False) json.dump(iiif_json, manifest, indent=4) return manifest.name diff --git a/ocr.py b/ocr.py index 2b88360..f6667e1 100644 --- a/ocr.py +++ b/ocr.py @@ -6,8 +6,9 @@ import json import re import html +import os, shutil -from flask import render_template, session +from flask import render_template from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe @@ -124,14 +125,15 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} -def render_ocr(vid_path, view_id, page_number): +def render_ocr(mmif_id, vid_path, view_id, page_number): """Iterate through frames and display the contents/alignments.""" # Path for storing temporary images generated by cv2 cv2_vid = cv2.VideoCapture(vid_path) - f = open(session[f"{view_id}-page-file"]) + f = open(os.path.join("/app/static", mmif_id, f"{view_id}-pages.json")) frames_pages = json.load(f) page = frames_pages[str(page_number)] prev_frame_cap = None + path = make_image_directory(mmif_id) for frame_num, frame in page: # If index is range instead of frame... if frame.get("range"): @@ -144,9 +146,8 @@ def render_ocr(vid_path, view_id, page_number): # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, cv2_vid): frame["repeat"] = False - with tempfile.NamedTemporaryFile( - prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".jpg", delete=False) as tf: + prefix=path, suffix=".jpg", delete=False) as tf: cv2.imwrite(tf.name, frame_cap) # "id" is just the name of the temp image file frame["id"] = pathlib.Path(tf.name).name @@ -157,7 +158,17 @@ def render_ocr(vid_path, view_id, page_number): view_id=view_id, page=page, n_pages=len(frames_pages), - page_number=str(page_number)) + page_number=str(page_number), + mmif_id = mmif_id) + + +def make_image_directory(mmif_id): + # Make path for temp OCR image files or clear image files if it exists + path = os.path.join("/app", "static", mmif_id, "img/") + if os.path.exists(path): + shutil.rmtree(path) + os.makedirs(path) + return path def find_duplicates(frames_list, cv2_vid): @@ -236,8 +247,7 @@ def get_ocr_views(mmif): continue return views -def save_json(dict, view_id): - with tempfile.NamedTemporaryFile(prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".json", delete=False) as tf: - pages_json = open(tf.name, "w") - json.dump(dict, pages_json) - session[f"{view_id}-page-file"] = tf.name +def save_json(dict, view_id, mmif_id): + path = os.path.join("/app/static", mmif_id, f"{view_id}-pages.json") + with open(path, 'w') as f: + json.dump(dict, f) \ No newline at end of file diff --git a/templates/image.html b/templates/image.html index 1c07297..4aa066d 100644 --- a/templates/image.html +++ b/templates/image.html @@ -3,6 +3,8 @@