Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
keighrim committed Aug 27, 2023
2 parents 9b3ae27 + b222a7c commit c1b4c9a
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 59 deletions.
25 changes: 19 additions & 6 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,39 @@
import pathlib
import sys
import secrets
import json
import html

from flask import request, render_template, flash, redirect, send_from_directory
from flask import request, render_template, flash, redirect, send_from_directory, session
from werkzeug.utils import secure_filename
from mmif.serialize import Mmif

from utils import app, render_ocr, get_media, prep_annotations
from utils import app, render_ocr, get_media, prep_annotations, prepare_ocr_visualization

@app.route('/')
def index():
return render_template('index.html')


@app.route('/ocr', methods=['POST'])
def ocr():
try:
data = dict(request.json)
mmif_str = open(session["mmif_file"]).read()
mmif = Mmif(mmif_str)
ocr_view = mmif.get_view_by_id(data["view_id"])
return prepare_ocr_visualization(mmif, ocr_view)
except Exception as e:
return f'<p class="error">{e}</h1>'


@app.route('/ocrpage', methods=['POST'])
def ocrpage():
data = request.json
try:
page_number = data["page_number"]
view_id = data["view_id"]
return (render_ocr(data['vid_path'], data["view_id"], page_number))
return (render_ocr(data['vid_path'], data["view_id"], data["page_number"]))
except Exception as e:
return f'<p class="error">Unexpected error of type {type(e)}: {e}</h1>'
pass

@app.route('/upload', methods=['GET', 'POST'])
def upload():
Expand All @@ -47,6 +59,7 @@ def upload():
filename = secure_filename(file.filename)
file.save(os.path.join('temp', filename))
with open("temp/" + filename) as fh:
session["mmif_file"] = fh.name
mmif_str = fh.read()
return render_mmif(mmif_str)
return render_template('upload.html')
Expand Down
129 changes: 88 additions & 41 deletions ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,14 @@
import html

from flask import render_template, session
# from utils import app
from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe


class OCRFrame():
"""Class representing an (aligned or otherwise) set of OCR annotations for a single frame"""

def __init__(self, anno, fps):
def __init__(self, anno, mmif):
self.text = []
self.fps = fps
self.boxes = []
self.anno_ids = []
self.timestamp = None
Expand All @@ -26,24 +25,26 @@ def __init__(self, anno, fps):
self.range = None
self.timestamp_range = None
self.sec_range = None
self.frametype = None
self.boxtypes = []

self.update(anno)
self.update(anno, mmif)

def update(self, anno):
def update(self, anno, mmif):
if anno.at_type.shortname == "BoundingBox":
self.add_bounding_box(anno)
self.add_bounding_box(anno, mmif)

elif anno.at_type.shortname == "TimeFrame":
self.add_timeframe(anno)
self.add_timeframe(anno, mmif)

elif anno.at_type.shortname == "TextDocument":
t = anno.properties.get("text_value") or anno.properties.get("text").value
if t:
self.text.append(re.sub(r'([\\\/\|\"\'])', r'\1 ', t))

def add_bounding_box(self, anno):
self.frame_num = anno.properties.get(
"frame") or anno.properties.get("timePoint")
def add_bounding_box(self, anno, mmif):
self.frame_num = convert_timepoint(mmif, anno, "frames")
self.secs = convert_timepoint(mmif, anno, "seconds")
box_id = anno.properties["id"]
boxType = anno.properties["boxType"]
coordinates = anno.properties["coordinates"]
Expand All @@ -54,18 +55,19 @@ def add_bounding_box(self, anno):
box = [box_id, boxType, [x, y, w, h]]
self.boxes.append(box)
self.anno_ids.append(box_id)
if self.fps:
secs = int(self.frame_num/self.fps)
self.timestamp = str(datetime.timedelta(seconds=secs))
self.secs = secs
self.timestamp = str(datetime.timedelta(seconds=self.secs))
if anno.properties.get("boxType") and anno.properties.get("boxType") not in self.boxtypes:
self.boxtypes.append(anno.properties.get("boxType"))

def add_timeframe(self, anno):
start, end = anno.properties.get('start'), anno.properties.get('end')

def add_timeframe(self, anno, mmif):
start, end = convert_timeframe(mmif, anno, "frames")
start_secs, end_secs = convert_timeframe(mmif, anno, "seconds")
self.range = (start, end)
if self.fps:
start_secs, end_secs = int(start/self.fps), int(end/self.fps)
self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs)))
self.sec_range = (start_secs, end_secs)
self.timestamp_range = (str(datetime.timedelta(seconds=start_secs)), str(datetime.timedelta(seconds=end_secs)))
self.sec_range = (start_secs, end_secs)
if anno.properties.get("frameType"):
self.frametype = anno.properties.get("frameType")


def find_annotation(anno_id, view, mmif):
Expand All @@ -84,22 +86,23 @@ def get_ocr_frames(view, mmif, fps):
for alignment in view.get_annotations(full_alignment_type[0]):
source = find_annotation(alignment.properties["source"], view, mmif)
target = find_annotation(alignment.properties["target"], view, mmif)
frame = OCRFrame(source, fps)

frame = OCRFrame(source, mmif)
i = frame.frame_num if frame.frame_num is not None else frame.range
if i in frames.keys():
frames[i].update(source)
frames[i].update(target)
frames[i].update(source, mmif)
frames[i].update(target, mmif)
else:
frame.update(target)
frame.update(target, mmif)
frames[i] = frame
else:
for annotation in view.get_annotations():
frame = OCRFrame(annotation, fps)
frame = OCRFrame(annotation, mmif)
i = frame.frame_num if frame.frame_num is not None else frame.range
if i is None:
continue
if i in frames.keys():
frames[i].update(annotation)
frames[i].update(annotation, mmif)
else:
frames[i] = frame
return frames
Expand Down Expand Up @@ -128,17 +131,26 @@ def render_ocr(vid_path, view_id, page_number):
f = open(session[f"{view_id}-page-file"])
frames_pages = json.load(f)
page = frames_pages[str(page_number)]
prev_frame_cap = None
for frame_num, frame in page:
# If index is range instead of frame...
if frame.get("range"):
frame_num = (int(frame["range"][0]) + int(frame["range"][1])) / 2
cv2_vid.set(1, frame_num)
_, frame_cap = cv2_vid.read()
if frame_cap is None:
raise FileNotFoundError(f"Video file {vid_path} not found!")

# Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat
if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, cv2_vid):
frame["repeat"] = False

with tempfile.NamedTemporaryFile(
prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".jpg", delete=False) as tf:
cv2.imwrite(tf.name, frame_cap)
# "id" is just the name of the temp image file
frame["id"] = pathlib.Path(tf.name).name
prev_frame_cap = frame_cap

return render_template('ocr.html',
vid_path=vid_path,
Expand All @@ -148,48 +160,83 @@ def render_ocr(vid_path, view_id, page_number):
page_number=str(page_number))


def find_duplicates(frames_list):
def find_duplicates(frames_list, cv2_vid):
"""Find duplicate frames"""
prev_frame = None
for frame_num, frame in frames_list:
if is_duplicate_ocr_frame(frame, prev_frame):
# Frame is timeframe annotation
if type(frame_num) != int:
continue
if is_duplicate_ocr_frame(prev_frame, frame):
frame["repeat"] = True
prev_frame = frame
return frames_list


def is_duplicate_ocr_frame(frame, prev_frame):
if prev_frame:
# Check Boundingbox distances
rounded_prev = round_boxes(prev_frame["boxes"])
for box in round_boxes(frame["boxes"]):
if box in rounded_prev and frame["secs"]-prev_frame["secs"] < 5:
return True
def is_duplicate_ocr_frame(prev_frame, frame):
if not prev_frame:
return False
if prev_frame.get("boxtypes") != frame.get("boxtypes"):
return False
if abs(len(prev_frame.get("boxes"))-len(frame.get("boxes"))) > 3:
return False
# Check Boundingbox distances
rounded_prev = round_boxes(prev_frame.get("boxes"))
for box in round_boxes(frame.get("boxes")):
if box in rounded_prev and frame["secs"]-prev_frame["secs"] < 10:
return True
# Check overlap in text
prev_text, text = set(prev_frame.get("text")), set(frame.get("text"))
if prev_text and text and prev_text.intersection(text):
return True
return False

def is_duplicate_image(prev_frame, frame, cv2_vid):

# Convert it to HSV
img1_hsv = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2HSV)
img2_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

# Calculate the histogram and normalize it
hist_img1 = cv2.calcHist([img1_hsv], [0,1], None, [180,256], [0,180,0,256])
cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);
hist_img2 = cv2.calcHist([img2_hsv], [0,1], None, [180,256], [0,180,0,256])
cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX);

# Find the metric value
metric_val = cv2.compareHist(hist_img1, hist_img2, cv2.HISTCMP_CHISQR)
return metric_val < 50



def round_boxes(boxes):
# To account for jittery bounding boxes in OCR annotations
rounded_boxes = []
for box in boxes:
rounded_box = []
for coord in box[2]:
rounded_box.append(round(coord/10)*10)
rounded_box.append(round(coord/100)*100)
rounded_boxes.append(rounded_box)
return rounded_boxes


def get_ocr_views(mmif):
"""Return OCR views, which have TextDocument, BoundingBox, and Alignment annotations"""
"""Returns all CV views, which contain timeframes or bounding boxes"""
views = []
ocr_apps = ["east-textdetection", "tesseract", "chyron-text-recognition", "slatedetection", "barsdetection", "parseq-wrapper"]
required_types = ["TimeFrame", "BoundingBox"]
for view in mmif.views:
if any([ocr_app in view.metadata.app for ocr_app in ocr_apps]):
views.append(view)
for anno_type, anno in view.metadata.contains.items():
# Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument
if anno_type.shortname in required_types and mmif.get_document_by_id(anno["document"]).at_type.shortname == "VideoDocument":
views.append(view)
continue
# TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view
elif "parseq" in view.metadata.app:
views.append(view)
continue
return views

def save_json(dict, view_id):
# jsonified_pages = json.dumps(dict)
with tempfile.NamedTemporaryFile(prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".json", delete=False) as tf:
pages_json = open(tf.name, "w")
json.dump(dict, pages_json)
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
spacy==2.3.2
clams-python==1.0.*
mmif-python==1.0.8
mmif-python[utils]==1.0.8
flask-session
opencv-python==4.*
15 changes: 11 additions & 4 deletions templates/ocr.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

<div id="ocr_tab_{{view_id}}">
{% for frame_num, frame in page %}
{% set filename = frame["id"] %}
Expand All @@ -11,19 +12,25 @@
{% include "image.html" %}
<div>
<h4>
{% if frame["frame_num"] %}
{% if frame["frame_num"] is not none %}
frame: {{frame["frame_num"]}}<br>
{% endif %}
{% if frame["range"] %}
{% if frame["range"] is not none %}
frames: {{frame["range"][0]}} - {{frame["range"][1]}}<br>
{% endif %}
{% if frame["timestamp"] %}
{% if frame["timestamp"] is not none %}
timestamp: <a class="timestamp" onclick="SetCurTime('{{secs}}')">{{frame["timestamp"]}}</a><br>
{% endif %}
{% if frame["timestamp_range"] %}
{% if frame["timestamp_range"] is not none %}
{% set sec_range = frame["sec_range"] %}
timestamps: <a class="timestamp" onclick="SetCurTime('{{sec_range[0]}}')">{{frame["timestamp_range"][0]}}</a> - <a class="timestamp" onclick="SetCurTime('{{sec_range[1]}}')">{{frame["timestamp_range"][1]}}</a><br>
{% endif %}
{% if frame["frametype"] is not none %}
frame type: {{frame["frametype"]}}<br>
{% endif %}
{% if frame["boxtypes"] %}
box types: {{frame["boxtypes"]}}<br>
{% endif %}

{% if frame["text"] %}
text detected:<br>
Expand Down
31 changes: 31 additions & 0 deletions templates/player.html
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,37 @@
padding: 10px;
border-radius: 5px;
}

/* Loading spinner styles */

.loader-container {
display: flex;
justify-content: center;
align-items: center;
height: 50vh;
}

.loader {
border: 16px solid #f3f3f3;
border-radius: 50%;
border-top: 16px solid #3498db;
margin-top: 20px;
width: 60px;
height: 60px;
-webkit-animation: spin 2s linear infinite; /* Safari */
animation: spin 2s linear infinite;
}

/* Safari */
@-webkit-keyframes spin {
0% { -webkit-transform: rotate(0deg); }
100% { -webkit-transform: rotate(360deg); }
}

@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
</style>

<body>
Expand Down
Loading

0 comments on commit c1b4c9a

Please sign in to comment.