diff --git a/README.md b/README.md index f2d9af5..c8984e9 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ In these notes we assume that the data are in a local directory named `/Users/Sh $ docker run --rm -d -p 5000:5000 -v /Users/Shared/archive:/data clams-mmif-visualizer ``` -After this, all you need to do is point your browser at [http://0.0.0.0:5000/upload](http://0.0.0.0:5000/upload), click "Choose File", select a MMIF file and then click "Visualize". See the *Data source repository and input MMIF file* section below for a description of the MMIF file. Assuming you have not made any changes to the directory structure you can use the example MMIF files in the `input` folder. +See the *Data source repository and input MMIF file* section below for a description of the MMIF file. Assuming you have not made any changes to the directory structure you can use the example MMIF files in the `input` folder. **Some background** @@ -89,8 +89,17 @@ To run the server do: $ python app.py ``` -Then point your browser at [http://0.0.0.0:5000/upload](http://0.0.0.0:5000/upload), click "Choose File" and then click "Visualize". +## Uploading Files +MMIF files can be uploaded to the visualization server one of two ways: +* Point your browser to http://0.0.0.0:5000/upload, click "Choose File" and then click "Visualize". This will generate a static URL containing the visualization of the input file (e.g. `http://localhost:5000/display/HaTxbhDfwakewakmzdXu5e`). Once the file is uploaded, the page will automatically redirect to the file's visualization. +* Using a command line, enter: + ``` + curl -X POST -F "file=@<filename>" -s http://localhost:5000/upload + ``` + This will upload the file and print the unique identifier for the file visualization. The visualization can be accessed at `http://localhost:5000/display/<id>` + +The server will maintain a cache of up to 500MB for these temporary files, so the visualizations can be repeatedly accessed without needing to re-upload any files. 
Once this limit is reached, the server will delete stored visualizations until enough space is reclaimed, drawing from oldest/least recently accessed pages first. If you attempt to access the /display URL of a deleted file, you will be redirected back to the upload page instead. ## Data source repository and input MMIF file diff --git a/app.py b/app.py index be37df7..5e53c65 100644 --- a/app.py +++ b/app.py @@ -1,15 +1,16 @@ +import hashlib import os -import pathlib -import sys import secrets -import json -import html +import sys +from threading import Thread -from flask import request, render_template, flash, redirect, send_from_directory, session -from werkzeug.utils import secure_filename +from flask import request, render_template, flash, send_from_directory, redirect from mmif.serialize import Mmif -from utils import app, render_ocr, get_media, prep_annotations, prepare_ocr_visualization +import cache +from cache import set_last_access, cleanup +from utils import app, render_ocr, documents_to_htmls, prep_annotations, prepare_ocr_visualization + @app.route('/') def index(): @@ -20,10 +21,10 @@ def index(): def ocr(): try: data = dict(request.json) - mmif_str = open(session["mmif_file"]).read() + mmif_str = open(cache.get_cache_path() / data["mmif_id"] / "file.mmif").read() mmif = Mmif(mmif_str) ocr_view = mmif.get_view_by_id(data["view_id"]) - return prepare_ocr_visualization(mmif, ocr_view) + return prepare_ocr_visualization(mmif, ocr_view, data["mmif_id"]) except Exception as e: return f'

{e}' @@ -32,10 +33,11 @@ def ocr(): def ocrpage(): data = request.json try: - return (render_ocr(data['vid_path'], data["view_id"], data["page_number"])) + return render_ocr(data["mmif_id"], data['vid_path'], data["view_id"], data["page_number"]) except Exception as e: return f'

Unexpected error of type {type(e)}: {e}' + @app.route('/upload', methods=['GET', 'POST']) def upload(): # NOTE. Uses of flash() originally gaven a RuntimeError (The session is @@ -44,7 +46,7 @@ def upload(): if request.method == 'POST': # Check if request is coming from elasticsearch if 'data' in request.form: - return render_mmif(request.form['data']) + return upload_file(request.form['data']) # Otherwise, check if the post request has the file part elif 'file' not in request.files: flash('WARNING: post request has no file part') @@ -56,34 +58,87 @@ def upload(): flash('WARNING: no file was selected') return redirect(request.url) if file: - filename = secure_filename(file.filename) - file.save(os.path.join('temp', filename)) - with open("temp/" + filename) as fh: - session["mmif_file"] = fh.name - mmif_str = fh.read() - return render_mmif(mmif_str) + return upload_file(file) + return render_template('upload.html') +@app.route('/decache', methods=['GET', 'POST']) +def invalidate_cache(): + app.logger.debug(f"Request to invalidate cache on {request.args}") + if not request.args.get('viz_id'): + cache.invalidate_cache() + return redirect("/upload") + viz_id = request.args.get('viz_id') + in_mmif = open(cache.get_cache_path() / viz_id / 'file.mmif', 'rb').read() + cache.invalidate_cache([viz_id]) + return upload_file(in_mmif) + + +@app.route('/display/') +def display(viz_id): + try: + path = cache.get_cache_path() / viz_id + set_last_access(path) + with open(os.path.join(path, "index.html")) as f: + html_file = f.read() + return html_file + except FileNotFoundError: + flash("File not found -- please upload again (it may have been deleted to clear up cache space).") + return redirect("/upload") + + @app.route('/uv/') def send_js(path): return send_from_directory("uv", path) -def render_mmif(mmif_str): +def render_mmif(mmif_str, viz_id): mmif = Mmif(mmif_str) - media = get_media(mmif) - annotations = prep_annotations(mmif) + media = documents_to_htmls(mmif, viz_id) + 
app.logger.debug(f"Prepared Media: {[m[0] for m in media]}") + annotations = prep_annotations(mmif, viz_id) + app.logger.debug(f"Prepared Annotations: {[annotation[0] for annotation in annotations]}") return render_template('player.html', - mmif=mmif, media=media, annotations=annotations) + media=media, viz_id=viz_id, annotations=annotations) + + +def upload_file(in_mmif): + # Save file locally + in_mmif_bytes = in_mmif if isinstance(in_mmif, bytes) else in_mmif.read() + in_mmif_str = in_mmif_bytes.decode('utf-8') + viz_id = hashlib.sha1(in_mmif_bytes).hexdigest() + app.logger.debug(f"Visualization ID: {viz_id}") + path = cache.get_cache_path() / viz_id + app.logger.debug(f"Visualization Directory: {path}") + try: + os.makedirs(path) + set_last_access(path) + with open(path / 'file.mmif', 'w') as in_mmif_file: + app.logger.debug(f"Writing original MMIF to {path / 'file.mmif'}") + in_mmif_file.write(in_mmif_str) + html_page = render_mmif(in_mmif_str, viz_id) + with open(os.path.join(path, "index.html"), "w") as f: + f.write(html_page) + except FileExistsError: + app.logger.debug("Visualization already cached") + finally: + # Perform cleanup + t = Thread(target=cleanup) + t.daemon = True + t.run() + + agent = request.headers.get('User-Agent') + if 'curl' in agent.lower(): + return f"Visualization ID is {viz_id}\nYou can access the visualized file at /display/{viz_id}\n" + return redirect(f"/display/{viz_id}", code=301) if __name__ == '__main__': # Make path for temp files - tmp_path = pathlib.Path(__file__).parent /'static'/'tmp' - if not os.path.exists(tmp_path): - os.makedirs(tmp_path) - + cache_path = cache.get_cache_path() + if not os.path.exists(cache_path): + os.makedirs(cache_path) # to avoid runtime errors for missing keys when using flash() alphabet = 'abcdefghijklmnopqrstuvwxyz1234567890' @@ -92,4 +147,5 @@ def render_mmif(mmif_str): port = 5000 if len(sys.argv) > 2 and sys.argv[1] == '-p': port = int(sys.argv[2]) - app.run(port=port, host='0.0.0.0', 
debug=True) + + app.run(port=port, host='0.0.0.0', debug=True, use_reloader=False) diff --git a/cache.py b/cache.py new file mode 100644 index 0000000..7c9660b --- /dev/null +++ b/cache.py @@ -0,0 +1,68 @@ +import os +import time +import shutil +import threading +import pathlib + +from utils import app + +lock = threading.Lock() + + +def get_cache_path(): + return pathlib.Path(app.static_folder) / "tmp" + + +def get_cache_relpath(full_path): + return str(full_path)[len(app.static_folder):] + + +def invalidate_cache(viz_ids): + if not viz_ids: + app.logger.debug("Invalidating entire cache.") + shutil.rmtree(get_cache_path()) + os.makedirs(get_cache_path()) + else: + for v in viz_ids: + app.logger.debug(f"Invalidating {v} from cache.") + shutil.rmtree(get_cache_path() / v) + + +def set_last_access(path): + with open(os.path.join(path, "last_access.txt"), "w") as f: + f.write(str(time.time())) + + +def scan_tmp_directory(): + oldest_accessed_dir = {"dir": None, "access_time": None} + total_size = sum(f.stat().st_size for f in get_cache_path().glob('**/*') if f.is_file()) + # this will be some visualization IDs + for p in get_cache_path().glob('*'): + if not (p / 'last_access.txt').exists(): + oldest_accessed_dir = {"dir": p, "access_time": 0} + elif oldest_accessed_dir["dir"] is None: + with open(p / 'last_access.txt') as f: + timestamp = f.read() + if timestamp == '': + continue + oldest_accessed_dir = {"dir": p, "access_time": float(timestamp)} + else: + with open(p / 'last_access.txt') as f: + if float(f.read()) < oldest_accessed_dir["access_time"]: + timestamp = f.read() + if timestamp == '': + continue + oldest_accessed_dir = {"dir": p, "access_time": float(timestamp)} + return total_size, oldest_accessed_dir["dir"] + + +def cleanup(): + with lock: + print("Checking visualization cache...") + # Max tmp size is 500MB + max_size = 500000000 + folder_size, oldest_dir = scan_tmp_directory() + while folder_size > max_size: + print(f"Maximum cache size reached. 
Deleting {os.path.basename(oldest_dir)}.") + shutil.rmtree(oldest_dir) + folder_size, oldest_dir = scan_tmp_directory() diff --git a/iiif_utils.py b/iiif_utils.py index 47a19b7..7be9eb8 100644 --- a/iiif_utils.py +++ b/iiif_utils.py @@ -1,16 +1,17 @@ +import datetime import json import os -import pathlib import tempfile from typing import Dict import mmif from flask import url_for from mmif import AnnotationTypes, DocumentTypes, Mmif -import datetime + +import cache -def generate_iiif_manifest(in_mmif: mmif.Mmif): +def generate_iiif_manifest(in_mmif: mmif.Mmif, viz_id): iiif_json = { "@context": "http://iiif.io/api/presentation/2/context.json", "id": "http://0.0.0.0:5000/mmif_example_manifest.json", @@ -28,7 +29,7 @@ def generate_iiif_manifest(in_mmif: mmif.Mmif): } add_canvas_from_documents(in_mmif, iiif_json) add_structure_from_timeframe(in_mmif, iiif_json) - return save_manifest(iiif_json) + return save_manifest(iiif_json, viz_id) def add_canvas_from_documents(in_mmif, iiif_json): @@ -105,9 +106,10 @@ def add_structure_from_timeframe(in_mmif: Mmif, iiif_json: Dict): iiif_json["structures"].append(view_range) -def save_manifest(iiif_json: Dict) -> str: +def save_manifest(iiif_json: Dict, viz_id) -> str: # generate a iiif manifest and save output file - manifest = tempfile.NamedTemporaryFile('w', dir=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix='.json', delete=False) + manifest = tempfile.NamedTemporaryFile( + 'w', dir=str(cache.get_cache_path() / viz_id), suffix='.json', delete=False) json.dump(iiif_json, manifest, indent=4) return manifest.name diff --git a/ocr.py b/ocr.py index 2b88360..41aa30e 100644 --- a/ocr.py +++ b/ocr.py @@ -5,11 +5,13 @@ import tempfile import json import re -import html +import os, shutil -from flask import render_template, session +from flask import render_template from mmif.utils.video_document_helper import convert_timepoint, convert_timeframe +import cache + class OCRFrame(): """Class representing an (aligned or 
otherwise) set of OCR annotations for a single frame""" @@ -59,7 +61,6 @@ def add_bounding_box(self, anno, mmif): if anno.properties.get("boxType") and anno.properties.get("boxType") not in self.boxtypes: self.boxtypes.append(anno.properties.get("boxType")) - def add_timeframe(self, anno, mmif): start, end = convert_timeframe(mmif, anno, "frames") start_secs, end_secs = convert_timeframe(mmif, anno, "seconds") @@ -86,7 +87,7 @@ def get_ocr_frames(view, mmif, fps): for alignment in view.get_annotations(full_alignment_type[0]): source = find_annotation(alignment.properties["source"], view, mmif) target = find_annotation(alignment.properties["target"], view, mmif) - + frame = OCRFrame(source, mmif) i = frame.frame_num if frame.frame_num is not None else frame.range if i in frames.keys(): @@ -124,14 +125,16 @@ def paginate(frames_list): return {i: page for (i, page) in enumerate(pages)} -def render_ocr(vid_path, view_id, page_number): + +def render_ocr(mmif_id, vid_path, view_id, page_number): """Iterate through frames and display the contents/alignments.""" # Path for storing temporary images generated by cv2 cv2_vid = cv2.VideoCapture(vid_path) - f = open(session[f"{view_id}-page-file"]) - frames_pages = json.load(f) - page = frames_pages[str(page_number)] + tn_data_fname = cache.get_cache_path() / mmif_id / f"{view_id}-pages.json" + thumbnail_pages = json.load(open(tn_data_fname)) + page = thumbnail_pages[str(page_number)] prev_frame_cap = None + path = make_image_directory(mmif_id) for frame_num, frame in page: # If index is range instead of frame... 
if frame.get("range"): @@ -142,22 +145,28 @@ def render_ocr(vid_path, view_id, page_number): raise FileNotFoundError(f"Video file {vid_path} not found!") # Double check histogram similarity of "repeat" frames -- if they're significantly different, un-mark as repeat - if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, cv2_vid): + if prev_frame_cap is not None and frame["repeat"] and not is_duplicate_image(prev_frame_cap, frame_cap, + cv2_vid): frame["repeat"] = False - - with tempfile.NamedTemporaryFile( - prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".jpg", delete=False) as tf: + with tempfile.NamedTemporaryFile(dir=str(path), suffix=".jpg", delete=False) as tf: cv2.imwrite(tf.name, frame_cap) # "id" is just the name of the temp image file frame["id"] = pathlib.Path(tf.name).name prev_frame_cap = frame_cap - return render_template('ocr.html', - vid_path=vid_path, - view_id=view_id, - page=page, - n_pages=len(frames_pages), - page_number=str(page_number)) + tn_page_html = render_template( + 'ocr.html', vid_path=vid_path, view_id=view_id, page=page, + n_pages=len(thumbnail_pages), page_number=str(page_number), mmif_id=mmif_id) + return tn_page_html + + +def make_image_directory(mmif_id): + # Make path for temp OCR image files or clear image files if it exists + path = cache.get_cache_path() / mmif_id / "img" + if os.path.exists(path): + shutil.rmtree(path) + os.makedirs(path) + return path def find_duplicates(frames_list, cv2_vid): @@ -175,15 +184,15 @@ def find_duplicates(frames_list, cv2_vid): def is_duplicate_ocr_frame(prev_frame, frame): if not prev_frame: - return False + return False if prev_frame.get("boxtypes") != frame.get("boxtypes"): return False - if abs(len(prev_frame.get("boxes"))-len(frame.get("boxes"))) > 3: + if abs(len(prev_frame.get("boxes")) - len(frame.get("boxes"))) > 3: return False # Check Boundingbox distances rounded_prev = round_boxes(prev_frame.get("boxes")) for box 
in round_boxes(frame.get("boxes")): - if box in rounded_prev and frame["secs"]-prev_frame["secs"] < 10: + if box in rounded_prev and frame["secs"] - prev_frame["secs"] < 10: return True # Check overlap in text prev_text, text = set(prev_frame.get("text")), set(frame.get("text")) @@ -191,16 +200,16 @@ def is_duplicate_ocr_frame(prev_frame, frame): return True return False -def is_duplicate_image(prev_frame, frame, cv2_vid): +def is_duplicate_image(prev_frame, frame, cv2_vid): # Convert it to HSV img1_hsv = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2HSV) img2_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) # Calculate the histogram and normalize it - hist_img1 = cv2.calcHist([img1_hsv], [0,1], None, [180,256], [0,180,0,256]) + hist_img1 = cv2.calcHist([img1_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256]) cv2.normalize(hist_img1, hist_img1, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX); - hist_img2 = cv2.calcHist([img2_hsv], [0,1], None, [180,256], [0,180,0,256]) + hist_img2 = cv2.calcHist([img2_hsv], [0, 1], None, [180, 256], [0, 180, 0, 256]) cv2.normalize(hist_img2, hist_img2, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX); # Find the metric value @@ -208,14 +217,13 @@ def is_duplicate_image(prev_frame, frame, cv2_vid): return metric_val < 50 - def round_boxes(boxes): # To account for jittery bounding boxes in OCR annotations rounded_boxes = [] for box in boxes: rounded_box = [] for coord in box[2]: - rounded_box.append(round(coord/100)*100) + rounded_box.append(round(coord / 100) * 100) rounded_boxes.append(rounded_box) return rounded_boxes @@ -227,7 +235,8 @@ def get_ocr_views(mmif): for view in mmif.views: for anno_type, anno in view.metadata.contains.items(): # Annotation belongs to a CV view if it is a TimeFrame/BB and it refers to a VideoDocument - if anno_type.shortname in required_types and mmif.get_document_by_id(anno["document"]).at_type.shortname == "VideoDocument": + if anno_type.shortname in required_types and mmif.get_document_by_id( + 
anno["document"]).at_type.shortname == "VideoDocument": views.append(view) continue # TODO: Couldn't find a simple way to show if an alignment view is a CV/Frames-type view @@ -236,8 +245,8 @@ def get_ocr_views(mmif): continue return views -def save_json(dict, view_id): - with tempfile.NamedTemporaryFile(prefix=str(pathlib.Path(__file__).parent /'static'/'tmp'), suffix=".json", delete=False) as tf: - pages_json = open(tf.name, "w") - json.dump(dict, pages_json) - session[f"{view_id}-page-file"] = tf.name + +def save_json(data, view_id, mmif_id): + path = cache.get_cache_path() / mmif_id / f"{view_id}-pages.json" + with open(path, 'w') as f: + json.dump(data, f) diff --git a/requirements.txt b/requirements.txt index 563435d..09b2fd3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -spacy==2.3.2 +spacy==2.* mmif-python==1.0.8 -mmif-python[utils]==1.0.8 +lapps flask-session -opencv-python==4.* \ No newline at end of file +flask[async] +opencv-python==4.* +shortuuid==1.0.11 \ No newline at end of file diff --git a/templates/ocr.html b/templates/ocr.html index 35b624d..ff584eb 100644 --- a/templates/ocr.html +++ b/templates/ocr.html @@ -1,7 +1,7 @@

{% for frame_num, frame in page %} - {% set filename = frame["id"] %} + {% set filename = "/tmp/" + mmif_id + "/img/" + frame["id"] %} {% set id = frame["id"] %} {% set boxes = frame["boxes"] %} {% set secs = frame["secs"] %} @@ -127,12 +127,13 @@

var data = { "vid_path": "{{vid_path}}", "view_id": "{{view_id}}", - "page_number": parseInt("{{page_number}}") + "page_number": parseInt("{{page_number}}"), + "mmif_id": "{{mmif_id}}" } - if (page == BACKWARD) { + if (page === BACKWARD) { data["page_number"] -= 1 } - else if (page == FORWARD) { + else if (page === FORWARD) { data["page_number"] += 1 } else { diff --git a/templates/player.html b/templates/player.html index 1116688..5da61c0 100644 --- a/templates/player.html +++ b/templates/player.html @@ -23,6 +23,13 @@