diff --git a/.gitignore b/.gitignore
index 54b1b40..70fdd66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -155,3 +155,5 @@ images_errors.csv
 
 # patch files
 *.py.patch
+
+tmp/
diff --git a/requirements.txt b/requirements.txt
index 48bd83d..724e437 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,5 +9,5 @@ ffmpeg-python
 wget
 requests
 flask>=1.1.1
-werkzeug==0.16.1 # It fixes the error: from werkzeug import FileStorage cannot import FileStorage.
+werkzeug>=1.0.1
 cached_property
diff --git a/scripts/up_serving.sh b/scripts/up_serving.sh
index 721ea89..b34129f 100755
--- a/scripts/up_serving.sh
+++ b/scripts/up_serving.sh
@@ -4,5 +4,6 @@ RATIO_GPU=${RATIO_GPU:-0.45}
 echo "Using GPU: $NVIDIA_VISIBLE_DEVICES"
 echo "Limiting GPU to ratio: $RATIO_GPU"
 
+cd /src/mot/serving
 python3 -m mot.serving.app &
 /usr/bin/tf_serving_entrypoint.sh --per_process_gpu_memory_fraction=$RATIO_GPU
diff --git a/src/mot/object_detection/query_server.py b/src/mot/object_detection/query_server.py
index 8c9d9ef..f15ca9e 100644
--- a/src/mot/object_detection/query_server.py
+++ b/src/mot/object_detection/query_server.py
@@ -1,13 +1,15 @@
 import json
+import logging
 import os
 from typing import Dict
 
 import numpy as np
 import requests
-from tensorpack import logger
 
 from mot.object_detection.preprocessing import preprocess_for_serving
 
+logger = logging.getLogger(__file__)
+
 
 def query_tensorflow_server(signature: Dict, url: str) -> Dict:
     """Will send a REST query to the tensorflow server.
@@ -61,12 +63,16 @@ def localizer_tensorflow_serving_inference(
 
     Return:
 
-    - *Dict*: A dict with the predictions with the following format:
+    - *predictions*: A dict with the predictions with the following format:
 
     ```python
     if return_all_scores:
         predictions = {
-            'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]], (y1, x1, y2, x2)
+            'output/boxes:0': [
+                [0.1, 0.1, 0.9, 0.9],
+                [0.0, 0.2, 0.1, 0.4],
+                [0.2, 0.4, 0.5, 0.7],
+                ],  # (y1, x1, y2, x2) scaled between 0 and 1
             'output/labels:0': [3, 1, 2],  # the labels start at 1 since 0 is for background
             'output/scores:0': [
                 [0.001, 0.001, 0.98],
@@ -76,7 +82,11 @@ def localizer_tensorflow_serving_inference(
         }
     else:
         predictions = {
-            'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]],
+            'output/boxes:0': [
+                [0.1, 0.1, 0.9, 0.9],
+                [0.0, 0.2, 0.1, 0.4],
+                [0.2, 0.4, 0.5, 0.7],
+                ], # (y1, x1, y2, x2) scaled between 0 and 1
             'output/labels:0': [3, 1, 2], # the labels start at 1 since 0 is for background
             'output/scores:0': [0.98, 0.87, 0.76] # sorted in descending order
         }
@@ -87,10 +97,10 @@ def localizer_tensorflow_serving_inference(
     scores = np.array(predictions['output/scores:0'])
     if len(predictions["output/boxes:0"]) > 0:
         predictions['output/boxes:0'] = np.array(predictions['output/boxes:0'], np.int32) / ratio
-        predictions["output/boxes:0"][:, 0] /= image.shape[0] # scaling coords to [0, 1]
-        predictions["output/boxes:0"][:, 1] /= image.shape[1] # scaling coords to [0, 1]
-        predictions["output/boxes:0"][:, 2] /= image.shape[0] # scaling coords to [0, 1]
-        predictions["output/boxes:0"][:, 3] /= image.shape[1] # scaling coords to [0, 1]
+        predictions["output/boxes:0"][:, 0] /= image.shape[0]  # scaling coords to [0, 1]
+        predictions["output/boxes:0"][:, 1] /= image.shape[1]  # scaling coords to [0, 1]
+        predictions["output/boxes:0"][:, 2] /= image.shape[0]  # scaling coords to [0, 1]
+        predictions["output/boxes:0"][:, 3] /= image.shape[1]  # scaling coords to [0, 1]
         predictions['output/boxes:0'] = predictions['output/boxes:0'].tolist()
         if return_all_scores and len(scores.shape) == 1:
             raise ValueError(
diff --git a/src/mot/serving/README.md b/src/mot/serving/README.md
index 4424999..b1a4a9c 100644
--- a/src/mot/serving/README.md
+++ b/src/mot/serving/README.md
@@ -24,31 +24,38 @@ NVIDIA_VISIBLE_DEVICES=2 RATIO_GPU=0.3 MODEL_FOLDER=/path/to/serving PORT=the_po
 
 Here are the different ways to perform inference requests.
 
-### Web interface
+### Tracking
 
-You can access a basic web interface to manually upload pictures or videos to do inference.
-In your browser, access the address `host:port`, with port being the one you specified in the previous step.
+*host:port/tracking*
 
-### cURL
-
-#### Json
-
-This only works for images.
+Available with web interface or a simple curl. You can upload a video or a zip archive containing images.
 
 ```bash
-curl -d @/path/to/json --header "Content-Type: application/json" host:port
+curl -F "file=@/path/to/video.mp4" -F "fps=2" -F "resolution=(10,10)" host:port/tracking
 ```
 
-#### File
+You don't have to specify those parameters and you can find their default value in [this file](constants.py).
+
+### Demo
+
+*host:port/demo*
+
+Available with web interface: you can upload an image, a localizer will make predictions on it which will be displayed in your browser.
+
+
+### Image
+
+*host:port/image*
+
+You can POST one of the following to get the predictions of the localizer:
+- an image file:
 
 ```bash
-curl -F "file=@/path/to/file" host:port
+curl -F "file=@/path/to/image.jpg" host:port/image
 ```
 
-For videos you can add parameters such as fps and resolution like that:
+- an image as a JSON in BGR:
 
 ```bash
-curl -F "file=@/path/to/video.mp4" -F "fps=2" -F "resolution=(10,10)" host:port
+curl -d @/path/to/json --header "Content-Type: application/json" host:port/image
 ```
-
-You don't have to specify those parameters and you can find their default value in [this file](inference.py).
diff --git a/src/mot/serving/app.py b/src/mot/serving/app.py
index 5266b64..bb79550 100644
--- a/src/mot/serving/app.py
+++ b/src/mot/serving/app.py
@@ -1,15 +1,57 @@
+import json
+import os
+
+import cv2
+import numpy as np
 from flask import Flask, render_template, request
 
-from mot.serving.inference import handle_post_request
+from mot.serving.constants import TMP_IMAGE_NAME, UPLOAD_FOLDER
+from mot.serving.inference import (
+    detect_and_track_images, predict_and_format_image, predict_image_file
+)
+from mot.serving.viz import draw_boxes
 
 app = Flask(__name__)
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+
+@app.route('/tracking', methods=['GET', 'POST'])
+def tracking():
+    """Route to upload a video or a zip of images and get the result of the detection and tracking."""
+    if request.method != "GET":
+        return detect_and_track_images(request.files['file'], app.config["UPLOAD_FOLDER"])
+    return render_template("upload.html")  # landing page on browser
+
+
+@app.route('/demo', methods=['GET', 'POST'])
+def demo():
+    """Route to upload an image and visualize the prediction of a localizer."""
+    if request.method == "GET":
+        return render_template("upload_image.html")  # landing page on browser
+    analysis_results = predict_image_file(request.files["file"], app.config["UPLOAD_FOLDER"])
+    if "error" in analysis_results:
+        return analysis_results  # the prediction failed: there is no image to draw on
+    draw_boxes(analysis_results["full_filepath"], analysis_results["detected_trash"])
+    return render_template("image.html", filename=analysis_results["full_filepath"])
 
 
-@app.route('/', methods=['GET', 'POST'])
-def index():
-    if request.method == 'POST':
-        return handle_post_request()
-    return render_template("upload.html")
+@app.route('/image', methods=['POST'])
+def image():
+    """Route to upload an image file or a JSON image in BGR and get the prediction of a localizer.
+
+    The JSON body must have the format `{"image": [[[0, 0, 0], ...], ...]}`.
+    """
+    if "file" in request.files:
+        return predict_image_file(request.files["file"], app.config["UPLOAD_FOLDER"])
+    data = json.loads(request.data.decode("utf-8"))
+    if "image" not in data:
+        return {"error": "Your JSON must have a field image with the image as an array in BGR"}
+    image_array = np.array(data["image"])
+    os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)  # cv2.imwrite doesn't create folders
+    image_path = os.path.join(app.config["UPLOAD_FOLDER"], TMP_IMAGE_NAME)
+    cv2.imwrite(image_path, image_array)
+    detected_trash = predict_and_format_image(image_array)
+    return {"detected_trash": detected_trash}
 
 
 if __name__ == "__main__":
diff --git a/src/mot/serving/constants.py b/src/mot/serving/constants.py
new file mode 100644
index 0000000..bb026f0
--- /dev/null
+++ b/src/mot/serving/constants.py
@@ -0,0 +1,27 @@
+import multiprocessing
+
+
+# app configuration
+UPLOAD_FOLDER = "static/tmp"
+TMP_IMAGE_NAME = "tmp_image.jpg"
+SERVING_URL = "http://localhost:8501"  # the url where the tf-serving container exposes the model
+# at least one worker: multiprocessing.Pool refuses a number of processes lower than 1
+CPU_COUNT = max(1, min(multiprocessing.cpu_count() // 2, 32))
+
+# video settings
+FPS = 4
+RESOLUTION = (1024, 768)
+# the sum of scores for all classes must be greater than this value for the prediction to be kept
+SUM_THRESHOLD = 0.6
+
+# object detection settings
+CLASS_NAMES = ["bottles", "others", "fragments"]
+CLASS_TO_THRESHOLD = {"bottles": 0.7, "others": 0.7, "fragments": 0.7}  # minimal score per class
+DEFAULT_THRESHOLD = 0.5  # default threshold applied when the class isn't in CLASS_TO_THRESHOLD
+
+# color passed to cv2 to draw the box and the label of each class
+CLASS_NAME_TO_COLOR = {
+    "bottles": (255, 0, 0),
+    "others": (0, 255, 0),
+    "fragments": (0, 0, 255),
+}
diff --git a/src/mot/serving/inference.py b/src/mot/serving/inference.py
index 68f1a8f..558e901 100644
--- a/src/mot/serving/inference.py
+++ b/src/mot/serving/inference.py
@@ -1,113 +1,45 @@
-import json
+import logging
 import multiprocessing
 import os
 import shutil
+import zipfile
 from typing import Dict, List, Tuple
 
 import cv2
 import numpy as np
-from flask import request
-from tensorpack.utils import logger
 from tqdm import tqdm
-from werkzeug import FileStorage
-from werkzeug.utils import secure_filename
-from zipfile import ZipFile
+from werkzeug.datastructures import FileStorage
 
 from mot.object_detection.query_server import \
     localizer_tensorflow_serving_inference
+from mot.serving.constants import (CLASS_NAMES, CLASS_TO_THRESHOLD, CPU_COUNT,
+                                   DEFAULT_THRESHOLD, FPS, RESOLUTION,
+                                   SERVING_URL)
+from mot.serving.utils import save_file
 from mot.tracker.object_tracking import ObjectTracking
 from mot.tracker.video_utils import read_folder, split_video
 
-SERVING_URL = "http://localhost:8501"  # the url where the tf-serving container exposes the model
-UPLOAD_FOLDER = 'tmp'  # folder used to store images or videos when sending files
-FPS = 4
-RESOLUTION = (1024, 768)
-CLASS_NAMES = ["bottles", "others", "fragments"]
-SUM_THRESHOLD = 0.6  # the sum of scores for all classes must be greater than this value
-# for the prediction to be kept
-CLASS_TO_THRESHOLD = {"bottles": 0.4, "others": 0.3, "fragments": 0.3}
-CPU_COUNT = min(int(multiprocessing.cpu_count() / 2), 32)
+logger = logging.getLogger(__file__)
 
+###    TRACKING (FROM VIDEO OR ZIPFILE)    ###
 
-def handle_post_request(upload_folder: str = UPLOAD_FOLDER) -> Dict[str, np.array]:
-    """This method is the first one to be called when a POST request is coming. It analyzes the incoming
-        format (file or JSON) and then call the appropiate methods to do the prediction.
 
-    If you want to make a prediction by sending the data as a JSON, it has to be in this format:
-
-    ```json
-    {"image":[[[0,0,0],[0,0,0]],[[0,0,0],[0,0,0]]]}
-    ```
-
-    or
-
-    ```json
-    {"video": TODO}
-    ```
-    Arguments:
-
-    - *upload_folder*: Where the files are temporarly stored
-
-    Returns:
-
-    - *Dict[str, np.array]*: The predictions of the TF serving module
-
-    Raises:
-
-    - *NotImplementedError*: If the format of data isn't handled yet
-    """
-    if "file" in request.files:
-        return handle_file(request.files['file'], upload_folder, **request.form)
-    data = json.loads(request.data.decode("utf-8"))
-    if "image" in data:
-        image = np.array(data["image"])
-        return {"detected_trash": predict_and_format_image(image)}
-    if "video" in data:
-        raise NotImplementedError("video")
-    raise ValueError(
-        "Error during the reading of JSON. Keys {} aren't valid ones.".format(data.keys()) +
-        "For an image, send a JSON such as {'image': [0, 0, 0]}." +
-        "Sending videos over JSON isn't implemented yet."
-    )
-
-
-def handle_file(
+def detect_and_track_images(
     file: FileStorage,
-    upload_folder: str = UPLOAD_FOLDER,
+    upload_folder: str,
     fps: int = FPS,
     resolution: Tuple[int, int] = RESOLUTION,
-    **kwargs
 ) -> Dict[str, np.array]:
-    """Make the prediction if the data is coming from an uploaded file.
+    """Performs object detection and then tracking on a video or a zip containing images.
 
     Arguments:
 
-    - *file*: The file, can be either an image or a video, or a zipped folder
+    - *file*: The file, can be either a video or a zipped folder
     - *upload_folder*: Where the files are temporarly stored
 
     Returns:
 
-    - for an image: a json of format
-
-    ```json
-    {
-        "image": filename,
-        "detected_trash":
-            [
-                {
-                    "box": [1, 1, 2, 20],
-                    "label": "fragments",
-                    "score": 0.92
-                }, {
-                    "box": [10, 10, 25, 20],
-                    "label": "bottles",
-                    "score": 0.75
-                }
-            ]
-    }
-    ```
-
-    - for a video or a zipped file: a json of format
+    - *predictions*:
 
     ```json
     {
@@ -133,84 +65,84 @@ def handle_file(
             ]
     }
     ```
-
-    Raises:
-
-    - *NotImplementedError*: If the format of data isn't handled yet
     """
-    if kwargs:
-        logger.warning("Unused kwargs: {}".format(kwargs))
-    filename = secure_filename(file.filename)
-    full_filepath = os.path.join(upload_folder, filename)
-    if not os.path.isdir(upload_folder):
-        os.mkdir(upload_folder)
-    if os.path.isfile(full_filepath):
-        os.remove(full_filepath)
-    file.save(full_filepath)
-    file_type = file.mimetype.split("/")[0]
-    # mimetype is for example 'image/png' and we only want the image
-
-    if file_type == "image":
-        image = cv2.imread(full_filepath)  # cv2 opens in BGR
-        os.remove(full_filepath)  # remove it as we don't need it anymore
-        try:
-            detected_trash = predict_and_format_image(image)
-        except ValueError as e:
-            return {"error": str(e)}
-        return {"image": filename, "detected_trash": detected_trash}
-
-    elif file_type in ["video", "application"]:
-        folder = None
-
-        if file.mimetype == "application/zip":
-            # zip case
-            ZipFile(full_filepath).extractall(upload_folder)
-            dirname = None
-            with ZipFile(full_filepath, 'r') as zipObj:
-                listOfFileNames = zipObj.namelist()
-                for fileName in listOfFileNames:
-                    dirname = os.path.dirname(fileName)
-                    zipObj.extract(fileName, upload_folder)
-
-            folder = os.path.join(upload_folder, dirname)
-        else:
-            # video case: splitting video and saving frames
-            folder = os.path.join(upload_folder, "{}_split".format(filename))
-            if os.path.isdir(folder):
-                shutil.rmtree(folder)
-            os.mkdir(folder)
-            logger.info("Splitting video {} to {}.".format(full_filepath, folder))
-            split_video(full_filepath, folder, fps=fps, resolution=resolution)
-        print("folder:", folder, "uplaod_folder:", upload_folder, "file.filename:", file.filename)
-        image_paths = read_folder(folder)
-        if len(image_paths) == 0:
-            raise ValueError("No output image")
-
-        # making inference on frames
-        logger.info("{} images to analyze on {} CPUs.".format(len(image_paths), CPU_COUNT))
+
+    filename, full_filepath = save_file(file, upload_folder)
+
+    def process_video():
+        images_folder = os.path.join(upload_folder, "{}_split".format(filename))
+        shutil.rmtree(images_folder, ignore_errors=True)
+        os.mkdir(images_folder)
+        logger.info("Splitting video {} to {}.".format(full_filepath, images_folder))
+        split_video(full_filepath, images_folder, fps=fps, resolution=resolution)
+        return images_folder
+
+    def process_zip():
+        images_folder = os.path.join(upload_folder, "{}_split".format(filename))
+        with zipfile.ZipFile(full_filepath, 'r') as zip_obj:
+            zip_obj.extractall(images_folder)
+
+        def move_files_to_root(directory, root_directory):
+            for x in os.listdir(directory):
+                path = os.path.join(directory, x)
+                if x.startswith("._") or x.startswith("__"):
+                    # unwanted files such as __MACOSX
+                    shutil.rmtree(path)
+                else:
+                    if os.path.isfile(path):
+                        # we want to move this file to the root of the zip directory
+                        if not os.path.isfile(os.path.join(root_directory, x)):
+                            # unless it is already present at root
+                            shutil.move(path, root_directory)
+                    else:
+                        # if there is a folder, we want to move back the files to root
+                        move_files_to_root(path, root_directory)
+
+        move_files_to_root(images_folder, images_folder)
+
+        return images_folder
+
+    if file.mimetype == "":
+        # no type: we try to unzip, and if it fails we split as a video
         try:
-            with multiprocessing.Pool(CPU_COUNT) as p:
-                inference_outputs = list(
-                    tqdm(
-                        p.imap(process_image, image_paths),
-                        total=len(image_paths),
-                    )
-                )
-        except ValueError as e:
-            return {"error": str(e)}
-        logger.info("Finish analyzing video {}.".format(full_filepath))
-
-        # tracking objects
-        logger.info("Starting tracking.")
-        object_tracker = ObjectTracking(filename, image_paths, inference_outputs, fps=fps)
-        tracks = object_tracker.compute_tracks()
-        logger.info("Tracking finished.")
-        return object_tracker.json_result(tracks)
+            images_folder = process_zip()
+        except zipfile.BadZipFile:
+            images_folder = process_video()
+    elif file.mimetype == "application/zip":
+        # zip case
+        images_folder = process_zip()
     else:
-        raise NotImplementedError(file_type)
+        # video case: splitting video and saving frames
+        images_folder = process_video()
+
+    image_paths = read_folder(images_folder)
+    if len(image_paths) == 0:
+        raise ValueError("No output image")
+
+    # making inference on frames
+    logger.info("{} images to analyze on {} CPUs.".format(len(image_paths), CPU_COUNT))
+    try:
+        with multiprocessing.Pool(CPU_COUNT) as p:
+            inference_outputs = list(
+                tqdm(
+                    p.imap(_process_image, image_paths),
+                    total=len(image_paths),
+                )
+            )
+    except ValueError as e:
+        return {"error": str(e)}
+    logger.info("Object detection on video {} finished.".format(full_filepath))
+
+    # tracking objects
+    logger.info("Starting tracking for video {}.".format(full_filepath))
+    object_tracker = ObjectTracking(filename, image_paths, inference_outputs, fps=fps)
+    tracks = object_tracker.compute_tracks()
+    logger.info("Tracking finished.")
+    predictions = object_tracker.json_result(tracks)
+    return predictions
 
 
-def process_image(image_path: str) -> Dict[str, object]:
+def _process_image(image_path: str) -> Dict:
     """Function used to open and predict on an image. It is suposed to be used in multiprocessing.
 
     Arguments:
@@ -219,38 +151,76 @@ def process_image(image_path: str) -> Dict[str, object]:
 
     Returns:
 
-    - *Dict[str, object]*: Predictions for this image path
+    - *predictions*: Object detection predictions for this image path. A dict such as:
 
     ```python
     predictions = {
         'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]],
         'output/labels:0': [3, 1, 2], # the labels start at 1 since 0 is for background
-        'output/scores:0': [0.98, 0.87, 0.76] # sorted in descending order
+        'output/scores:0': [
+                [0.001, 0.001, 0.98],
+                [0.87, 0.05, 0.03],
+                [0.1, 0.76, 0.1],
+            ]
+            # the scores don't necessarily sum up to 1: the remainder is the background score.
     }
     ```
     """
     image = cv2.imread(image_path)  # cv2 opens in BGR
-    return localizer_tensorflow_serving_inference(image, SERVING_URL, return_all_scores=True)
+    predictions = localizer_tensorflow_serving_inference(image, SERVING_URL, return_all_scores=True)
+    return predictions
+
+
+###   OBJECT DETECTION (FROM IMAGE)    ###
+
+
+def predict_image_file(file: FileStorage, upload_folder: str):
+    """Save an image file on disk and then perform object detection on it.
+
+    Arguments:
+
+    - *file*: An image file sent to the app.
+    - *upload_folder*: The folder where the picture will be temporarily stored.
+
+    Returns:
+
+    - *predictions*: A dict with the keys `full_filepath`, `filename` and `detected_trash`,
+        or `{"error": message}` if the file can't be read as an image or the prediction fails.
+
+    Raises:
+
+    - *ValueError*: Raised by `save_file` when the file has no valid filename.
+    """
+    filename, full_filepath = save_file(file, upload_folder)
+
+    image = cv2.imread(full_filepath)  # cv2 opens in BGR
+    if image is None:
+        # cv2.imread returns None instead of raising when it can't decode the file
+        return {"error": "The file {} could not be read as an image.".format(filename)}
+    try:
+        detected_trash = predict_and_format_image(image)
+    except ValueError as e:
+        return {"error": str(e)}
+    return {"full_filepath": full_filepath, "filename": filename, "detected_trash": detected_trash}
 
 
 def predict_and_format_image(
     image: np.ndarray,
     class_names: List[str] = CLASS_NAMES,
     class_to_threshold: Dict[str, float] = CLASS_TO_THRESHOLD
-) -> List[Dict[str, object]]:
-    """Make prediction on an image and return them in a human readable format.
+) -> List[Dict]:
+    """Make prediction on an image as an array and return them in a human readable format.
 
     Arguments:
 
     - *image*: An numpy array in BGR
     - *class_names*: The list of class names without background
     - *class_to_threshold*: A dict assigning class names to threshold. If a class name isn't in
-        this dict, no threshold will be applied, which means that all predictions for this class
-        will be kept.
+        this dict, DEFAULT_THRESHOLD will be applied.
 
     Returns:
 
-    - *List[Dict[str, object]]*: List of dicts such as:
+    - *detected_trash*: List of dicts such as:
 
     ```python3
     {
@@ -266,20 +236,12 @@ def predict_and_format_image(
     for box, label, score in zip(
         outputs["output/boxes:0"], outputs["output/labels:0"], outputs["output/scores:0"]
     ):
-        if keep_prediction(class_names, label, class_to_threshold, score):
+        if score >= class_to_threshold.get(class_names[label], DEFAULT_THRESHOLD):
             trash_json = {
                 "box": [round(coord, 2) for coord in box],
                 "label": class_names[label],
                 "score": score,
             }
             detected_trash.append(trash_json)
-    return detected_trash
-
 
-def keep_prediction(class_names, label, class_to_threshold, score):
-    if isinstance(score, list):  # we have scores for all classes
-        if np.array(score).sum() < SUM_THRESHOLD:
-            return False
-        return True
-    return class_names[label] not in class_to_threshold or score >= class_to_threshold[
-        class_names[label]]
+    return detected_trash
diff --git a/src/mot/serving/templates/image.html b/src/mot/serving/templates/image.html
new file mode 100644
index 0000000..3653910
--- /dev/null
+++ b/src/mot/serving/templates/image.html
@@ -0,0 +1,9 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Index</title>
+</head>
+<body>
+    <img src="{{ filename }}" alt="User Image">
+</body>
+</html>
diff --git a/src/mot/serving/templates/results.html b/src/mot/serving/templates/results.html
deleted file mode 100644
index 331b75b..0000000
--- a/src/mot/serving/templates/results.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<table>
-    {% for key, value in result.items() %}
-    <tr>
-        <th> {{ key }} </th>
-        <td> {{ value }} </td>
-    </tr>
-    {% endfor %}
-</table>
\ No newline at end of file
diff --git a/src/mot/serving/templates/upload_image.html b/src/mot/serving/templates/upload_image.html
new file mode 100644
index 0000000..dd8f960
--- /dev/null
+++ b/src/mot/serving/templates/upload_image.html
@@ -0,0 +1,6 @@
+<!doctype html>
+<title>Upload an Image for demo purpose</title>
+<h1>Upload image</h1>
+<form method=post enctype=multipart/form-data> <input type=file name=file>
+    <input type=submit value=Upload>
+</form>
diff --git a/src/mot/serving/utils.py b/src/mot/serving/utils.py
new file mode 100644
index 0000000..c9aaf3d
--- /dev/null
+++ b/src/mot/serving/utils.py
@@ -0,0 +1,33 @@
+import os
+
+from werkzeug.datastructures import FileStorage
+from werkzeug.utils import secure_filename
+
+
+def save_file(file: FileStorage, upload_folder: str):
+    """Save an uploaded file in a folder, replacing any previous file with the same name.
+
+    Arguments:
+
+    - *file*: The file sent to the app.
+    - *upload_folder*: The folder where the file is saved. It is created if it doesn't exist.
+
+    Returns:
+
+    - *filename*: The name of the file once sanitized by `secure_filename`.
+    - *full_filepath*: The path where the file has been saved.
+
+    Raises:
+
+    - *ValueError*: If the sanitized filename is empty.
+    """
+    sanitized_name = secure_filename(file.filename)
+    if not sanitized_name:
+        raise ValueError("You must choose a file before uploading it.")
+
+    os.makedirs(upload_folder, exist_ok=True)
+    destination = os.path.join(upload_folder, sanitized_name)
+    if os.path.isfile(destination):
+        os.remove(destination)
+    file.save(destination)
+    return sanitized_name, destination
diff --git a/src/mot/serving/viz.py b/src/mot/serving/viz.py
new file mode 100644
index 0000000..bda3607
--- /dev/null
+++ b/src/mot/serving/viz.py
@@ -0,0 +1,34 @@
+from typing import Dict, List
+
+import cv2
+
+from mot.serving.constants import CLASS_NAME_TO_COLOR
+
+
+def draw_boxes(image_path: str, trashes: List[Dict]):
+    """Draw the detected boxes and their labels on an image and overwrite the image file.
+
+    Arguments:
+
+    - *image_path*: Path of the image, which is read and then overwritten with the drawing.
+    - *trashes*: Detections with the keys `box` as (y1, x1, y2, x2) scaled between 0 and 1,
+        `label` (a key of CLASS_NAME_TO_COLOR) and `score`. They are left unmodified.
+    """
+    img = cv2.imread(image_path)  # img.shape = [height, width, channels]
+    height, width = img.shape[:2]
+
+    for trash in trashes:
+        y1, x1, y2, x2 = trash["box"]
+        # cv2 expects points as (x, y) in pixels whereas the boxes are (y, x) ratios
+        top_left = (int(x1 * width), int(y1 * height))
+        bottom_right = (int(x2 * width), int(y2 * height))
+        color = CLASS_NAME_TO_COLOR[trash["label"]]
+        text = trash["label"] + " " + str(round(trash["score"], 2))
+        img = cv2.rectangle(img, top_left, bottom_right, color, 3)
+        # NOTE(review): passed positionally, cv2.LINE_4 lands in the thickness slot - confirm intent
+        img = cv2.putText(
+            img, text, (top_left[0], top_left[1] - 10),
+            cv2.FONT_HERSHEY_SIMPLEX, 1, color, cv2.LINE_4,
+        )
+
+    cv2.imwrite(image_path, img)
diff --git a/src/mot/tracker/video_utils.py b/src/mot/tracker/video_utils.py
index 26b13e7..58b6468 100644
--- a/src/mot/tracker/video_utils.py
+++ b/src/mot/tracker/video_utils.py
@@ -28,4 +28,8 @@ def split_video(input_path, output_folder, fps=1.5, resolution=(1024, 768)):
 
 def read_folder(input_path):
     # for now, read directly from images in folder ; later from json outputs
-    return [os.path.join(input_path, file) for file in sorted(os.listdir(input_path))]
+    return [
+        os.path.join(input_path, file)
+        for file in sorted(os.listdir(input_path))
+        if os.path.isfile(os.path.join(input_path, file))
+    ]
diff --git a/tests/tests_mot/serving/test_app.py b/tests/tests_mot/serving/test_app.py
index 847a9ca..a703b41 100644
--- a/tests/tests_mot/serving/test_app.py
+++ b/tests/tests_mot/serving/test_app.py
@@ -1,23 +1,28 @@
 import json
+import os
+import shutil
 from unittest import mock
 
+import cv2
 import numpy as np
 import pytest
 
 from mot.serving.app import app
+from mot.serving.constants import TMP_IMAGE_NAME
 
 
 def mock_post_tensorpack_localizer(*args, **kwargs):
     boxes = [[0, 0, 120, 40], [0, 0, 120, 80]]
-    scores = [0.71, 0.71]
+    scores = [[0.7, 0.01, 0.01], [0.1, 0.1, 0.6]]
     classes = [1, 3]
+
     class Response(mock.Mock):
         json_text = {
-            'outputs':
+            "outputs":
                 {
-                    'output/boxes:0': boxes,
-                    'output/scores:0': scores,
-                    'output/labels:0': classes,
+                    "output/boxes:0": boxes,
+                    "output/scores:0": scores,
+                    "output/labels:0": classes,
                 }
         }
 
@@ -32,25 +37,85 @@ def json(self):
     return response
 
 
-@mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
-def test_app_post(mock_server_result):
-    image = np.ones((300, 200, 3))
+@mock.patch("requests.post", side_effect=mock_post_tensorpack_localizer)
+def test_app_post_tracking(mock_server_result, tmpdir):
+    images_folder = os.path.join(tmpdir, "images")
+    os.makedirs(images_folder)
+    for i in range(5):
+        image = 255 * np.random.rand(300, 200, 3)
+        image_path = os.path.join(images_folder, "{}.jpg".format(str(i)))
+        cv2.imwrite(image_path, image)
+
+    zip_path = os.path.join(tmpdir, "toto")
+    shutil.make_archive(zip_path, "zip", images_folder)
+    zip_path += ".zip"
+
+    app_folder = os.path.join(tmpdir, "app_folder")
+    os.makedirs(app_folder)
+    app.config["UPLOAD_FOLDER"] = app_folder
     with app.test_client() as c:
-        response = c.post("/", json={"image": image.tolist()})
-        output = response.get_json()
+        response = c.post("/tracking", data={"file": (open(zip_path, "rb"), "toto.zip")})
+        output = response.json
+    assert response.status_code == 200
     expected_output = {
         "detected_trash":
             [
                 {
-                    "box": [0.0, 0.0, 0.1, 0.05],
-                    "label": "bottles",
-                    "score": 0.71
+                    "frame_to_box":
+                        {
+                            "0": [0.0, 0.0, 0.1, 0.05],
+                            "1": [0.0, 0.0, 0.1, 0.05],
+                            "2": [0.0, 0.0, 0.1, 0.05],
+                            "3": [0.0, 0.0, 0.1, 0.05],
+                            "4": [0.0, 0.0, 0.1, 0.05]
+                        },
+                    "id": 0,
+                    "label": "bottles"
                 }, {
-                    "box": [0.0, 0.0, 0.1, 0.1],
-                    "label": "fragments",
-                    "score": 0.71
+                    "frame_to_box":
+                        {
+                            "0": [0.0, 0.0, 0.1, 0.1],
+                            "1": [0.0, 0.0, 0.1, 0.1],
+                            "2": [0.0, 0.0, 0.1, 0.1],
+                            "3": [0.0, 0.0, 0.1, 0.1],
+                            "4": [0.0, 0.0, 0.1, 0.1]
+                        },
+                    "id": 1,
+                    "label": "fragments"
                 }
-            ]
+            ],
+        "fps": 4,
+        "video_id": "toto.zip",
+        "video_length": 5,
+    }
+    assert output == expected_output
+
+
+@mock.patch("requests.post", side_effect=mock_post_tensorpack_localizer)
+def test_app_post_demo(mock_server_result, tmpdir):
+    image = 255 * np.random.rand(300, 200, 3)
+    image_path = os.path.join(tmpdir, "toto.jpg")
+    cv2.imwrite(image_path, image)
+    app.config["UPLOAD_FOLDER"] = tmpdir
+    with app.test_client() as c:
+        response = c.post("/demo", data={"file": (open(image_path, "rb"), "toto.jpg")})
+    assert response.status_code == 200
+
+
+@mock.patch("requests.post", side_effect=mock_post_tensorpack_localizer)
+def test_app_post_image(mock_server_result, tmpdir):
+    image = np.ones((300, 200, 3))
+    app.config["UPLOAD_FOLDER"] = tmpdir
+    with app.test_client() as c:
+        response = c.post("/image", json={"image": image.tolist()})
+        output = response.get_json()
+    assert response.status_code == 200
+    expected_output = {
+        "detected_trash": [{
+            "box": [0.0, 0.0, 0.1, 0.05],
+            "label": "bottles",
+            "score": 0.7
+        }]
     }
     assert output == expected_output
 
diff --git a/tests/tests_mot/serving/test_inference.py b/tests/tests_mot/serving/test_inference.py
index e69cb96..c3deef1 100644
--- a/tests/tests_mot/serving/test_inference.py
+++ b/tests/tests_mot/serving/test_inference.py
@@ -3,11 +3,13 @@
 from unittest import mock
 
 import cv2
+import ffmpeg
 import numpy as np
 import pytest
-from werkzeug import FileStorage
+from werkzeug.datastructures import FileStorage
 
-from mot.serving.inference import handle_post_request, predict_and_format_image, process_image
+from mot.serving.inference import (_process_image, detect_and_track_images,
+                                   predict_and_format_image)
 
 HOME = os.path.expanduser("~")
 PATH_TO_TEST_VIDEO = os.path.join(HOME, ".mot/tests/test_video.mp4")
@@ -16,8 +18,9 @@
 
 def mock_post_tensorpack_localizer(*args, **kwargs):
     boxes = [[0, 0, 120, 40], [0, 0, 120, 80]]
-    scores = [[0.71, 0.1, 0.1], [0.2, 0.05, 0.71]]
+    scores = [[0.7, 0.01, 0.01], [0.1, 0.1, 0.6]]
     classes = [1, 3]
+
     class Response(mock.Mock):
         json_text = {
             'outputs':
@@ -40,104 +43,14 @@ def json(self):
 
 
 @mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
-def test_handle_post_request_image(mock_server_result):
-    image = np.ones((300, 200, 3))
-    data = "{}".format(json.dumps({"image": image.tolist()}))
-    data = data.encode('utf-8')
-    m = mock.MagicMock()  # here we mock flask.request
-    m.data = data
-    with mock.patch("mot.serving.inference.request", m):
-        output = handle_post_request()
-    expected_output = {
-        "detected_trash":
-            [
-                {
-                    "box": [0.0, 0.0, 0.1, 0.05],
-                    "label": "bottles",
-                    "score": 0.71
-                }, {
-                    "box": [0.0, 0.0, 0.1, 0.1],
-                    "label": "fragments",
-                    "score": 0.71
-                }
-            ]
-    }
-    assert output == expected_output
-
-
-def test_handle_post_request_wrong_image():
-    data = "{}".format(json.dumps({"image:0": [[[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]]}))
-    data = data.encode('utf-8')
-    m = mock.MagicMock()  # here we mock flask.request
-    m.data = data
-    with pytest.raises(ValueError):
-        with mock.patch("mot.serving.inference.request", m):
-            output = handle_post_request()
-
-
-def test_handle_post_request_video():
-    # TODO test good behavior when implemented
-    data = "{}".format(json.dumps({"video": [[[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]]}))
-    data = data.encode('utf-8')
-    m = mock.MagicMock()  # here we mock flask.request
-    m.data = data
-    with pytest.raises(NotImplementedError):
-        with mock.patch("mot.serving.inference.request", m):
-            output = handle_post_request()
-
-
-@mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
-def test_handle_post_request_file_image(mock_server_result, tmpdir):
-    data = np.ones((300, 200, 3))
-    filename = "test.jpg"
-    filepath = os.path.join(tmpdir, filename)
-    cv2.imwrite(filepath, data)
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(filepath, "rb"), content_type='image/jpg')}
-    m.files = files
-
-    upload_folder = os.path.join(tmpdir, "upload")
-    os.mkdir(upload_folder)
-    with open(os.path.join(upload_folder, filename), "w+") as f:
-        f.write(
-            "this file should be deleted by the handle post request to be replaced"
-            " by the image we are uploading."
-        )
-
-    with mock.patch("mot.serving.inference.request", m):
-        output = handle_post_request(upload_folder=upload_folder)
-
-    expected_output = {
-        "image":
-            output["image"],
-        "detected_trash":
-            [
-                {
-                    "box": [0.0, 0.0, 0.1, 0.05],
-                    "label": "bottles",
-                    "score": 0.71
-                }, {
-                    "box": [0.0, 0.0, 0.1, 0.1],
-                    "label": "fragments",
-                    "score": 0.71
-                }
-            ]
-    }
-    assert output == expected_output
-
-
-@mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
-def test_handle_post_request_file_video(mock_server_result, tmpdir):
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(PATH_TO_TEST_VIDEO, "rb"), content_type='video/mkv')}
-    m.files = files
-    m.form = {"fps": 2, "foo": "bar"}
+def test_detect_and_track_images_video(mock_server_result, tmpdir):
     split_frames_folder = os.path.join(
         tmpdir, "{}_split".format(os.path.basename(PATH_TO_TEST_VIDEO))
     )
     os.mkdir(split_frames_folder)  # this folder should be deleted by handle post request
-    with mock.patch("mot.serving.inference.request", m):
-        output = handle_post_request(upload_folder=str(tmpdir))
+    output = detect_and_track_images(
+        FileStorage(open(PATH_TO_TEST_VIDEO, "rb")), upload_folder=str(tmpdir), fps=2
+    )
 
     assert len(output["detected_trash"]) == 2
     assert "id" in output["detected_trash"][0]
@@ -151,13 +64,10 @@ def test_handle_post_request_file_video(mock_server_result, tmpdir):
     assert "video_id" in output
 
 @mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
-def test_handle_post_request_file_zip(mock_server_result, tmpdir):
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(PATH_TO_TEST_ZIP, "rb"), content_type='application/zip')}
-    m.files = files
-    m.form = {"fps": 2, "foo": "bar"}
-    with mock.patch("mot.serving.inference.request", m):
-        output = handle_post_request(upload_folder=str(tmpdir))
+def test_detect_and_track_images_zip(mock_server_result, tmpdir):
+    output = detect_and_track_images(
+        FileStorage(open(PATH_TO_TEST_ZIP, "rb")), upload_folder=str(tmpdir), fps=2
+    )
 
     assert len(output["detected_trash"]) == 2
     assert "id" in output["detected_trash"][0]
@@ -176,13 +86,9 @@ def test_handle_post_request_file_other(tmpdir):
     filepath = os.path.join(tmpdir, filename)
     with open(filepath, "w") as f:
         f.write("mock data")
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(filepath, "rb"), content_type='poulet/pdf')}
-    m.files = files
     upload_folder = os.path.join(tmpdir, "upload_folder")
-    with pytest.raises(NotImplementedError):
-        with mock.patch("mot.serving.inference.request", m):
-            output = handle_post_request(upload_folder=upload_folder)
+    with pytest.raises(ffmpeg._run.Error):
+        detect_and_track_images(FileStorage(open(filepath, "rb")), upload_folder=upload_folder)
     assert os.path.isdir(
         upload_folder
     )  # the upload_folder should be created by handle post request
@@ -193,59 +99,54 @@ def test_process_image(mock_server_result, tmpdir):
     image = np.ones((300, 200, 3))
     image_path = os.path.join(tmpdir, "image.jpg")
     cv2.imwrite(image_path, image)
-    predictions = process_image(image_path)
-    assert predictions == {
+    output = _process_image(image_path)
+    expected_output = {
         'output/boxes:0': [[0, 0, 0.1, 0.05], [0, 0, 0.1, 0.1]],
-        'output/scores:0': [[0.71, 0.1, 0.1], [0.2, 0.05, 0.71]],
+        'output/scores:0': [[0.7, 0.01, 0.01], [0.1, 0.1, 0.6]],
         'output/labels:0': [1, 3]
     }
+    assert output == expected_output
 
 
 @mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer)
 def test_predict_and_format_image(mock_server_result, tmpdir):
     image = np.ones((300, 200, 3))
     predictions = predict_and_format_image(image)
-    assert predictions == [
-        {
-            "box": [0, 0, 0.1, 0.05],
-            "label": "bottles",
-            "score": 0.71
-        }, {
-            "box": [0, 0, 0.1, 0.1],
-            "label": "fragments",
-            "score": 0.71
-        }
-    ]
+    assert predictions == [{"box": [0, 0, 0.1, 0.05], "label": "bottles", "score": 0.7}]
 
-    # tesing with different class names
+    # testing with different class names
     class_names = ["others", "fragments", "chicken", "bottles"]
-    predictions = predict_and_format_image(image, class_names)
-    assert predictions == [
+    output = predict_and_format_image(image, class_names)
+    expected_output = [
         {
             "box": [0, 0, 0.1, 0.05],
             "label": "others",
-            "score": 0.71
+            "score": 0.7
         }, {
             "box": [0, 0, 0.1, 0.1],
             "label": "chicken",
-            "score": 0.71
+            "score": 0.6
         }
     ]
+    assert output == expected_output
 
     # testing with different thresholds
     class_to_threshold = {"bottles": 0.8, "others": 0.3, "fragments": 0.3}
-    predictions = predict_and_format_image(image, class_to_threshold=class_to_threshold)
-    assert predictions == [{"box": [0, 0, 0.1, 0.1], "label": "fragments", "score": 0.71}]
+    output = predict_and_format_image(image, class_to_threshold=class_to_threshold)
+    expected_output = [{"box": [0, 0, 0.1, 0.1], "label": "fragments", "score": 0.6}]
+    assert output == expected_output
 
-    # testing with different thresholds
+    # testing with thresholds that filter out every detection
     class_to_threshold = {"bottles": 0.8, "others": 0.3, "fragments": 0.8}
-    predictions = predict_and_format_image(image, class_to_threshold=class_to_threshold)
-    assert predictions == []
+    output = predict_and_format_image(image, class_to_threshold=class_to_threshold)
+    expected_output = []
+    assert output == expected_output
 
 
 def mock_post_tensorpack_localizer_error(*args, **kwargs):
+
     class Response(mock.Mock):
-        json_text = {'error':  "¯\(°_o)/¯"}
+        json_text = {'error': "¯\(°_o)/¯"}
 
         @property
         def text(self):
@@ -261,33 +162,28 @@ def json(self):
 @mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer_error)
 def test_handle_post_request_file_error(mock_server_result, tmpdir):
     # videos
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(PATH_TO_TEST_VIDEO, "rb"), content_type='video/mkv')}
-    m.files = files
-    m.form = {"fps": 2, "foo": "bar"}
     split_frames_folder = os.path.join(
         tmpdir, "{}_split".format(os.path.basename(PATH_TO_TEST_VIDEO))
     )
-    os.mkdir(split_frames_folder)  # this folder should be deleted by handle post request
+    os.mkdir(split_frames_folder)  # should be deleted by detect_and_track_images (TODO confirm)
-    with mock.patch("mot.serving.inference.request", m):
-        outputs = handle_post_request(upload_folder=str(tmpdir))
-        assert "error" in outputs
-        assert outputs["error"].endswith("¯\(°_o)/¯")
+    outputs = detect_and_track_images(
+        FileStorage(open(PATH_TO_TEST_VIDEO, "rb")), upload_folder=str(tmpdir)
+    )
+    assert "error" in outputs
+    assert outputs["error"].endswith(r"¯\(°_o)/¯")  # raw: "\(" is no valid escape
 
     # images
     data = np.ones((300, 200, 3))
     filename = "test.jpg"
     filepath = os.path.join(tmpdir, filename)
     cv2.imwrite(filepath, data)
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(filepath, "rb"), content_type='image/jpg')}
-    m.files = files
-    with mock.patch("mot.serving.inference.request", m):
-        outputs = handle_post_request(upload_folder=str(tmpdir))
-        assert "error" in outputs
-        assert outputs["error"].endswith("¯\(°_o)/¯")
+    outputs = detect_and_track_images(FileStorage(open(filepath, "rb")), upload_folder=str(tmpdir))
+    assert "error" in outputs
+    assert outputs["error"].endswith(r"¯\(°_o)/¯")  # raw: "\(" is no valid escape
+
 
 def mock_post_tensorpack_localizer_unknown(*args, **kwargs):
+
     class Response(mock.Mock):
         json_text = {'unknown': "¯\_(ツ)_/¯"}
 
@@ -305,28 +201,21 @@ def json(self):
 @mock.patch('requests.post', side_effect=mock_post_tensorpack_localizer_unknown)
 def test_handle_post_request_file_unknwon(mock_server_result, tmpdir):
     # videos
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(PATH_TO_TEST_VIDEO, "rb"), content_type='video/mkv')}
-    m.files = files
-    m.form = {"fps": 2, "foo": "bar"}
     split_frames_folder = os.path.join(
         tmpdir, "{}_split".format(os.path.basename(PATH_TO_TEST_VIDEO))
     )
-    os.mkdir(split_frames_folder)  # this folder should be deleted by handle post request
+    os.mkdir(split_frames_folder)  # should be deleted by detect_and_track_images (TODO confirm)
-    with mock.patch("mot.serving.inference.request", m):
-        outputs = handle_post_request(upload_folder=str(tmpdir))
-        assert "error" in outputs
-        assert outputs["error"].endswith("{'unknown': '¯\\\\_(ツ)_/¯'}")
+    outputs = detect_and_track_images(
+        FileStorage(open(PATH_TO_TEST_VIDEO, "rb")), upload_folder=str(tmpdir)
+    )
+    assert "error" in outputs
+    assert outputs["error"].endswith("{'unknown': '¯\\\\_(ツ)_/¯'}")
 
     # images
     data = np.ones((300, 200, 3))
     filename = "test.jpg"
     filepath = os.path.join(tmpdir, filename)
     cv2.imwrite(filepath, data)
-    m = mock.MagicMock()
-    files = {"file": FileStorage(open(filepath, "rb"), content_type='image/jpg')}
-    m.files = files
-    with mock.patch("mot.serving.inference.request", m):
-        outputs = handle_post_request(upload_folder=str(tmpdir))
-        assert "error" in outputs
-        assert outputs["error"].endswith("{'unknown': '¯\\\\_(ツ)_/¯'}")
+    outputs = detect_and_track_images(FileStorage(open(filepath, "rb")), upload_folder=str(tmpdir))
+    assert "error" in outputs
+    assert outputs["error"].endswith("{'unknown': '¯\\\\_(ツ)_/¯'}")