diff --git a/.gitignore b/.gitignore index 54b1b40..70fdd66 100644 --- a/.gitignore +++ b/.gitignore @@ -155,3 +155,5 @@ images_errors.csv # patch files *.py.patch + +tmp/ diff --git a/requirements.txt b/requirements.txt index 48bd83d..724e437 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,5 @@ ffmpeg-python wget requests flask>=1.1.1 -werkzeug==0.16.1 # It fixes the error: from werkzeug import FileStorage cannot import FileStorage. +werkzeug>=1.0.1 cached_property diff --git a/scripts/up_serving.sh b/scripts/up_serving.sh index 721ea89..b34129f 100755 --- a/scripts/up_serving.sh +++ b/scripts/up_serving.sh @@ -4,5 +4,6 @@ RATIO_GPU=${RATIO_GPU:-0.45} echo "Using GPU: $NVIDIA_VISIBLE_DEVICES" echo "Limiting GPU to ratio: $RATIO_GPU" +cd /src/mot/serving python3 -m mot.serving.app & /usr/bin/tf_serving_entrypoint.sh --per_process_gpu_memory_fraction=$RATIO_GPU diff --git a/src/mot/object_detection/query_server.py b/src/mot/object_detection/query_server.py index 8c9d9ef..f15ca9e 100644 --- a/src/mot/object_detection/query_server.py +++ b/src/mot/object_detection/query_server.py @@ -1,13 +1,15 @@ import json +import logging import os from typing import Dict import numpy as np import requests -from tensorpack import logger from mot.object_detection.preprocessing import preprocess_for_serving +logger = logging.getLogger(__file__) + def query_tensorflow_server(signature: Dict, url: str) -> Dict: """Will send a REST query to the tensorflow server. @@ -61,12 +63,16 @@ def localizer_tensorflow_serving_inference( Return: - - *Dict*: A dict with the predictions with the following format: + - *predictions*: A dict with the predictions with the following format: ```python if return_all_scores: predictions = { - 'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]], (y1, x1, y2, x2) + 'output/boxes:0': [ + [0.1, 0.1, 0.9, 0.9], + [0.0, 0.2, 0.1, 0.4], + [0.2, 0.4, 0.5, 0.7], + ], (y1, x1, y2, x2) scaled between 0 and 1 'output/labels:0': [3, 1, 2], # the labels start at 1 since 0 is for background 'output/scores:0': [ [0.001, 0.001, 0.98], @@ -76,7 +82,11 @@ def localizer_tensorflow_serving_inference( } else: predictions = { - 'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]], + 'output/boxes:0': [ + [0.1, 0.1, 0.9, 0.9], + [0.0, 0.2, 0.1, 0.4], + [0.2, 0.4, 0.5, 0.7], + ], # (y1, x1, y2, x2) scaled between 0 and 1 'output/labels:0': [3, 1, 2], # the labels start at 1 since 0 is for background 'output/scores:0': [0.98, 0.87, 0.76] # sorted in descending order } @@ -87,10 +97,10 @@ def localizer_tensorflow_serving_inference( scores = np.array(predictions['output/scores:0']) if len(predictions["output/boxes:0"]) > 0: predictions['output/boxes:0'] = np.array(predictions['output/boxes:0'], np.int32) / ratio - predictions["output/boxes:0"][:, 0] /= image.shape[0] # scaling coords to [0, 1] - predictions["output/boxes:0"][:, 1] /= image.shape[1] # scaling coords to [0, 1] - predictions["output/boxes:0"][:, 2] /= image.shape[0] # scaling coords to [0, 1] - predictions["output/boxes:0"][:, 3] /= image.shape[1] # scaling coords to [0, 1] + predictions["output/boxes:0"][:, 0] /= image.shape[0] # scaling coords to [0, 1] + predictions["output/boxes:0"][:, 1] /= image.shape[1] # scaling coords to [0, 1] + predictions["output/boxes:0"][:, 2] /= image.shape[0] # scaling coords to [0, 1] + predictions["output/boxes:0"][:, 3] /= image.shape[1] # scaling coords to [0, 1] predictions['output/boxes:0'] = predictions['output/boxes:0'].tolist() if return_all_scores and len(scores.shape) == 1: raise ValueError( diff --git a/src/mot/serving/README.md b/src/mot/serving/README.md index 4424999..b1a4a9c 100644 --- a/src/mot/serving/README.md +++ b/src/mot/serving/README.md @@ -24,31 +24,38 @@ NVIDIA_VISIBLE_DEVICES=2 RATIO_GPU=0.3 MODEL_FOLDER=/path/to/serving PORT=the_po Here are the different ways to perform inference requests. -### Web interface +### Tracking -You can access a basic web interface to manually upload pictures or videos to do inference. -In your browser, access the address `host:port`, with port being the one you specified in the previous step. +*host:port/tracking* -### cURL - -#### Json - -This only works for images. +Available with web interface or a simple curl. You can upload a video or a zip archive containing images. ```bash -curl -d @/path/to/json --header "Content-Type: application/json" host:port +curl -F "file=@/path/to/video.mp4" -F "fps=2" -F "resolution=(10,10)" host:port ``` -#### File +You don't have to specify those parameters and you can find their default value in [this file](inference.py). + +### Demo + +*host:port/demo* + +Available with web interface: you can upload an image, a localizer will make predictions on it which will be displayed in your browser. + + +### Image + +*host:port/image* + +You can post to get the predictions of the localizer +- an image file: ```bash -curl -F "file=@/path/to/file" host:port +curl -F "file=@/path/to/image.jpg" host:port ``` -For videos you can add parameters such as fps and resolution like that: +- an image as a JSON in BGR: ```bash -curl -F "file=@/path/to/video.mp4" -F "fps=2" -F "resolution=(10,10)" host:port +curl -d @/path/to/json --header "Content-Type: application/json" host:port ``` - -You don't have to specify those parameters and you can find their default value in [this file](inference.py). diff --git a/src/mot/serving/app.py b/src/mot/serving/app.py index 5266b64..bb79550 100644 --- a/src/mot/serving/app.py +++ b/src/mot/serving/app.py @@ -1,15 +1,57 @@ +import json +import os + +import cv2 +import numpy as np from flask import Flask, render_template, request -from mot.serving.inference import handle_post_request +from mot.serving.constants import TMP_IMAGE_NAME, UPLOAD_FOLDER +from mot.serving.inference import ( + detect_and_track_images, predict_and_format_image, predict_image_file +) +from mot.serving.viz import draw_boxes app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + + +@app.route('/tracking', methods=['GET', 'POST']) +def tracking(): + if request.method == "GET": + # landing page on browser + return render_template("upload.html") + return detect_and_track_images(request.files['file'], app.config["UPLOAD_FOLDER"]) + + +@app.route('/demo', methods=['GET', 'POST']) +def demo(): + """Route to upload an image and visualize the prediction of a localizer.""" + if request.method == "GET": + # landing page on browser + return render_template("upload_image.html") + + analysis_results = predict_image_file(request.files["file"], app.config["UPLOAD_FOLDER"]) + draw_boxes(analysis_results["full_filepath"], analysis_results["detected_trash"]) + return render_template("image.html", filename=analysis_results["full_filepath"]) -@app.route('/', methods=['GET', 'POST']) -def index(): - if request.method == 'POST': - return handle_post_request() - return render_template("upload.html") +@app.route('/image', methods=['POST']) +def image(): + """Route to upload an image file or a JSON image in BGR and get the prediction of a localizer.""" + if "file" in request.files: + return predict_image_file(request.files["file"], app.config["UPLOAD_FOLDER"]) + else: + data = json.loads(request.data.decode("utf-8")) + if "image" not in data: + return { + "error": + "Your JSON must have a field image with the image as an array in RGB" + } + image = np.array(data["image"]) + image_path = os.path.join(app.config["UPLOAD_FOLDER"], TMP_IMAGE_NAME) + cv2.imwrite(image_path, image) + detected_trash = predict_and_format_image(image) + return {"detected_trash": detected_trash} if __name__ == "__main__": diff --git a/src/mot/serving/constants.py b/src/mot/serving/constants.py new file mode 100644 index 0000000..bb026f0 --- /dev/null +++ b/src/mot/serving/constants.py @@ -0,0 +1,27 @@ +import multiprocessing + + +# app configuration +UPLOAD_FOLDER = "static/tmp" +TMP_IMAGE_NAME = "tmp_image.jpg" +SERVING_URL = "http://localhost:8501" # the url where the tf-serving container exposes the model +CPU_COUNT = min(int(multiprocessing.cpu_count() / 2), 32) + + +# video settings +FPS = 4 +RESOLUTION = (1024, 768) +SUM_THRESHOLD = 0.6 # the sum of scores for all classes must be greater than this value + +# object detection settings +CLASS_NAMES = ["bottles", "others", "fragments"] +# for the prediction to be kept +CLASS_TO_THRESHOLD = {"bottles": 0.7, "others": 0.7, "fragments": 0.7} +DEFAULT_THRESHOLD = 0.5 # default threshold applied when the class isn't in CLASS_TO_THRESHOLD + + +CLASS_NAME_TO_COLOR = { + "bottles": (255, 0, 0), + "others": (0, 255, 0), + "fragments": (0, 0, 255), +} diff --git a/src/mot/serving/inference.py b/src/mot/serving/inference.py index 68f1a8f..558e901 100644 --- a/src/mot/serving/inference.py +++ b/src/mot/serving/inference.py @@ -1,113 +1,45 @@ -import json +import logging import multiprocessing import os import shutil +import zipfile from typing import Dict, List, Tuple import cv2 import numpy as np -from flask import request -from tensorpack.utils import logger from tqdm import tqdm -from werkzeug import FileStorage -from werkzeug.utils import secure_filename -from zipfile import ZipFile +from werkzeug.datastructures import FileStorage from mot.object_detection.query_server import \ localizer_tensorflow_serving_inference +from mot.serving.constants import (CLASS_NAMES, CLASS_TO_THRESHOLD, CPU_COUNT, + DEFAULT_THRESHOLD, FPS, RESOLUTION, + SERVING_URL) +from mot.serving.utils import save_file from mot.tracker.object_tracking import ObjectTracking from mot.tracker.video_utils import read_folder, split_video -SERVING_URL = "http://localhost:8501" # the url where the tf-serving container exposes the model -UPLOAD_FOLDER = 'tmp' # folder used to store images or videos when sending files -FPS = 4 -RESOLUTION = (1024, 768) -CLASS_NAMES = ["bottles", "others", "fragments"] -SUM_THRESHOLD = 0.6 # the sum of scores for all classes must be greater than this value -# for the prediction to be kept -CLASS_TO_THRESHOLD = {"bottles": 0.4, "others": 0.3, "fragments": 0.3} -CPU_COUNT = min(int(multiprocessing.cpu_count() / 2), 32) +logger = logging.getLogger(__file__) +### TRACKING (FROM VIDEO OR ZIPFILE) ### -def handle_post_request(upload_folder: str = UPLOAD_FOLDER) -> Dict[str, np.array]: - """This method is the first one to be called when a POST request is coming. It analyzes the incoming - format (file or JSON) and then call the appropiate methods to do the prediction. - If you want to make a prediction by sending the data as a JSON, it has to be in this format: - - ```json - {"image":[[[0,0,0],[0,0,0]],[[0,0,0],[0,0,0]]]} - ``` - - or - - ```json - {"video": TODO} - ``` - Arguments: - - - *upload_folder*: Where the files are temporarly stored - - Returns: - - - *Dict[str, np.array]*: The predictions of the TF serving module - - Raises: - - - *NotImplementedError*: If the format of data isn't handled yet - """ - if "file" in request.files: - return handle_file(request.files['file'], upload_folder, **request.form) - data = json.loads(request.data.decode("utf-8")) - if "image" in data: - image = np.array(data["image"]) - return {"detected_trash": predict_and_format_image(image)} - if "video" in data: - raise NotImplementedError("video") - raise ValueError( - "Error during the reading of JSON. Keys {} aren't valid ones.".format(data.keys()) + - "For an image, send a JSON such as {'image': [0, 0, 0]}." + - "Sending videos over JSON isn't implemented yet." - ) - - -def handle_file( +def detect_and_track_images( file: FileStorage, - upload_folder: str = UPLOAD_FOLDER, + upload_folder: str, fps: int = FPS, resolution: Tuple[int, int] = RESOLUTION, - **kwargs ) -> Dict[str, np.array]: - """Make the prediction if the data is coming from an uploaded file. + """Performs object detection and then tracking on a video or a zip containing images. Arguments: - - *file*: The file, can be either an image or a video, or a zipped folder + - *file*: The file, can be either a video or a zipped folder - *upload_folder*: Where the files are temporarly stored Returns: - - for an image: a json of format - - ```json - { - "image": filename, - "detected_trash": - [ - { - "box": [1, 1, 2, 20], - "label": "fragments", - "score": 0.92 - }, { - "box": [10, 10, 25, 20], - "label": "bottles", - "score": 0.75 - } - ] - } - ``` - - - for a video or a zipped file: a json of format + - *predictions*: ```json { @@ -133,84 +65,84 @@ def handle_file( ] } ``` - - Raises: - - - *NotImplementedError*: If the format of data isn't handled yet """ - if kwargs: - logger.warning("Unused kwargs: {}".format(kwargs)) - filename = secure_filename(file.filename) - full_filepath = os.path.join(upload_folder, filename) - if not os.path.isdir(upload_folder): - os.mkdir(upload_folder) - if os.path.isfile(full_filepath): - os.remove(full_filepath) - file.save(full_filepath) - file_type = file.mimetype.split("/")[0] - # mimetype is for example 'image/png' and we only want the image - - if file_type == "image": - image = cv2.imread(full_filepath) # cv2 opens in BGR - os.remove(full_filepath) # remove it as we don't need it anymore - try: - detected_trash = predict_and_format_image(image) - except ValueError as e: - return {"error": str(e)} - return {"image": filename, "detected_trash": detected_trash} - - elif file_type in ["video", "application"]: - folder = None - - if file.mimetype == "application/zip": - # zip case - ZipFile(full_filepath).extractall(upload_folder) - dirname = None - with ZipFile(full_filepath, 'r') as zipObj: - listOfFileNames = zipObj.namelist() - for fileName in listOfFileNames: - dirname = os.path.dirname(fileName) - zipObj.extract(fileName, upload_folder) - - folder = os.path.join(upload_folder, dirname) - else: - # video case: splitting video and saving frames - folder = os.path.join(upload_folder, "{}_split".format(filename)) - if os.path.isdir(folder): - shutil.rmtree(folder) - os.mkdir(folder) - logger.info("Splitting video {} to {}.".format(full_filepath, folder)) - split_video(full_filepath, folder, fps=fps, resolution=resolution) - print("folder:", folder, "uplaod_folder:", upload_folder, "file.filename:", file.filename) - image_paths = read_folder(folder) - if len(image_paths) == 0: - raise ValueError("No output image") - - # making inference on frames - logger.info("{} images to analyze on {} CPUs.".format(len(image_paths), CPU_COUNT)) + + filename, full_filepath = save_file(file, upload_folder) + + def process_video(): + images_folder = os.path.join(upload_folder, "{}_split".format(filename)) + shutil.rmtree(images_folder, ignore_errors=True) + os.mkdir(images_folder) + logger.info("Splitting video {} to {}.".format(full_filepath, images_folder)) + split_video(full_filepath, images_folder, fps=fps, resolution=resolution) + return images_folder + + def process_zip(): + images_folder = os.path.join(upload_folder, "{}_split".format(filename)) + with zipfile.ZipFile(full_filepath, 'r') as zip_obj: + zip_obj.extractall(images_folder) + + def move_files_to_root(directory, root_directory): + for x in os.listdir(directory): + path = os.path.join(directory, x) + if x.startswith("._") or x.startswith("__"): + # unwanted files such as __MACOSX + shutil.rmtree(path) + else: + if os.path.isfile(path): + # we want to move this file to the root of the zip directory + if not os.path.isfile(os.path.join(root_directory, x)): + # unless it is aleady present at root + shutil.move(path, root_directory) + else: + # if there is a folder, we want to move back the files to root + move_files_to_root(path, root_directory) + + move_files_to_root(images_folder, images_folder) + + return images_folder + + if file.mimetype == "": + # no type: we try to unzip, and if it fails we split as a video try: - with multiprocessing.Pool(CPU_COUNT) as p: - inference_outputs = list( - tqdm( - p.imap(process_image, image_paths), - total=len(image_paths), - ) - ) - except ValueError as e: - return {"error": str(e)} - logger.info("Finish analyzing video {}.".format(full_filepath)) - - # tracking objects - logger.info("Starting tracking.") - object_tracker = ObjectTracking(filename, image_paths, inference_outputs, fps=fps) - tracks = object_tracker.compute_tracks() - logger.info("Tracking finished.") - return object_tracker.json_result(tracks) + images_folder = process_zip() + except zipfile.BadZipFile: + images_folder = process_video() + elif file.mimetype == "application/zip": + # zip case + images_folder = process_zip() else: - raise NotImplementedError(file_type) + # video case: splitting video and saving frames + images_folder = process_video() + + image_paths = read_folder(images_folder) + if len(image_paths) == 0: + raise ValueError("No output image") + + # making inference on frames + logger.info("{} images to analyze on {} CPUs.".format(len(image_paths), CPU_COUNT)) + try: + with multiprocessing.Pool(CPU_COUNT) as p: + inference_outputs = list( + tqdm( + p.imap(_process_image, image_paths), + total=len(image_paths), + ) + ) + except ValueError as e: + return {"error": str(e)} + logger.info("Object detection on video {} finished.".format(full_filepath)) + + # tracking objects + logger.info("Starting tracking for video {}.".format(full_filepath)) + object_tracker = ObjectTracking(filename, image_paths, inference_outputs, fps=fps) + tracks = object_tracker.compute_tracks() + logger.info("Tracking finished.") + predictions = object_tracker.json_result(tracks) + return predictions -def process_image(image_path: str) -> Dict[str, object]: +def _process_image(image_path: str) -> Dict: """Function used to open and predict on an image. It is suposed to be used in multiprocessing. Arguments: @@ -219,38 +151,76 @@ def process_image(image_path: str) -> Dict[str, object]: Returns: - - *Dict[str, object]*: Predictions for this image path + - *predictions*: Object detection predictions for this image path. A dict such as: ```python predictions = { 'output/boxes:0': [[0, 0, 1, 1], [0, 0, 10, 10], [10, 10, 15, 100]], 'output/labels:0': [3, 1, 2], # the labels start at 1 since 0 is for background - 'output/scores:0': [0.98, 0.87, 0.76] # sorted in descending order + 'output/scores:0': [ + [0.001, 0.001, 0.98], + [0.87, 0.05, 0.03], + [0.1, 0.76, 0.1], + ] + # the scores don't necessarly sum up to 1 because. The remainder is the background score. } ``` """ image = cv2.imread(image_path) # cv2 opens in BGR - return localizer_tensorflow_serving_inference(image, SERVING_URL, return_all_scores=True) + predictions = localizer_tensorflow_serving_inference(image, SERVING_URL, return_all_scores=True) + return predictions + + +### OBJECT DETECTION (FROM IMAGE) ### + + +def predict_image_file(file: FileStorage, upload_folder: str): + """Save an image file on disk and then perform object detection on it. + + Arguments: + + - *file*: An image file sent to the app. + - *upload_folder*: The folder where the picture will be temporarly stored. + + Returns: + + - *[type]*: [description] + + Raises: + + - *ValueError*: [description] + """ + filename, full_filepath = save_file(file, upload_folder) + + image = cv2.imread(full_filepath) # cv2 opens in BGR + try: + detected_trash = predict_and_format_image(image) + except ValueError as e: + return {"error": str(e)} + return { + "full_filepath": full_filepath, + "filename": filename, + "detected_trash": detected_trash, + } def predict_and_format_image( image: np.ndarray, class_names: List[str] = CLASS_NAMES, class_to_threshold: Dict[str, float] = CLASS_TO_THRESHOLD -) -> List[Dict[str, object]]: - """Make prediction on an image and return them in a human readable format. +) -> List[Dict]: + """Make prediction on an image as an array and return them in a human readable format. Arguments: - *image*: An numpy array in BGR - *class_names*: The list of class names without background - *class_to_threshold*: A dict assigning class names to threshold. If a class name isn't in - this dict, no threshold will be applied, which means that all predictions for this class - will be kept. + this dict, DEFAUL_THRESHOLD will be applied. Returns: - - *List[Dict[str, object]]*: List of dicts such as: + - *detected_trash*: List of dicts such as: ```python3 { @@ -266,20 +236,12 @@ def predict_and_format_image( for box, label, score in zip( outputs["output/boxes:0"], outputs["output/labels:0"], outputs["output/scores:0"] ): - if keep_prediction(class_names, label, class_to_threshold, score): + if score >= class_to_threshold.get(class_names[label], DEFAULT_THRESHOLD): trash_json = { "box": [round(coord, 2) for coord in box], "label": class_names[label], "score": score, } detected_trash.append(trash_json) - return detected_trash - -def keep_prediction(class_names, label, class_to_threshold, score): - if isinstance(score, list): # we have scores for all classes - if np.array(score).sum() < SUM_THRESHOLD: - return False - return True - return class_names[label] not in class_to_threshold or score >= class_to_threshold[ - class_names[label]] + return detected_trash diff --git a/src/mot/serving/templates/image.html b/src/mot/serving/templates/image.html new file mode 100644 index 0000000..3653910 --- /dev/null +++ b/src/mot/serving/templates/image.html @@ -0,0 +1,9 @@ + + +
+{{ key }} | -{{ value }} | -
---|