Merge pull request #11 from slashtechno/multi-camera-support

Added support for multiple video sources
slashtechno · Feb 16, 2024 · d56cee6 · d56cee6
2 parents 5c1a22f + f7f5db9
commit d56cee6
Show file tree

Hide file tree

Showing 8 changed files with 1,140 additions and 1,046 deletions.
diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.10.5
+3.10.12
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -10,10 +10,20 @@
             "request": "launch",
             "module": "wyzely_detect",
             "args": [
-                "--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations"
+                "--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations", "--fake-second-source"
             ],
             "justMyCode": true
         },
+        // {
+        //     "name": "Quick, Specific Debug",
+        //     "type": "python",
+        //     "request": "launch",
+        //     "module": "wyzely_detect",
+        //     "args": [
+        //         "--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations", "--detect-object", "person", "--detect-object", "cell phone"
+        //     ],
+        //     "justMyCode": true
+        // },
         {
             // "name": "Python: Module",
             "name": "Full Debug",

diff --git a/README.md b/README.md
@@ -16,6 +16,9 @@ Recognize faces/objects in a video stream (from a webcam or a security camera) a
     - All RTSP feeds _should_ work, however.  
 - Python 3.10 or 3.11  
 - Poetry (optional)  
+- Windows or Linux  
+    - I've also tested this on macOS - it works on my 2014 MacBook Air but not on a 2011 MacBook Pro  
+    - Both were upgraded with OpenCore; the MacBook Air runs Monterey and the MacBook Pro runs a newer version of macOS, which may have been the problem  
 
 ### Docker  
 - A Wyze Cam  
@@ -46,6 +49,7 @@ This assumes you have Python 3.10 or 3.11 installed
 
 #### Poetry  
 1. `poetry install`  
+    a. For GPU support, use `poetry install -E cuda --with gpu`
 2. `poetry run -- wyzely-detect`  
 ### Configuration  
 The following are some basic CLI options. Most flags have environment variable equivalents which can be helpful when using Docker. 

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -21,22 +21,46 @@ ultralytics = "^8.0.190"
 hjson = "^3.1.0"
 numpy = "^1.23.2"
 
-# https://github.com/python-poetry/poetry/issues/6409
-torch = ">=2.0.0, !=2.0.1, !=2.1.0"
+# https://github.com/python-poetry/poetry/issues/6409#issuecomment-1911735833
+# To install with GPU, use poetry install -E cuda --with gpu
+torch = {version = "2.1.*", source = "pytorch-cpu", markers = "extra!='cuda'" }
 
 # https://stackoverflow.com/a/76477590/18270659
-# https://discuss.tensorflow.org/t/tensorflow-io-gcs-filesystem-with-windows/18849/4
+# https://discuss.tensorflow.org/t/tensorflow-io-gcs-filesystem-with-windows/18849/4
 # Might be able to remove this version constraint later
 # Working versions:
 # Python version 3.10.12 and 3.10.5 both work
 # CUDA version - 12.2
 # cuDNN version - 8.8.1
 # Installed from Nvidia website - nvidia-cuda-toolkit is not installed, but default PopOS drivers are installed
 tensorflow-io-gcs-filesystem = "0.31.0"
-tensorflow = {version = "^2.14.0", extras = ["and-cuda"]}
+tensorflow = {version = "^2.14.0", markers = "extra!='cuda'"}
 
 
 deepface = "^0.0.79"
+prettytable = "^3.9.0"
+
+
+[tool.poetry.group.gpu]
+optional = true
+
+[tool.poetry.group.gpu.dependencies]
+torch = {version = "2.1.*", source = "pytorch-cu121", markers = "extra=='cuda'"}
+tensorflow = {version = "^2.14.0", extras = ["and-cuda"], markers = "extra=='cuda'"}
+
+[tool.poetry.extras]
+# Might be better to rename this to nocpu since it's more accurate
+cuda = []
+
+[[tool.poetry.source]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+priority = "explicit"
+
+[[tool.poetry.source]]
+name = "pytorch-cu121"
+url = "https://download.pytorch.org/whl/cu121"
+priority = "explicit"
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.9.1"

diff --git a/wyzely_detect/__main__.py b/wyzely_detect/__main__.py
@@ -1,36 +1,30 @@
 # import face_recognition
 from pathlib import Path
-import os
 import cv2
+import sys
+from prettytable import PrettyTable
 
 # import hjson as json
 import torch
 from ultralytics import YOLO
 
-from .utils import notify, utils
+from .utils import utils
 from .utils.cli_args import argparser
 
 DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
 args = None
 
-objects_and_peoples = {
-    "objects": {},
-    "peoples": {},
-}
-
 
 def main():
-    global objects_and_peoples
     global args
-    # RUN_BY_COMPOSE = os.getenv("RUN_BY_COMPOSE") # Replace this with code to check for gpu
 
     args = argparser.parse_args()
 
     # Check if a CUDA GPU is available. If it is, set it via torch. If not, set it to cpu
     # https://github.com/ultralytics/ultralytics/issues/3084#issuecomment-1732433168
     # Currently, I have been unable to set up Poetry to use GPU for Torch
     for i in range(torch.cuda.device_count()):
-        print(f'Using {torch.cuda.get_device_properties(i).name} for pytorch')
+        print(f"Using {torch.cuda.get_device_properties(i).name} for pytorch")
     if torch.cuda.is_available():
         torch.cuda.set_device(0)
         print("Set CUDA device")
@@ -41,158 +35,108 @@ def main():
     if args.force_disable_tensorflow_gpu:
         print("Forcing tensorflow to use CPU")
         import tensorflow as tf
-        tf.config.set_visible_devices([], 'GPU')
-        if tf.config.experimental.list_logical_devices('GPU'):
-            print('GPU disabled unsuccessfully')
+
+        tf.config.set_visible_devices([], "GPU")
+        if tf.config.experimental.list_logical_devices("GPU"):
+            print("GPU disabled unsuccessfully")
         else:
             print("GPU disabled successfully")
 
     model = YOLO("yolov8n.pt")
 
     # Depending on if the user wants to use a stream or a capture device,
     # Set the video capture to the appropriate source
-    if args.rtsp_url is not None:
-        video_capture = cv2.VideoCapture(args.rtsp_url)
+    if not args.rtsp_url and not args.capture_device:
+        print("No stream or capture device set, defaulting to capture device 0")
+        video_sources = {"devices": [cv2.VideoCapture(0)]}
     else:
-        video_capture = cv2.VideoCapture(args.capture_device)
+        video_sources = {
+            "streams": [cv2.VideoCapture(url) for url in args.rtsp_url],
+            "devices": [cv2.VideoCapture(device) for device in args.capture_device],
+        }
+
+    if args.fake_second_source:  # Debug aid: reuse an already-opened source as a second one
+        try:
+            video_sources["devices"].append(video_sources["devices"][0])
+        except (KeyError, IndexError):  # Key missing, or the list of devices is empty
+            print("No capture device to use as second source. Trying stream.")
+            try:
+                video_sources["streams"].append(video_sources["streams"][0])
+            except (KeyError, IndexError):  # Key missing, or the list of streams is empty
+                print("No stream to use as a second source")
+                # Nothing to duplicate, so continue with only the sources that were opened
 
     # Eliminate lag by setting the buffer size to 1
     # This makes it so that the video capture will only grab the most recent frame
     # However, this means that the video may be choppy
-    video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
-
-    # Print the resolution of the video
-    print(
-        f"Video resolution: {video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)}x{video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)}"  # noqa: E501
-    )
-
+    # Only do this for streams
+    try:
+        for stream in video_sources["streams"]:
+            stream.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+    # If there are no streams, this will throw a KeyError
+    except KeyError:
+        pass
+
+    # Print out the resolution of the video sources. Ideally, change this so the device ID/url is also printed
+    pretty_table = PrettyTable(field_names=["Source Type", "Resolution"])
+    for source_type, sources in video_sources.items():
+        for source in sources:
+            if (
+                source.get(cv2.CAP_PROP_FRAME_WIDTH) == 0
+                or source.get(cv2.CAP_PROP_FRAME_HEIGHT) == 0
+            ):
+                message = "Capture for a source failed as resolution is 0x0.\n"
+                if source_type == "streams":
+                    message += "Check if the stream URL is correct and if the stream is online."
+                else:
+                    message += "Check if the capture device is connected, working, and not in use by another program."
+                print(message)
+                sys.exit(1)
+            pretty_table.add_row(
+                [
+                    source_type,
+                    f"{source.get(cv2.CAP_PROP_FRAME_WIDTH)}x{source.get(cv2.CAP_PROP_FRAME_HEIGHT)}",
+                ]
+            )
+    print(pretty_table)
     print("Beginning video capture...")
     while True:
         # Grab a single frame of video
-        ret, frame = video_capture.read()
-        # Resize frame of video to a smaller size for faster recognition processing
-        run_frame = cv2.resize(frame, (0, 0), fx=args.run_scale, fy=args.run_scale)
-        # view_frame = cv2.resize(frame, (0, 0), fx=args.view_scale, fy=args.view_scale)
-
-        results = model(run_frame, verbose=False)
-
-        path_to_faces = Path(args.faces_directory)
-        path_to_faces_exists = path_to_faces.is_dir()
-
-        for i, r in enumerate(results):
-            # list of dicts with each dict containing a label, x1, y1, x2, y2
-            plot_boxes = []
-
-            # The following is stuff for people
-            # This is still in the for loop as each result, no matter if anything is detected, will be present.
-            # Thus, there will always be one result (r)
-
-            # Only run if path_to_faces exists
-            # May be better to check every iteration, but this also works
-            if path_to_faces_exists:
-                if face_details := utils.recognize_face(
-                    path_to_directory=path_to_faces,
-                    run_frame=run_frame,
-                    min_confidence=args.face_confidence_threshold,
+        frames = []
+        # frames = [source.read() for sources in video_sources.values() for source in sources]
+        for list_of_sources in video_sources.values():
+            frames.extend([source.read()[1] for source in list_of_sources])
+        frames_to_show = []
+        for frame in frames:
+            frames_to_show.append(
+                utils.process_footage(
+                    frame=frame,
+                    run_scale=args.run_scale,
+                    view_scale=args.view_scale,
+                    faces_directory=Path(args.faces_directory),
+                    face_confidence_threshold=args.face_confidence_threshold,
                     no_remove_representations=args.no_remove_representations,
-                ):
-                    plot_boxes.append(face_details)
-                    objects_and_peoples = notify.thing_detected(
-                        thing_name=face_details["label"],
-                        objects_and_peoples=objects_and_peoples,
-                        detection_type="peoples",
-                        detection_window=args.detection_window,
-                        detection_duration=args.detection_duration,
-                        notification_window=args.notification_window,
-                        ntfy_url=args.ntfy_url,
-                    )
-
-            # The following is stuff for objects
-            # Setup dictionary of object names
-            if (
-                objects_and_peoples["objects"] == {}
-                or objects_and_peoples["objects"] is None
-            ):
-                for name in r.names.values():
-                    objects_and_peoples["objects"][name] = {
-                        "last_detection_time": None,
-                        "detection_duration": None,
-                        # "first_detection_time": None,
-                        "last_notification_time": None,
-                    }
-                # Also, make sure that the objects to detect are in the list of objects_and_peoples
-                # If it isn't, print a warning
-                for obj in args.detect_object:
-                    if obj not in objects_and_peoples:
-                        print(
-                            f"Warning: {obj} is not in the list of objects the model can detect!"
-                        )
-
-            for box in r.boxes:
-                # Get the name of the object
-                class_id = r.names[box.cls[0].item()]
-                # Get the coordinates of the object
-                cords = box.xyxy[0].tolist()
-                cords = [round(x) for x in cords]
-                # Get the confidence
-                conf = round(box.conf[0].item(), 2)
-                # Print it out, adding a spacer between each object
-                # print("Object type:", class_id)
-                # print("Coordinates:", cords)
-                # print("Probability:", conf)
-                # print("---")
-
-                # Now do stuff (if conf > 0.5)
-                if conf < args.object_confidence_threshold or (
-                    class_id not in args.detect_object and args.detect_object != []
-                ):
-                    # If the confidence is too low
-                    # or if the object is not in the list of objects to detect and the list of objects to detect is not empty
-                    # then skip this iteration
-                    continue
-
-                # Add the object to the list of objects to plot
-                plot_boxes.append(
-                    {
-                        "label": class_id,
-                        "x1": cords[0],
-                        "y1": cords[1],
-                        "x2": cords[2],
-                        "y2": cords[3],
-                    }
-                )
-
-                objects_and_peoples = notify.thing_detected(
-                    thing_name=class_id,
-                    objects_and_peoples=objects_and_peoples,
-                    detection_type="objects",
                     detection_window=args.detection_window,
                     detection_duration=args.detection_duration,
                     notification_window=args.notification_window,
                     ntfy_url=args.ntfy_url,
+                    model=model,
+                    detect_object=args.detect_object,
+                    object_confidence_threshold=args.object_confidence_threshold,
                 )
-
-            # To debug plotting, use r.plot() to cross reference the bounding boxes drawn by the plot_label() and r.plot()
-            frame_to_show = utils.plot_label(
-                boxes=plot_boxes,
-                full_frame=frame,
-                # full_frame=r.plot(),
-                run_scale=args.run_scale,
-                view_scale=args.view_scale,
             )
-
-            # Display the resulting frame
-            # cv2.imshow("", r)
-            if not args.no_display:
-                cv2.imshow(f"Video{i}", frame_to_show)
+        # Display the resulting frame
+        if not args.no_display:
+            for i, frame_to_show in enumerate(frames_to_show):
+                cv2.imshow(f"Video {i}", frame_to_show)
 
         # Hit 'q' on the keyboard to quit!
         if cv2.waitKey(1) & 0xFF == ord("q"):
             break
 
     # Release handle to the webcam
     print("Releasing video capture")
-    video_capture.release()
+    [source.release() for sources in video_sources.values() for source in sources]
     cv2.destroyAllWindows()