Skip to content

Commit

Permalink
Merge pull request #11 from slashtechno/multi-camera-support
Browse files Browse the repository at this point in the history
Added support for multiple video sources
  • Loading branch information
slashtechno authored Feb 16, 2024
2 parents 5c1a22f + f7f5db9 commit d56cee6
Show file tree
Hide file tree
Showing 8 changed files with 1,140 additions and 1,046 deletions.
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10.5
3.10.12
12 changes: 11 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,20 @@
"request": "launch",
"module": "wyzely_detect",
"args": [
"--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations"
"--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations", "--fake-second-source"
],
"justMyCode": true
},
// {
// "name": "Quick, Specific Debug",
// "type": "python",
// "request": "launch",
// "module": "wyzely_detect",
// "args": [
// "--run-scale", "0.25", "--view-scale", "0.5", "--no-remove-representations", "--detect-object", "person", "--detect-object", "cell phone"
// ],
// "justMyCode": true
// },
{
// "name": "Python: Module",
"name": "Full Debug",
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Recognize faces/objects in a video stream (from a webcam or a security camera) a
- All RTSP feeds _should_ work, however.
- Python 3.10 or 3.11
- Poetry (optional)
- Windows or Linux
- I've also tested this on macOS - it works on my 2014 MacBook Air but not on a 2011 MacBook Pro
- Both were upgraded with OpenCore; the MacBook Air runs Monterey and the MacBook Pro runs a newer version of macOS, which may have been the problem

### Docker
- A Wyze Cam
Expand Down Expand Up @@ -46,6 +49,7 @@ This assumes you have Python 3.10 or 3.11 installed

#### Poetry
1. `poetry install`
a. For GPU support, use `poetry install -E cuda --with gpu`
2. `poetry run -- wyzely-detect`
### Configuration
The following are some basic CLI options. Most flags have environment variable equivalents which can be helpful when using Docker.
Expand Down
1,656 changes: 796 additions & 860 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 28 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,46 @@ ultralytics = "^8.0.190"
hjson = "^3.1.0"
numpy = "^1.23.2"

# https://github.com/python-poetry/poetry/issues/6409
torch = ">=2.0.0, !=2.0.1, !=2.1.0"
# https://github.com/python-poetry/poetry/issues/6409#issuecomment-1911735833
# To install with GPU, use poetry install -E cuda --with gpu
torch = {version = "2.1.*", source = "pytorch-cpu", markers = "extra!='cuda'" }

# https://stackoverflow.com/a/76477590/18270659
# https://discuss.tensorflow.org/t/tensorflow-io-gcs-filesystem-with-windows/18849/4
# https://discuss.tensorflow.org/t/tensorflow-io-gcs-filesystem-with-windows/18849/4
# Might be able to remove this version constraint later
# Working versions:
# Python version 3.10.12 and 3.10.5 both work
# CUDA version - 12.2
# cuDNN version - 8.8.1
# Installed from Nvidia website - nvidia-cuda-toolkit is not installed, but default PopOS drivers are installed
tensorflow-io-gcs-filesystem = "0.31.0"
tensorflow = {version = "^2.14.0", extras = ["and-cuda"]}
tensorflow = {version = "^2.14.0", markers = "extra!='cuda'"}


deepface = "^0.0.79"
prettytable = "^3.9.0"


[tool.poetry.group.gpu]
optional = true

[tool.poetry.group.gpu.dependencies]
torch = {version = "2.1.*", source = "pytorch-cu121", markers = "extra=='cuda'"}
tensorflow = {version = "^2.14.0", extras = ["and-cuda"], markers = "extra=='cuda'"}

[tool.poetry.extras]
# Might be better to rename this to nocpu since it's more accurate
cuda = []

[[tool.poetry.source]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
priority = "explicit"

[[tool.poetry.source]]
name = "pytorch-cu121"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"

[tool.poetry.group.dev.dependencies]
black = "^23.9.1"
Expand Down
210 changes: 77 additions & 133 deletions wyzely_detect/__main__.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,30 @@
# import face_recognition
from pathlib import Path
import os
import cv2
import sys
from prettytable import PrettyTable

# import hjson as json
import torch
from ultralytics import YOLO

from .utils import notify, utils
from .utils import utils
from .utils.cli_args import argparser

DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
args = None

objects_and_peoples = {
"objects": {},
"peoples": {},
}


def main():
global objects_and_peoples
global args
# RUN_BY_COMPOSE = os.getenv("RUN_BY_COMPOSE") # Replace this with code to check for gpu

args = argparser.parse_args()

# Check if a CUDA GPU is available. If it is, set it via torch. If not, set it to cpu
# https://github.com/ultralytics/ultralytics/issues/3084#issuecomment-1732433168
# Currently, I have been unable to set up Poetry to use GPU for Torch
for i in range(torch.cuda.device_count()):
print(f'Using {torch.cuda.get_device_properties(i).name} for pytorch')
print(f"Using {torch.cuda.get_device_properties(i).name} for pytorch")
if torch.cuda.is_available():
torch.cuda.set_device(0)
print("Set CUDA device")
Expand All @@ -41,158 +35,108 @@ def main():
if args.force_disable_tensorflow_gpu:
print("Forcing tensorflow to use CPU")
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')
if tf.config.experimental.list_logical_devices('GPU'):
print('GPU disabled unsuccessfully')

tf.config.set_visible_devices([], "GPU")
if tf.config.experimental.list_logical_devices("GPU"):
print("GPU disabled unsuccessfully")
else:
print("GPU disabled successfully")

model = YOLO("yolov8n.pt")

# Depending on if the user wants to use a stream or a capture device,
# Set the video capture to the appropriate source
if args.rtsp_url is not None:
video_capture = cv2.VideoCapture(args.rtsp_url)
if not args.rtsp_url and not args.capture_device:
print("No stream or capture device set, defaulting to capture device 0")
video_sources = {"devices": [cv2.VideoCapture(0)]}
else:
video_capture = cv2.VideoCapture(args.capture_device)
video_sources = {
"streams": [cv2.VideoCapture(url) for url in args.rtsp_url],
"devices": [cv2.VideoCapture(device) for device in args.capture_device],
}

if args.fake_second_source:
    # Debug aid: duplicate the first available source so the multi-source
    # code path can be exercised with a single camera or stream.
    # Use .get() plus a truthiness check because a source type may be either
    # absent from the dict (no "streams" key in the default case) or present
    # as an empty list; indexing an empty list raises IndexError, which the
    # previous `except KeyError` handlers never caught.
    devices = video_sources.get("devices")
    streams = video_sources.get("streams")
    if devices:
        devices.append(devices[0])
    else:
        print("No capture device to use as second source. Trying stream.")
        if streams:
            # Fall back to the first stream (the previous code retried
            # "devices" here, so the fallback could never succeed).
            streams.append(streams[0])
        else:
            print("No stream to use as a second source")
# When the code tries to resize the nonexistent capture device 1, the program will fail

# Eliminate lag by setting the buffer size to 1
# This makes it so that the video capture will only grab the most recent frame
# However, this means that the video may be choppy
video_capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)

# Print the resolution of the video
print(
f"Video resolution: {video_capture.get(cv2.CAP_PROP_FRAME_WIDTH)}x{video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)}" # noqa: E501
)

# Only do this for streams
try:
for stream in video_sources["streams"]:
stream.set(cv2.CAP_PROP_BUFFERSIZE, 1)
# If there are no streams, this will throw a KeyError
except KeyError:
pass

# Print out the resolution of the video sources. Ideally, change this so the device ID/url is also printed
pretty_table = PrettyTable(field_names=["Source Type", "Resolution"])
for source_type, sources in video_sources.items():
for source in sources:
if (
source.get(cv2.CAP_PROP_FRAME_WIDTH) == 0
or source.get(cv2.CAP_PROP_FRAME_HEIGHT) == 0
):
message = "Capture for a source failed as resolution is 0x0.\n"
if source_type == "streams":
message += "Check if the stream URL is correct and if the stream is online."
else:
message += "Check if the capture device is connected, working, and not in use by another program."
print(message)
sys.exit(1)
pretty_table.add_row(
[
source_type,
f"{source.get(cv2.CAP_PROP_FRAME_WIDTH)}x{source.get(cv2.CAP_PROP_FRAME_HEIGHT)}",
]
)
print(pretty_table)
print("Beginning video capture...")
while True:
# Grab a single frame of video
ret, frame = video_capture.read()
# Resize frame of video to a smaller size for faster recognition processing
run_frame = cv2.resize(frame, (0, 0), fx=args.run_scale, fy=args.run_scale)
# view_frame = cv2.resize(frame, (0, 0), fx=args.view_scale, fy=args.view_scale)

results = model(run_frame, verbose=False)

path_to_faces = Path(args.faces_directory)
path_to_faces_exists = path_to_faces.is_dir()

for i, r in enumerate(results):
# list of dicts with each dict containing a label, x1, y1, x2, y2
plot_boxes = []

# The following is stuff for people
# This is still in the for loop as each result, no matter if anything is detected, will be present.
# Thus, there will always be one result (r)

# Only run if path_to_faces exists
# May be better to check every iteration, but this also works
if path_to_faces_exists:
if face_details := utils.recognize_face(
path_to_directory=path_to_faces,
run_frame=run_frame,
min_confidence=args.face_confidence_threshold,
frames = []
# frames = [source.read() for sources in video_sources.values() for source in sources]
for list_of_sources in video_sources.values():
frames.extend([source.read()[1] for source in list_of_sources])
frames_to_show = []
for frame in frames:
frames_to_show.append(
utils.process_footage(
frame=frame,
run_scale=args.run_scale,
view_scale=args.view_scale,
faces_directory=Path(args.faces_directory),
face_confidence_threshold=args.face_confidence_threshold,
no_remove_representations=args.no_remove_representations,
):
plot_boxes.append(face_details)
objects_and_peoples = notify.thing_detected(
thing_name=face_details["label"],
objects_and_peoples=objects_and_peoples,
detection_type="peoples",
detection_window=args.detection_window,
detection_duration=args.detection_duration,
notification_window=args.notification_window,
ntfy_url=args.ntfy_url,
)

# The following is stuff for objects
# Setup dictionary of object names
if (
objects_and_peoples["objects"] == {}
or objects_and_peoples["objects"] is None
):
for name in r.names.values():
objects_and_peoples["objects"][name] = {
"last_detection_time": None,
"detection_duration": None,
# "first_detection_time": None,
"last_notification_time": None,
}
# Also, make sure that the objects to detect are in the list of objects_and_peoples
# If it isn't, print a warning
for obj in args.detect_object:
if obj not in objects_and_peoples:
print(
f"Warning: {obj} is not in the list of objects the model can detect!"
)

for box in r.boxes:
# Get the name of the object
class_id = r.names[box.cls[0].item()]
# Get the coordinates of the object
cords = box.xyxy[0].tolist()
cords = [round(x) for x in cords]
# Get the confidence
conf = round(box.conf[0].item(), 2)
# Print it out, adding a spacer between each object
# print("Object type:", class_id)
# print("Coordinates:", cords)
# print("Probability:", conf)
# print("---")

# Now do stuff (if conf > 0.5)
if conf < args.object_confidence_threshold or (
class_id not in args.detect_object and args.detect_object != []
):
# If the confidence is too low
# or if the object is not in the list of objects to detect and the list of objects to detect is not empty
# then skip this iteration
continue

# Add the object to the list of objects to plot
plot_boxes.append(
{
"label": class_id,
"x1": cords[0],
"y1": cords[1],
"x2": cords[2],
"y2": cords[3],
}
)

objects_and_peoples = notify.thing_detected(
thing_name=class_id,
objects_and_peoples=objects_and_peoples,
detection_type="objects",
detection_window=args.detection_window,
detection_duration=args.detection_duration,
notification_window=args.notification_window,
ntfy_url=args.ntfy_url,
model=model,
detect_object=args.detect_object,
object_confidence_threshold=args.object_confidence_threshold,
)

# To debug plotting, use r.plot() to cross reference the bounding boxes drawn by the plot_label() and r.plot()
frame_to_show = utils.plot_label(
boxes=plot_boxes,
full_frame=frame,
# full_frame=r.plot(),
run_scale=args.run_scale,
view_scale=args.view_scale,
)

# Display the resulting frame
# cv2.imshow("", r)
if not args.no_display:
cv2.imshow(f"Video{i}", frame_to_show)
# Display the resulting frame
if not args.no_display:
for i, frame_to_show in enumerate(frames_to_show):
cv2.imshow(f"Video {i}", frame_to_show)

# Hit 'q' on the keyboard to quit!
if cv2.waitKey(1) & 0xFF == ord("q"):
break

# Release handle to the webcam
print("Releasing video capture")
video_capture.release()
[source.release() for sources in video_sources.values() for source in sources]
cv2.destroyAllWindows()


Expand Down
Loading

0 comments on commit d56cee6

Please sign in to comment.