play.py

import torch
import cv2
from engine.utils import load_yaml 
from tqdm import tqdm 
from models.Pedestrian_Detection.ped_inference import detect_ped_frame, load_ped_model
from models.TrafficLights_Detection.tl_inference import detect_tl_frame, message_rule, load_tl_model
from models.Lane_Detection.lane_inference import detect_lane_frame , load_lane_model
import os 
import streamlit as st 
"""
play.py

This script is intended for Web Interface Execution.
It handles interactions via a web interface for running the model.

"""

def draw_boxes_on_frame(frame, ped_info, tl_info, lane_info, task_type='all', debug=False):
    """
    type = ['all', 'message']
        - all : bounding boxes and comments
        - message : comments only 
    """
    ped_rects, ped_texts, ped_warning_texts = ped_info 
    tl_rectangles, tl_texts, tl_messages, prev_tl_messages = tl_info 
    
    # 1) Pedestrian 
    if task_type=='all':
        for rect, text in zip(ped_rects, ped_texts):
            cv2.rectangle(frame, rect[0][0], rect[0][1], rect[1], 2)
            cv2.putText(frame, text[0], text[1], cv2.FONT_HERSHEY_SIMPLEX, 0.5, text[2], 2)
        
    for warning_text in ped_warning_texts:
        cv2.putText(frame, warning_text[0], warning_text[1], cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
    
    # 2) Traffic Lights 
    if task_type=="all":
        for rect, text in zip(tl_rectangles, tl_texts):
            cv2.rectangle(frame, rect[0][0], rect[0][1], rect[1], 3)
            cv2.putText(frame, text[0], text[1], cv2.FONT_HERSHEY_SIMPLEX, 0.5, text[2], 2, lineType=cv2.FILLED)
    
    # sanity check
    if debug is True:
        print(f'[DEBUG] tl_messages = {tl_messages}')
    
    if len(tl_messages) > 1:
        message, color, prev_tl_messages = message_rule(tl_messages, prev_tl_messages)
        prev_tl_messages.append(message)

        cv2.putText(frame, message, (35, 85), cv2.FONT_HERSHEY_TRIPLEX, 3, color, 3, lineType=cv2.LINE_AA)

    elif len(tl_messages) == 1:
        message, color, prev_tl_messages = message_rule(tl_messages, prev_tl_messages)
        if message == 'STOP' or message == 'PREPARE TO STOP':
            prev_tl_messages.append(message)
            cv2.putText(frame, message, (35, 85), cv2.FONT_HERSHEY_TRIPLEX, 3, color, 3, lineType=cv2.LINE_AA)
    else:
        if prev_tl_messages[-1] == prev_tl_messages[-2] and prev_tl_messages[-1] != 'NONE': #ADD for stability
            message = str(prev_tl_messages[-1])             
            if message == 'STOP':
                color = (0, 0, 255)
            elif message == 'PROCEED WITH CAUTION':
                color = (0, 248, 211)
            else:
                color = (0, 152, 255)
            cv2.putText(frame, message, (35, 85), cv2.FONT_HERSHEY_TRIPLEX, 3, color, 3, lineType=cv2.LINE_AA)
    
    # 3) Lane
    cv2.drawContours(frame, lane_info[0], lane_info[1], (0, 255, 255), thickness=3) 

    return frame, prev_tl_messages

def detect_image(task_type, pedestrian_model, traffic_light_model, lane_model, 
                 input_path, output_path, \
                 ped_score_thr, tl_score_thr, \
                 iou_thr, conf_thr, warning_dst, device, \
                 lane_merge_thr=50, resize_width=1280, resize_height=720):
    
    with st.spinner('Processing image...'):
        img_frame = cv2.imread(input_path)
        img_frame = cv2.resize(img_frame, (resize_width, resize_height))

        prev_tl_messages = ['NONE', 'NONE']  

        ped_rects, ped_texts, ped_warning_texts = detect_ped_frame(pedestrian_model, img_frame, ped_score_thr, iou_thr, conf_thr, warning_dst, device)
        ped_info = (ped_rects, ped_texts, ped_warning_texts)

        tl_rectangles, tl_texts, tl_messages = detect_tl_frame(traffic_light_model, img_frame, device, tl_score_thr)
        tl_info = (tl_rectangles, tl_texts, tl_messages, prev_tl_messages)

        lane_info = detect_lane_frame(lane_model, img_frame, device, threshold=lane_merge_thr)
        processed_frame, prev_tl_messages = draw_boxes_on_frame(img_frame, ped_info, tl_info, lane_info, task_type)

        cv2.imwrite(output_path, processed_frame)
        print(f'[INFO] Saved image to {output_path}')


def detect_video(task_type, pedestrian_model, traffic_light_model, lane_model, 
                     input_path, output_path, 
                     ped_score_thr, tl_score_thr, \
                     iou_thr, conf_thr, warning_dst, device, 
                     lane_merge_thr=50, resize_width=1280, resize_height=720):
    
    cap = cv2.VideoCapture(input_path)
    video_fps = cap.get(cv2.CAP_PROP_FPS) 

    codec = cv2.VideoWriter_fourcc(*'XVID')
    avi_output_path = output_path.with_suffix(".avi") 
    video_writer = cv2.VideoWriter(str(avi_output_path), codec, video_fps, (resize_width, resize_height))
    
    frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    prev_tl_messages = ['NONE', 'NONE'] 

    print(f'[INFO] Total number of frames: {frame_cnt}')

    with st.spinner('Processing video...'):
        with tqdm(total=frame_cnt, desc="[INFO] Processing Frames") as pbar:
            while True:
                hasFrame, img_frame = cap.read()
                if not hasFrame:
                    print(f'Processed all frames')
                    break
                img_frame = cv2.resize(img_frame, (resize_width, resize_height))
                
                ped_rects, ped_texts, ped_warning_texts = detect_ped_frame(pedestrian_model, img_frame, ped_score_thr, iou_thr, conf_thr, warning_dst, device)
                ped_info = (ped_rects, ped_texts, ped_warning_texts)

                tl_rectangles, tl_texts, tl_messages  = detect_tl_frame(traffic_light_model, img_frame, device, tl_score_thr)
                tl_info = (tl_rectangles, tl_texts, tl_messages, prev_tl_messages)

                lane_info = detect_lane_frame(lane_model, img_frame, device, threshold=lane_merge_thr)
                processed_frame, prev_tl_messages = draw_boxes_on_frame(img_frame, ped_info, tl_info, lane_info, task_type)

                video_writer.write(processed_frame) 
                pbar.update(1)

    video_writer.release()
    cap.release()
    print(f'[INFO] Saved video to {avi_output_path}')
    
def main(task_type, CFG_DIR, OUTPUT_DIR, video_path, image_path, ped_score_threshold, tf_score_threshold): 
    check_input_path = video_path if video_path is not None else image_path

    if not os.path.exists(check_input_path):
        print(f'[ERROR] Input file not found at: {check_input_path}. Exiting...')
        exit(1)

    cfg = load_yaml(CFG_DIR)
    device_num = cfg['device']
    device = torch.device(f"cuda:{device_num}" if torch.cuda.is_available() else "cpu")
    print(f'[INFO] Device is on {device_num}')

    print('[INFO] Loading models...')
    pedestrian_model = load_ped_model(cfg['pedestrian']['model_path'], cfg['pedestrian']['num_classes'], device)
    traffic_light_model = load_tl_model(cfg['traffic_light']['model_path'], cfg['traffic_light']['num_classes'], device)
    lane_model = load_lane_model(cfg['lane']['model_path'], cfg['lane']['model_type'], device)
    print('[INFO] Models loaded successfully')

    print(f'[INFO] Task type : {task_type} | Pedestrian Score threshold : {ped_score_threshold} | Traffic Lights Score threshold : {tf_score_threshold}')

    if video_path is not None:
        print(f'[INFO] Processing video: {video_path}')
        detect_video(task_type, pedestrian_model, traffic_light_model, lane_model, 
                         video_path, OUTPUT_DIR, 
                         ped_score_threshold, tf_score_threshold,
                         cfg['pedestrian']['iou_threshold'], 
                         cfg['pedestrian']['confidence_threshold'], 
                         cfg['pedestrian']['warning_distance'], 
                         device,
                         lane_merge_thr=cfg['lane']['lane_merge_thr'])
    
    if image_path is not None:
        print(f'[INFO] Processing image: {image_path}')        
        detect_image(task_type, pedestrian_model, traffic_light_model, lane_model, 
                         image_path, 
                         OUTPUT_DIR, 
                         ped_score_threshold, tf_score_threshold,
                         cfg['pedestrian']['iou_threshold'], 
                         cfg['pedestrian']['confidence_threshold'], 
                         cfg['pedestrian']['warning_distance'], device)