-
Notifications
You must be signed in to change notification settings - Fork 0
/
K_yolo_detect_frame_positon.py
101 lines (78 loc) · 3.85 KB
/
K_yolo_detect_frame_positon.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import cv2
import random
from tracker import Tracker
from clipImage_function import clip_image
from clip_visualization import clip_visualize
from ultralytics import YOLO
model = YOLO("yolov8n.pt")  # YOLOv8-nano weights; loaded once at import time
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for j in range(10)]  # random BGR colors — presumably for drawing tracks; unused in this chunk, TODO confirm
tracker = Tracker()  # module-level tracker: persists track identities across frames
detection_threshold = 0.3 # minimum YOLO confidence for a detection to count as a person (original comment said 0.5, but the value is 0.3)
person_class_id = 0 # COCO class id 0 = "person"; the only class this pipeline keeps
def yolo_process_frame2(frame_data : 'KVideoFrame', words=None):
    """Detect and track people in one frame, then CLIP-score each person crop.

    Runs the module-level YOLO ``model`` on ``frame_data.frame_image``, keeps
    detections of class ``person_class_id`` above ``detection_threshold``,
    advances the module-level ``tracker``, and for every current track crops a
    square patch around the box and scores it against *words* via
    ``clip_image``.

    Side effects:
        - ``frame_data.mid_points`` is set to the bottom-center point
          (mid_x, y2) of each tracked box.
        - ``frame_data.clip_datas`` is set to a dict mapping each word to the
          list of per-person scores collected this frame.
        - The module-level ``tracker`` state is mutated.

    Args:
        frame_data: frame wrapper exposing ``frame_image`` (BGR image array).
        words: labels to score each crop against; defaults to
            ``['standing', 'walking']``. (``None`` sentinel avoids the shared
            mutable-default-argument pitfall of the original signature.)
    """
    if words is None:
        words = ['standing', 'walking']
    mid_points = []
    datas = {}
    frame = frame_data.frame_image
    results = model(frame)  # YOLO inference output for this frame
    for result in results:
        detections = []
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            if score > detection_threshold and class_id == person_class_id:
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                # NOTE(review): the original computed ratio = (y2-y1)/(x2-x1)
                # here, which could raise ZeroDivisionError for zero-width
                # boxes; the value was unused because the aspect-ratio filter
                # below is disabled, so the computation is removed.
                # body_correction = 2 < (y2 - y1) / (x2 - x1) < 5
                detections.append([x1, y1, x2, y2, score])
        tracker.update(frame, detections)  # advance tracks with this frame's detections
        for track in tracker.tracks:
            x1, y1, x2, y2 = track.bbox
            # Expand the bbox to a square of side max(w, h) centered on the
            # box, then clamp to the frame bounds.
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            sz = max(x2 - x1, y2 - y1)
            xx0 = max(0, int(cx - sz / 2))
            xx1 = min(frame.shape[1], int(cx + sz / 2))
            yy0 = max(0, int(cy - sz / 2))
            yy1 = min(frame.shape[0], int(cy + sz / 2))
            if xx1 - xx0 <= 0 or yy1 - yy0 <= 0:
                continue  # clamped crop is empty (box fully outside the frame)
            crop_frame = frame[yy0:yy1, xx0:xx1]
            clip_data = clip_image(words, crop_frame)  # word -> score for this person crop
            for key, value in clip_data.items():
                datas.setdefault(key, []).append(value)
            # Bottom-center of the box: the person's approximate foot position.
            mid_points.append((int((x1 + x2) / 2), int(y2)))
    frame_data.mid_points = mid_points
    frame_data.clip_datas = datas
def process_clip(crop_frame : 'KVideoFrame', words=None, n=0):
    """CLIP-score *crop_frame* against *words*; thin wrapper over ``clip_image``.

    Args:
        crop_frame: image to score. (NOTE(review): annotated 'KVideoFrame',
            but ``clip_image`` elsewhere in this file receives a raw image
            array — confirm the intended type.)
        words: candidate labels; defaults to ``['standing', 'walking']``.
            ``None`` sentinel replaces the original mutable default argument.
        n: unused; kept only for interface compatibility with callers.

    Returns:
        Whatever ``clip_image`` returns (a word -> score mapping, per its use
        in ``yolo_process_frame2``).
    """
    if words is None:
        words = ['standing', 'walking']
    return clip_image(words, crop_frame)