Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python 程序中每帧图像的耗时问题 #236

Open
stephen-TT opened this issue Jul 17, 2024 · 2 comments
Open

python 程序中每帧图像的耗时问题 #236

stephen-TT opened this issue Jul 17, 2024 · 2 comments

Comments

@stephen-TT
Copy link

大佬 您好,我用的自己训练的yolov8s pt模型通过readme中的教程转成的engine,然后修改了infer_det.py代码使其能够调用摄像头,我想计算每帧的耗时,不知道下面的代码计算对不对:

from models import TRTModule  # isort:skip
import argparse
from pathlib import Path

import time
import cv2
import torch

from config import CLASSES_DET, COLORS
from models.torch_utils import det_postprocess
from models.utils import blob, letterbox, path_to_list


def _draw_detections(draw, bboxes, scores, labels) -> None:
    """Draw one rectangle and a class/score caption per detection onto ``draw``.

    Mutates ``draw`` (an HxWx3 BGR image) in place.  Extracted because the
    image-file path and the camera path used two identical copies of this loop.
    """
    for (bbox, score, label) in zip(bboxes, scores, labels):
        bbox = bbox.round().int().tolist()
        cls_id = int(label)
        cls = CLASSES_DET[cls_id]
        color = COLORS[cls]

        text = f'{cls}:{score:.3f}'
        x1, y1, x2, y2 = bbox

        (_w, _h), _bl = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 1)
        # Clamp the caption anchor so it never falls below the image.
        _y1 = min(y1 + 1, draw.shape[0])

        cv2.rectangle(draw, (x1, y1), (x2, y2), color, 2)
        cv2.rectangle(draw, (x1, _y1), (x1 + _w, _y1 + _h + _bl), (0, 0, 255), -1)
        cv2.putText(draw, text, (x1, _y1 + _h), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)


def main(args: argparse.Namespace) -> None:
    """Run TensorRT YOLOv8 detection on image files and/or a camera stream.

    Args:
        args: Parsed CLI options — engine path, device, optional image
            source, camera flag, show flag and output directory.
    """
    device = torch.device(args.device)
    Engine = TRTModule(args.engine, device)
    H, W = Engine.inp_info[0].shape[-2:]

    # set desired output names order
    Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])

    save_path = Path(args.out_dir)

    if not args.show and not save_path.exists():
        save_path.mkdir(parents=True, exist_ok=True)

    if args.imgs:
        images = path_to_list(args.imgs)
        print(f'images:{images}')
        for image in images:
            save_image = save_path / image.name
            bgr = cv2.imread(str(image))
            draw = bgr.copy()
            # Letterbox to the engine's static input size, keep ratio/padding
            # so boxes can be mapped back to the original image afterwards.
            bgr, ratio, dwdh = letterbox(bgr, (W, H))
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            tensor = blob(rgb, return_seg=False)
            # dwdh * 2 -> (dw, dh, dw, dh), one offset per box coordinate.
            dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
            tensor = torch.asarray(tensor, device=device)
            # inference
            data = Engine(tensor)

            bboxes, scores, labels = det_postprocess(data)
            if bboxes.numel() == 0:
                # if no bounding box
                print(f'{image}: no object!')
                continue
            # Undo letterbox padding and scaling.
            bboxes -= dwdh
            bboxes /= ratio

            _draw_detections(draw, bboxes, scores, labels)

            if args.show:
                cv2.imshow('result', draw)
                cv2.waitKey(0)
            else:
                cv2.imwrite(str(save_image), draw)

    print(f'111 camera:{args.camera}')  # 111 camera:0
    if args.camera:
        print(f'camera:{args.camera}')
        cap = cv2.VideoCapture(0)
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    # BUG FIX: the original used `assert cap.isOpened()` here,
                    # which is stripped under `python -O`; report and stop
                    # cleanly instead.
                    print('VideoCapture returned no frame, stopping')
                    break
                draw = frame.copy()
                bgr, ratio, dwdh = letterbox(frame, (W, H))
                rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
                tensor = blob(rgb, return_seg=False)
                dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
                tensor = torch.asarray(tensor, device=device)

                t1 = time.perf_counter()
                # inference
                data = Engine(tensor)
                bboxes, scores, labels = det_postprocess(data)
                # TIMING FIX (the question in this issue): CUDA launches are
                # asynchronous, so synchronize explicitly before reading the
                # clock; otherwise the measured span may not cover the full
                # GPU work.  Box rescaling is excluded from the measurement.
                if device.type == 'cuda':
                    torch.cuda.synchronize(device)
                print(f'inference time: {time.perf_counter() - t1:.3f}s')

                if bboxes.numel() != 0:
                    # BUG FIX: mirror the image path — skip rescaling/drawing
                    # on frames with no detections (original always ran them).
                    bboxes -= dwdh
                    bboxes /= ratio
                    _draw_detections(draw, bboxes, scores, labels)
                if args.show:
                    cv2.imshow('0', draw)
                    if cv2.waitKey(1) in [ord('q'), 27]:
                        break
        finally:
            # BUG FIX: the original leaked the capture handle and any
            # windows; release them even if the loop exits via an exception.
            cap.release()
            cv2.destroyAllWindows()

def parse_args(argv=None) -> argparse.Namespace:
    """Parse command-line options for the detector script.

    Args:
        argv: Optional explicit argument list; ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing callers are unchanged.

    Returns:
        The parsed options namespace (engine, camera, imgs, show,
        out_dir, device).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, help='Engine file')
    # BUG FIX: the flag is a boolean switch (store_true), but the help text
    # claimed it was a "camera index"; main() always opens camera 0.
    parser.add_argument('--camera',
                        action='store_true',
                        help='Read frames from camera 0')
    parser.add_argument('--imgs',
                        type=str,
                        help='Images file, dir path or single img file path')
    parser.add_argument('--show',
                        action='store_true',
                        help='Show the detection results')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='TensorRT infer device')
    args = parser.parse_args(argv)
    return args


# Script entry point: parse CLI options, then run inference.
if __name__ == '__main__':
    args = parse_args()
    main(args)

和pt模型推理的速度相比,engine加速感觉没啥提升,如下图

rtx2060
win 10
trt 8.5.1.7

修改后代码的测速

image

pt模型的推理速度

image

@triple-Mu
Copy link
Owner

大佬 您好,我用的自己训练的yolov8s pt模型通过readme中的教程转成的engine,然后修改了infer_det.py代码使其能够调用摄像头,我想计算每帧的耗时,不知道下面的代码计算对不对:

from models import TRTModule  # isort:skip
import argparse
from pathlib import Path

import time
import cv2
import torch

from config import CLASSES_DET, COLORS
from models.torch_utils import det_postprocess
from models.utils import blob, letterbox, path_to_list


def main(args: argparse.Namespace) -> None:
    device = torch.device(args.device)
    Engine = TRTModule(args.engine, device)
    H, W = Engine.inp_info[0].shape[-2:]

    # set desired output names order
    Engine.set_desired(['num_dets', 'bboxes', 'scores', 'labels'])

    save_path = Path(args.out_dir)

    if not args.show and not save_path.exists():
        save_path.mkdir(parents=True, exist_ok=True)

    if args.imgs:
        images = path_to_list(args.imgs)
        print(f'images:{images}')
        for image in images:
            save_image = save_path / image.name
            bgr = cv2.imread(str(image))
            draw = bgr.copy()
            bgr, ratio, dwdh = letterbox(bgr, (W, H))
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            tensor = blob(rgb, return_seg=False)
            dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
            tensor = torch.asarray(tensor, device=device)
            # inference
            data = Engine(tensor)

            bboxes, scores, labels = det_postprocess(data)
            if bboxes.numel() == 0:
                # if no bounding box
                print(f'{image}: no object!')
                continue
            bboxes -= dwdh
            bboxes /= ratio

            for (bbox, score, label) in zip(bboxes, scores, labels):
                bbox = bbox.round().int().tolist()
                cls_id = int(label)
                cls = CLASSES_DET[cls_id]
                color = COLORS[cls]

                text = f'{cls}:{score:.3f}'
                x1, y1, x2, y2 = bbox

                (_w, _h), _bl = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 1)
                _y1 = min(y1 + 1, draw.shape[0])

                cv2.rectangle(draw, (x1, y1), (x2, y2), color, 2)
                cv2.rectangle(draw, (x1, _y1), (x1 + _w, _y1 + _h + _bl), (0, 0, 255), -1)
                cv2.putText(draw, text, (x1, _y1 + _h), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)

            if args.show:
                cv2.imshow('result', draw)
                cv2.waitKey(0)
            else:
                cv2.imwrite(str(save_image), draw)

    print(f'111 camera:{args.camera}')  # 111 camera:0
    if args.camera:
        print(f'camera:{args.camera}')
        cap = cv2.VideoCapture(0)
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                assert cap.isOpened(), 'VideoCapture is not opened'
                break
            draw = frame.copy()
            bgr, ratio, dwdh = letterbox(frame, (W, H))
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            tensor = blob(rgb, return_seg=False)
            dwdh = torch.asarray(dwdh * 2, dtype=torch.float32, device=device)
            tensor = torch.asarray(tensor, device=device)

            t1 = time.perf_counter()
            # inference
            data = Engine(tensor)

            bboxes, scores, labels = det_postprocess(data)
            bboxes -= dwdh
            bboxes /= ratio
            print(f'inference time: {time.perf_counter() - t1:.3f}s')
            
            for (bbox, score, label) in zip(bboxes, scores, labels):
                bbox = bbox.round().int().tolist()
                cls_id = int(label)
                cls = CLASSES_DET[cls_id]
                color = COLORS[cls]

                text = f'{cls}:{score:.3f}'
                x1, y1, x2, y2 = bbox

                (_w, _h), _bl = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 1)
                _y1 = min(y1 + 1, draw.shape[0])

                cv2.rectangle(draw, (x1, y1), (x2, y2), color, 2)
                cv2.rectangle(draw, (x1, _y1), (x1 + _w, _y1 + _h + _bl), (0, 0, 255), -1)
                cv2.putText(draw, text, (x1, _y1 + _h), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255, 255, 255), 2)
            if args.show:
                cv2.imshow('0', draw)
                if cv2.waitKey(1) in [ord('q'), 27]:
                    break


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, help='Engine file')
    parser.add_argument('--camera', action='store_true', help='camera index')

    parser.add_argument('--imgs', type=str,help='Images file, dir path or single img file path')
    parser.add_argument('--show',
                        action='store_true',
                        help='Show the detection results')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='TensorRT infer device')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    main(args)

和pt模型推理的速度相比,engine加速感觉没啥提升,如下图

rtx2060
win 10
trt 8.5.1.7

修改后代码的测速

image

pt模型的推理速度

image

因为本仓库采用了静态shape(640x640)进行推理,而pytorch的版本可能采用了最小padding(480x640),这样的话确实存在效率不如pytorch的情况.
您可以尝试fp16导出然后再进行对比,或者使用c++的版本

@Yichen-user
Copy link

请问下 使用infer-det.py 进行fp16的engine模型推理时 为什么输入的图片不需要指定dtype=float16呢 。yolov5代码里是加了half()的

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants