From 6f5a19d35b1f732715c8a33e091c6729367b75a5 Mon Sep 17 00:00:00 2001 From: wjf5203 Date: Thu, 8 Aug 2024 23:34:46 +0800 Subject: [PATCH] Add video training and inference scripts for GLEE-Pro and improve inference speed --- projects/GLEE/configs/videos/Pro/BURST_Pro.yaml | 1 + projects/GLEE/configs/videos/Pro/TAO_Pro.yaml | 1 + projects/GLEE/configs/videos/Pro/ovis_Pro.yaml | 1 + projects/GLEE/glee/GLEE.py | 3 ++- projects/GLEE/glee/config.py | 1 + 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/projects/GLEE/configs/videos/Pro/BURST_Pro.yaml b/projects/GLEE/configs/videos/Pro/BURST_Pro.yaml index 142ce4d..7b49c7e 100644 --- a/projects/GLEE/configs/videos/Pro/BURST_Pro.yaml +++ b/projects/GLEE/configs/videos/Pro/BURST_Pro.yaml @@ -3,6 +3,7 @@ MODEL: CROSS_TRACK: False PSEUDO_VIDEO: False FREEZE_WHOLE: False + VIDEO_WINDOW_SIZE: 2 BACKBONE: NAME: "D2_EVA02" EVA02: diff --git a/projects/GLEE/configs/videos/Pro/TAO_Pro.yaml b/projects/GLEE/configs/videos/Pro/TAO_Pro.yaml index 0b6cc33..85564da 100644 --- a/projects/GLEE/configs/videos/Pro/TAO_Pro.yaml +++ b/projects/GLEE/configs/videos/Pro/TAO_Pro.yaml @@ -3,6 +3,7 @@ MODEL: CROSS_TRACK: False PSEUDO_VIDEO: False FREEZE_WHOLE: False + VIDEO_WINDOW_SIZE: 2 BACKBONE: NAME: "D2_EVA02" EVA02: diff --git a/projects/GLEE/configs/videos/Pro/ovis_Pro.yaml b/projects/GLEE/configs/videos/Pro/ovis_Pro.yaml index fe154c2..959a53e 100644 --- a/projects/GLEE/configs/videos/Pro/ovis_Pro.yaml +++ b/projects/GLEE/configs/videos/Pro/ovis_Pro.yaml @@ -3,6 +3,7 @@ MODEL: CROSS_TRACK: False PSEUDO_VIDEO: False FREEZE_WHOLE: False + VIDEO_WINDOW_SIZE: 2 BACKBONE: NAME: "D2_EVA02" EVA02: diff --git a/projects/GLEE/glee/GLEE.py b/projects/GLEE/glee/GLEE.py index e7f0cb5..020751f 100644 --- a/projects/GLEE/glee/GLEE.py +++ b/projects/GLEE/glee/GLEE.py @@ -245,6 +245,7 @@ def __init__(self, cfg): self.num_queries = cfg.MODEL.MaskDINO.NUM_OBJECT_QUERIES self.instance_on = True self.visaul_prompt = cfg.MODEL.VISUAL_PROMPT + 
self.video_window_size = cfg.MODEL.VIDEO_WINDOW_SIZE self.is_lsj = cfg.INPUT.DATASET_MAPPER_NAME == 'coco_instance_lsj' @@ -754,7 +755,7 @@ def MinVIS_inference(self, batched_inputs, task): video_len = len(batched_inputs[0]['file_names']) - clip_length = 5 # self.batch_infer_len + clip_length = self.video_window_size batch_name_list = self.dataset_name_dicts[task] #split long video into clips to form a batch input diff --git a/projects/GLEE/glee/config.py b/projects/GLEE/glee/config.py index f22b853..f24b549 100644 --- a/projects/GLEE/glee/config.py +++ b/projects/GLEE/glee/config.py @@ -21,6 +21,7 @@ def add_glee_config(cfg): cfg.MODEL.CROSS_TRACK = False cfg.MODEL.TRACK_VERSION = 'v3' cfg.MODEL.EARLYFUSION = True + cfg.MODEL.VIDEO_WINDOW_SIZE = 10 cfg.INPUT.SAMPLING_FRAME_NUM = 1 cfg.INPUT.SAMPLING_FRAME_RANGE = 10