Skip to content

Commit

Permalink
add video training and infer scripts for GLEE-Pro and improve infer speed
Browse files Browse the repository at this point in the history
  • Loading branch information
wjf5203 committed Aug 8, 2024
1 parent 5f1832d commit 028ecee
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 2 deletions.
42 changes: 42 additions & 0 deletions projects/GLEE/configs/videos/Pro/BURST_Pro.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GLEE-Pro video config for BURST.
# Inherits the image-level Lite R50 base config and overrides the backbone
# (D2_EVA02), the pixel decoder settings, datasets, schedule and input pipeline.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# using the usual detectron2 / MaskDINO key layout (MODEL.*, SOLVER.*, INPUT.*,
# ...) -- confirm against the project's config definitions.
_BASE_: "../../images/Lite/base_clip_frozen_image_R50.yaml"
MODEL:
  CROSS_TRACK: False
  PSEUDO_VIDEO: False
  FREEZE_WHOLE: False
  BACKBONE:
    NAME: "D2_EVA02"
  EVA02:
    CHECKPOINT: False
    # PRETRAINED_WEIGHT: 'weights/converted_EVA02_m38m_psz14to16.pth'
  SEM_SEG_HEAD:
    # pixel decoder
    PIXEL_DECODER_NAME: "MaskDINOEncoder"
    DIM_FEEDFORWARD: 2048
    NUM_FEATURE_LEVELS: 4
    TOTAL_NUM_FEATURE_LEVELS: 4
    # Same four pyramid levels feed both the head and the deformable encoder.
    IN_FEATURES: ["p3", "p4", "p5", "p6"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["p3", "p4", "p5", "p6"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
    FEATURE_ORDER: "low2high"
DATASETS:
  TRAIN: ("BURST_video_train",)
  TEST: ("BURST_video_val",)
SOLVER:
  IMS_PER_BATCH: 8
  BASE_LR: 0.0001
  STEPS: (6000, )
  MAX_ITER: 8000
  CHECKPOINT_PERIOD: 2000
INPUT:
  IMAGE_SIZE: 1536
  MIN_SCALE: 0.1
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "coco_instance_lsj"
TEST:
  # Larger than SOLVER.MAX_ITER (8000); presumably this disables periodic
  # evaluation during training -- confirm against the trainer's eval hook.
  EVAL_PERIOD: 100000
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: False
  NUM_WORKERS: 8
OUTPUT_DIR: ./GLEE_Pro_BURST
42 changes: 42 additions & 0 deletions projects/GLEE/configs/videos/Pro/TAO_Pro.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GLEE-Pro video config for TAO.
# Inherits the image-level Lite R50 base config and overrides the backbone
# (D2_EVA02), the pixel decoder settings, datasets, schedule and input pipeline.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# using the usual detectron2 / MaskDINO key layout (MODEL.*, SOLVER.*, INPUT.*,
# ...) -- confirm against the project's config definitions.
_BASE_: "../../images/Lite/base_clip_frozen_image_R50.yaml"
MODEL:
  CROSS_TRACK: False
  PSEUDO_VIDEO: False
  FREEZE_WHOLE: False
  BACKBONE:
    NAME: "D2_EVA02"
  EVA02:
    CHECKPOINT: False
    # PRETRAINED_WEIGHT: 'weights/converted_EVA02_m38m_psz14to16.pth'
  SEM_SEG_HEAD:
    # pixel decoder
    PIXEL_DECODER_NAME: "MaskDINOEncoder"
    DIM_FEEDFORWARD: 2048
    NUM_FEATURE_LEVELS: 4
    TOTAL_NUM_FEATURE_LEVELS: 4
    # Same four pyramid levels feed both the head and the deformable encoder.
    IN_FEATURES: ["p3", "p4", "p5", "p6"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["p3", "p4", "p5", "p6"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
    FEATURE_ORDER: "low2high"
DATASETS:
  # NOTE(review): training uses the BURST split while evaluation uses TAO;
  # presumably intentional -- confirm this is not a copy-paste leftover.
  TRAIN: ("BURST_video_train",)
  TEST: ("TAO_video_val",)
SOLVER:
  IMS_PER_BATCH: 8
  BASE_LR: 0.0001
  STEPS: (6000, )
  MAX_ITER: 8000
  CHECKPOINT_PERIOD: 2000
INPUT:
  IMAGE_SIZE: 1536
  MIN_SCALE: 0.1
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "coco_instance_lsj"
TEST:
  # Larger than SOLVER.MAX_ITER (8000); presumably this disables periodic
  # evaluation during training -- confirm against the trainer's eval hook.
  EVAL_PERIOD: 100000
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: False
  NUM_WORKERS: 8
OUTPUT_DIR: ./GLEE_Pro_TAO
42 changes: 42 additions & 0 deletions projects/GLEE/configs/videos/Pro/ovis_Pro.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GLEE-Pro video config for OVIS.
# Inherits the image-level Lite R50 base config and overrides the backbone
# (D2_EVA02), the pixel decoder settings, datasets, schedule and input pipeline.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# using the usual detectron2 / MaskDINO key layout (MODEL.*, SOLVER.*, INPUT.*,
# ...) -- confirm against the project's config definitions.
_BASE_: "../../images/Lite/base_clip_frozen_image_R50.yaml"
MODEL:
  CROSS_TRACK: False
  PSEUDO_VIDEO: False
  FREEZE_WHOLE: False
  BACKBONE:
    NAME: "D2_EVA02"
  EVA02:
    CHECKPOINT: False
    # PRETRAINED_WEIGHT: 'weights/converted_EVA02_m38m_psz14to16.pth'
  SEM_SEG_HEAD:
    # pixel decoder
    PIXEL_DECODER_NAME: "MaskDINOEncoder"
    DIM_FEEDFORWARD: 2048
    NUM_FEATURE_LEVELS: 4
    TOTAL_NUM_FEATURE_LEVELS: 4
    # Same four pyramid levels feed both the head and the deformable encoder.
    IN_FEATURES: ["p3", "p4", "p5", "p6"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["p3", "p4", "p5", "p6"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
    FEATURE_ORDER: "low2high"
DATASETS:
  TRAIN: ("ovis_train",)
  TEST: ("ovis_val",)
SOLVER:
  IMS_PER_BATCH: 8
  BASE_LR: 0.0001
  # Longer schedule than the other Pro video configs (18000 vs 8000 iterations).
  STEPS: (12000, )
  MAX_ITER: 18000
  CHECKPOINT_PERIOD: 2000
INPUT:
  IMAGE_SIZE: 1536
  MIN_SCALE: 0.1
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "coco_instance_lsj"
TEST:
  # Larger than SOLVER.MAX_ITER (18000); presumably this disables periodic
  # evaluation during training -- confirm against the trainer's eval hook.
  EVAL_PERIOD: 100000
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 8
OUTPUT_DIR: ./GLEE_Pro_ovis
42 changes: 42 additions & 0 deletions projects/GLEE/configs/videos/Pro/ytvis19_Pro.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# GLEE-Pro video config for YouTube-VIS 2019.
# Inherits the image-level Lite R50 base config and overrides the backbone
# (D2_EVA02), the pixel decoder settings, datasets, schedule and input pipeline.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# using the usual detectron2 / MaskDINO key layout (MODEL.*, SOLVER.*, INPUT.*,
# ...) -- confirm against the project's config definitions.
_BASE_: "../../images/Lite/base_clip_frozen_image_R50.yaml"
MODEL:
  CROSS_TRACK: False
  PSEUDO_VIDEO: False
  FREEZE_WHOLE: False
  BACKBONE:
    NAME: "D2_EVA02"
  EVA02:
    CHECKPOINT: False
    # PRETRAINED_WEIGHT: 'weights/converted_EVA02_m38m_psz14to16.pth'
  SEM_SEG_HEAD:
    # pixel decoder
    PIXEL_DECODER_NAME: "MaskDINOEncoder"
    DIM_FEEDFORWARD: 2048
    NUM_FEATURE_LEVELS: 4
    TOTAL_NUM_FEATURE_LEVELS: 4
    # Same four pyramid levels feed both the head and the deformable encoder.
    IN_FEATURES: ["p3", "p4", "p5", "p6"]
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["p3", "p4", "p5", "p6"]
    COMMON_STRIDE: 4
    TRANSFORMER_ENC_LAYERS: 6
    FEATURE_ORDER: "low2high"
DATASETS:
  TRAIN: ("ytvis_2019_train", )
  TEST: ("ytvis_2019_val",)
SOLVER:
  IMS_PER_BATCH: 8
  BASE_LR: 0.0001
  STEPS: (6000, )
  MAX_ITER: 8000
  CHECKPOINT_PERIOD: 2000
INPUT:
  IMAGE_SIZE: 1536
  MIN_SCALE: 0.1
  MAX_SCALE: 2.0
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "coco_instance_lsj"
TEST:
  # Larger than SOLVER.MAX_ITER (8000); presumably this disables periodic
  # evaluation during training -- confirm against the trainer's eval hook.
  EVAL_PERIOD: 100000
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 8
OUTPUT_DIR: ./GLEE_Pro_ytvis19
5 changes: 3 additions & 2 deletions projects/GLEE/glee/data/ytvis_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,9 @@ def instances_to_coco_json_video(inputs, outputs):
segms.append(dummy_seg)
_boxes.append(None)
else:
segms.append(mask_util.encode(np.array(_mask[:, :, None], order="F", dtype="uint8"))[0])
if _mask.sum()>5 and _box is not None:
rle = mask_util.encode(np.array(_mask[:, :, None], order="F", dtype="uint8"))[0]
segms.append(rle)
if mask_util.area(rle)>5 and _box is not None:
_boxes.append(_box.tolist())

for rle in segms:
Expand Down

0 comments on commit 028ecee

Please sign in to comment.