PyTorch implementation of M3TR: Multi-modal Multi-label Recognition with Transformer. ACM MM 2021
Python 3.6+
PyTorch 1.7
CUDA 10.1
Tesla V100 × 4
python main.py --data COCO2014 --data_root_dir $DATA_PATH$ --save_dir $SAVE_PATH$ --i 448 --lr 3e-4 -b 64
python main.py --data COCO2014 --data_root_dir $DATA_PATH$ --save_dir $SAVE_PATH$ --i 448 --lr 3e-4 -b 64 -e --resume checkpoint/COCO2014/checkpoint_COCO.pth
- If you find this work helpful, please cite our paper:
@inproceedings{Zhao2021M3TR,
author = {Zhao, Jiawei and Zhao, Yifan and Li, Jia},
title = {M3TR: Multi-Modal Multi-Label Recognition with Transformer},
year = {2021},
address = {New York, NY, USA},
booktitle = {Proceedings of the 29th ACM International Conference on Multimedia},
pages = {469--477},
}