# minigpt_textual_attack.py
# 1. Download Vicuna's weights to ./models (it's a delta version).
# 2. Download LLaMA's weights via: https://huggingface.co/huggyllama/llama-13b/tree/main
# 3. Merge them and set up the config.
# 4. Download the pretrained checkpoints of the MiniGPT-4 components.
# 5. The vision part will be downloaded automatically when the model is launched.
import argparse
import os
import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from torchvision.utils import save_image
from minigpt_utils import prompt_wrapper, text_attacker
from minigpt4.common.config import Config
from minigpt4.common.dist_utils import get_rank
from minigpt4.common.registry import registry
from minigpt4.conversation.conversation import Chat, CONV_VISION
# imports modules for registration
from minigpt4.datasets.builders import *
from minigpt4.models import *
from minigpt4.processors import *
from minigpt4.runners import *
from minigpt4.tasks import *
def parse_args(argv=None):
    """Parse the command-line options of the textual attack script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so a
            call without arguments behaves exactly as before.

    Returns:
        argparse.Namespace: the parsed options, exposed as ``cfg_path``,
        ``gpu_id``, ``n_iters``, ``save_dir``, ``n_candidates`` and
        ``options``.
    """
    parser = argparse.ArgumentParser(description="Demo")

    # Model / device selection.
    parser.add_argument("--cfg-path", default="eval_configs/minigpt4_eval.yaml", help="path to configuration file.")
    parser.add_argument("--gpu-id", type=int, default=0, help="specify the gpu to load the model.")

    # Attack hyper-parameters and output location.
    parser.add_argument("--n_iters", type=int, default=500, help="specify the number of iterations for attack.")
    parser.add_argument("--save_dir", type=str, default='output',
                        help="save directory")
    parser.add_argument("--n_candidates", type=int, default=100,
                        help="n_candidates")

    # Free-form key=value overrides; left as None when the flag is absent.
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )

    return parser.parse_args(argv)
def setup_seeds(config):
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    The seed is ``config.run_cfg.seed`` plus the value returned by
    ``get_rank()``.

    Args:
        config: object exposing ``run_cfg.seed``.
    """
    rank_seed = config.run_cfg.seed + get_rank()

    # Same seed for every generator, applied in the order: random, numpy, torch.
    for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        seed_rng(rank_seed)

    # Turn on cuDNN's deterministic mode and turn off its benchmark mode.
    cudnn.deterministic = True
    cudnn.benchmark = False
# ========================================
#             Model Initialization
# ========================================

print('>>> Initializing Models')

# Parse the CLI options and build the project configuration from them.
args = parse_args()
cfg = Config(args)

# Look up the model class named by the config, build it, move it to the
# GPU chosen with --gpu-id and switch it to eval mode.
model_config = cfg.model_cfg
model_config.device_8bit = args.gpu_id
model_cls = registry.get_model_class(model_config.arch)
model = model_cls.from_config(model_config).to(f'cuda:{args.gpu_id}')
model.eval()

# makedirs(exist_ok=True) replaces the exists()+mkdir() pair: it creates
# missing parent directories for nested --save_dir values and has no
# check-then-create race. It raises if the path exists but is not a directory.
os.makedirs(args.save_dir, exist_ok=True)

from minigpt_utils import generator

my_generator = generator.Generator(model=model)

print('[Initialization Finished]\n')
"""
[Todos] implement text-based attacks (https://github.com/Eric-Wallace/universal-triggers) on this setup.
"""
# ========================================
#                  Attack
# ========================================

import csv

# The first column of every row in the corpus is one optimisation target.
# - `with` closes the file even if parsing raises.
# - newline="" is the mode the csv module documents for files handed to
#   csv.reader.
# - `if row` skips blank lines, which csv.reader yields as empty lists and
#   which would otherwise raise IndexError on row[0].
with open("harmful_corpus/derogatory_corpus.csv", "r", newline="") as corpus_file:
    targets = [row[0] for row in csv.reader(corpus_file, delimiter=",") if row]

my_attacker = text_attacker.Attacker(args, model, targets, device=model.device)

# Prompt template and offset come from the project-local prompt_wrapper module.
text_prompt_template = prompt_wrapper.minigpt4_chatbot_prompt_text_attack
offset = prompt_wrapper.minigpt4_chatbot_prompt_offset

# Run the attack for --n_iters iterations with a fixed batch size of 8.
adv_prompt = my_attacker.attack(text_prompt_template=text_prompt_template, offset=offset,
                                num_iter=args.n_iters, batch_size=8)