# 3-detect.py
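#
# Computes detection p-values for one watermarked (possibly attacked) token
# sequence via a sliding permutation test, under either the "transform" or
# "gumbel" watermarking scheme, and writes the p-values (one per test
# statistic) and the elapsed wall-clock time to disk.
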
from argparse import ArgumentParser
from csv import reader
from numpy import genfromtxt, savetxt
from time import perf_counter
from os.path import exists
from sys import exit
from watermarking.detection import sliding_permutation_test, phi
from watermarking.gumbel.key import gumbel_key_func
from watermarking.gumbel.score import gumbel_score, gumbel_edit_score
from watermarking.transform.key import transform_key_func
from watermarking.transform.score import transform_score, transform_edit_score
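
# Command-line settings for the detection experiment.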
parser = ArgumentParser(description="Experiment Settings")
parser.add_argument('--token_file', default="", type=str)
parser.add_argument('--model', default="facebook/opt-1.3b", type=str)
parser.add_argument('--method', default="transform", type=str)
parser.add_argument('--watermark_key_length', default=256, type=int)
parser.add_argument('--rolling_window_size', default=0, type=int)
parser.add_argument('--permutation_count', default=999, type=int)
parser.add_argument('--seed', default=0, type=int)
parser.add_argument('--Tindex', default=1, type=int)
parser.add_argument('--rolling_window_index', default=-1, type=int)
parser.add_argument('--gamma', default=0.4, type=float)
args = parser.parse_args()
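
# Hardcoded vocabulary sizes for the supported models avoid loading the
# model just to read them; unknown models fall through to the else branch.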
if args.model == "facebook/opt-1.3b":
    vocab_size = 50272
elif args.model == "openai-community/gpt2":
    vocab_size = 50257
elif args.model == "meta-llama/Meta-Llama-3-8B":
    vocab_size = 128256
else:
    # Unknown model: load it, print its vocabulary size so it can be added
    # to the cases above, then abort with an informative error.
    from transformers import AutoModelForCausalLM
    model = AutoModelForCausalLM.from_pretrained(args.model)
    print(model.get_output_embeddings().weight.shape[0])
    raise ValueError(f"Unsupported model: {args.model}")
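
# Load the per-sample watermark seeds and the (possibly attacked) token
# sequences produced by the earlier steps of the pipeline.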
seeds = genfromtxt(args.token_file + '-seeds.csv', delimiter=',', max_rows=1)
watermarked_samples = genfromtxt(
    args.token_file + '-attacked-tokens.csv', delimiter=",")
# Clamp to a valid row index (the last row is shape[0] - 1).
Tindex = min(args.Tindex, watermarked_samples.shape[0] - 1)
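
# Each method registers two test statistics: an edit-distance variant of its
# score (controlled by --gamma) and the plain score.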
if args.method == "transform":
    test_stats = []

    def dist1(x, y):
        return transform_edit_score(x, y, gamma=args.gamma)

    def test_stat1(
        tokens, watermark_key_length, rolling_window_size,
        generator, vocab_size, null=False
    ):
        # Forward `null` to phi so the permutation-null statistics are
        # computed under the null (as in the gumbel branch below).
        return phi(
            tokens, watermark_key_length, rolling_window_size, generator,
            vocab_size, transform_key_func, dist1, null=null, normalize=True
        )
    test_stats.append(test_stat1)

    def dist2(x, y):
        return transform_score(x, y)

    def test_stat2(
        tokens, watermark_key_length, rolling_window_size,
        generator, vocab_size, null=False
    ):
        return phi(
            tokens, watermark_key_length, rolling_window_size, generator,
            vocab_size, transform_key_func, dist2, null=null, normalize=True
        )
    test_stats.append(test_stat2)
elif args.method == "gumbel":
    test_stats = []

    def dist1(x, y):
        return gumbel_edit_score(x, y, gamma=args.gamma)

    def test_stat1(
        tokens, watermark_key_length, rolling_window_size,
        generator, vocab_size, null=False
    ):
        return phi(
            tokens, watermark_key_length, rolling_window_size, generator,
            vocab_size, gumbel_key_func, dist1, null=null, normalize=False
        )
    test_stats.append(test_stat1)

    def dist2(x, y):
        return gumbel_score(x, y)

    def test_stat2(
        tokens, watermark_key_length, rolling_window_size,
        generator, vocab_size, null=False
    ):
        return phi(
            tokens, watermark_key_length, rolling_window_size, generator,
            vocab_size, gumbel_key_func, dist2, null=null, normalize=False
        )
    test_stats.append(test_stat2)
else:
    raise ValueError(f"Unsupported method: {args.method}")
# Results for this (Tindex, window index) pair live in the -detect/ folder
# created by the helper file; delete that folder before re-running the
# experiment, since a complete result file short-circuits the run here.
results_path = (
    args.token_file + '-' + str(args.rolling_window_size) + '-' +
    str(args.permutation_count) + '-detect/' + str(args.Tindex) + '-' +
    str(args.rolling_window_index)
)
if exists(results_path + '.csv'):
    with open(results_path + '.csv', 'r') as f:
        first_row = next(reader(f), None)
    # Skip recomputation only if the stored row holds one p-value per
    # test statistic.
    if first_row is not None and len(first_row) == len(test_stats):
        exit()
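
# Wrapper around the sliding permutation test with this experiment's settings.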
def test(tokens, seed, test_stats):
    return sliding_permutation_test(
        tokens,
        vocab_size,
        args.watermark_key_length,
        args.rolling_window_size,
        args.permutation_count,
        seed,
        args.rolling_window_index,
        test_stats,
    )
start_time = perf_counter()
pval = test(watermarked_samples[Tindex, :], seeds[Tindex], test_stats)
end_time = perf_counter()

savetxt(results_path + '.csv', pval, delimiter=',')
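
# Record the wall-clock detection time alongside the p-values.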
with open(results_path + '-time.txt', "w") as f:
    f.write(str(end_time - start_time))
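
# Example invocation (hypothetical file prefix; the *-seeds.csv and
# *-attacked-tokens.csv files and the -detect/ output folder must already
# exist from the earlier pipeline steps and the helper file):
#   python 3-detect.py --token_file results/opt-transform --method transform \
#       --rolling_window_size 20 --permutation_count 999 --Tindex 0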