forked from mozilla/bugbug
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcomment_level_labeler.py
90 lines (68 loc) · 2.72 KB
/
comment_level_labeler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import csv
import os
import random
from bugbug import bugzilla
# Interactive command-line labeler for individual bug comments.
#
# Depending on --goal, every not-yet-labeled comment of a bug is shown and the
# user marks it as containing ('y') or not containing ('n') steps to reproduce
# ('str') or a regression range ('regressionrange'). Labels are read from and
# written back to bugbug/labels/<goal>.csv.
parser = argparse.ArgumentParser()
parser.add_argument('--goal', help='Goal of the labeler', choices=['str', 'regressionrange'], default='str')
args = parser.parse_args()

# Load the model used further down to decide which bugs are worth showing.
# The imports stay inside the branches so only the selected model is imported.
if args.goal == 'str':
    from bugbug.models.bug import BugModel
    model = BugModel.load('bugmodel')
elif args.goal == 'regressionrange':
    from bugbug.models.regression import RegressionModel
    model = RegressionModel.load('regressionmodel')

file_path = os.path.join('bugbug', 'labels', f'{args.goal}.csv')

# The csv module expects files opened with newline='' so that it can handle
# line endings itself (otherwise stray blank rows can appear on some platforms).
with open(file_path, 'r', newline='') as f:
    reader = csv.reader(f)
    # Skip the header row; the default keeps an empty file from raising.
    next(reader, None)
    # Rows are (bug_id, comment_num, label); blank rows are ignored.
    labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader if r]

# (bug_id, comment_num) pairs that already have a label and are not shown again.
already_done = {(bug_id, comment_num) for bug_id, comment_num, _ in labeled_comments}

bugs = []
for bug in bugzilla.get_bugs():
    # For the str and regressionrange problems, we don't care about test failures,
    if 'intermittent-failure' in bug['keywords'] or 'stockwell' in bug['whiteboard'] or 'permafail' in bug['summary'].lower():
        continue

    first_comment = bug['comments'][0]['text'].lower()

    # bugs filed from Socorro,
    if 'this bug was filed from the socorro interface' in first_comment:
        continue

    # and fuzzing bugs.
    if 'fuzzing' in first_comment:
        continue

    bugs.append(bug)

# Present the bugs in random order.
random.shuffle(bugs)

for bug in bugs:
    # Only show bugs that are really bugs/regressions for labeling.
    c = model.classify(bug)
    if c != 1:
        continue

    v = None
    new_labels = False

    for i, comment in enumerate(bug['comments']):
        if (bug['id'], i) in already_done:
            continue

        # 'clear' only exists on POSIX shells; Windows uses 'cls'.
        os.system('cls' if os.name == 'nt' else 'clear')
        print(f'Bug {bug["id"]} - {bug["summary"]}')
        print(f'Comment {i}')
        print(comment['text'])

        if args.goal == 'str':
            print('\nY for comment containing STR, N for comment not containing STR, K to skip, E to exit')
        elif args.goal == 'regressionrange':
            print('\nY for comment containing regression range, N for comment not containing regression range, K to skip, E to exit')
        # The prompt advertises upper-case keys, so accept either case and
        # ignore surrounding whitespace.
        v = input().strip().lower()

        # 'k' skips the remaining comments of this bug, 'e' leaves the tool.
        if v in ['e', 'k']:
            break

        # Any other answer is ignored and the next comment is shown.
        if v in ['y', 'n']:
            labeled_comments.append((bug['id'], i, v))
            new_labels = True

    # Persist as soon as this bug produced labels, including when the user then
    # skipped or exited, so that answers already given are never lost.
    if new_labels:
        with open(file_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['bug_id', 'comment_num', f'has_{args.goal}'])
            writer.writerows(sorted(labeled_comments))

    # The user already asked to exit while labeling; don't ask a second time.
    if v == 'e':
        break

    print('\nE to exit, anything else to continue')
    v = input().strip().lower()

    if v == 'e':
        break