-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtapeop
executable file
·539 lines (450 loc) · 18 KB
/
tapeop
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
#!/bin/env python
# Usage/help text handed to optparse.OptionParser(usage=...); optparse
# substitutes the program name for "%prog" when it prints the message.
# NOTE(review): the open_tape handler requires a tape name argument even
# though the open_tape entry below does not show one.
USAGE="""
%prog [options] [commands]
Status / database access -- passive:
tapes [-v] - list tapes and their status. Also shows amount of data
written, if -v is passed.
status - show breakdown of the active tape and backup jobs.
unassigned - show number and total size of unassigned targets.
tape_detail [tape_name] [file_number] [-v] - print contents of the
indicated tape, with status information. If file_number is
provided, only that file_number is shown. If -v is given, the
contents (filenames, sizes, checksums) are also printed. This can
be used to generate backup summaries.
where_is [filename] - search database for backups of the indicated
file. Only knows about assigned (or archived or confirmed) files.
Tape setup commands
open_tape - initialize a new tape.
close_tape - finalize tape and unassociate unarchived jobs.
activate_tape [tape_name] - mount an existing tape as 'online'.
Job setup
import [filename] - load list of backup targets from file. (Causes
a remote connection + checksum session.)
assign - Assign targets to the active tape.
Archiving and confirmation:
archive - copy next assigned target to the active tape.
confirm [file_number] - read back data from tape, checksum it, and
compare to database.
"""
import taped
from tapedb import TapeDB, BackupItem
import sys, os, time
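#
# Main configuration. The tape drive (or emulator directory), the ssh
# command and the database file name are read from the config file given
# by -c/--config-file (default: tape.conf). The active tape comes from
# the database.
#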
# Command-line parsing.  The name ``o`` must stay bound to the parser
# object: the command dispatch further down reports unknown commands
# through o.error().
import optparse
o = optparse.OptionParser(usage=USAGE)

# Plain boolean switches (default to None when absent).
o.add_option('--retry', action='store_true')
o.add_option('--no-db-update', action='store_true')
o.add_option(
    '--no-scan',
    action='store_true',
    help=('Use this switch for doing blind backups with no need for readback '
          'confirmation. The source tree for each target will not be scanned, '
          'so no filenames / checksums will be stored in the local database.'),
)

# Options with explicit defaults.
o.add_option('-c', '--config-file', default='tape.conf')
o.add_option('-v', '--verbose', action='store_true', default=False)
o.add_option(
    '--repeat',
    action='store_true',
    help="Keep running this command until an error occurs.",
)

# opts: parsed switches; args: positional words (command, then its arguments).
opts, args = o.parse_args()
# Load the [default] section of the config file and build the tape drive
# and database handles used by every command.
from ConfigParser import ConfigParser

cfg = ConfigParser()
cfg.read(opts.config_file)
db_file = cfg.get('default', 'database_file')
ssh_cmd = cfg.get('default', 'ssh_command')

# An 'emulator_dir' entry selects the drive emulator; otherwise a real
# tape device is used.
use_emulator = cfg.has_option('default', 'emulator_dir')
if use_emulator:
    drive_class = taped.TapeDriveEmulator
    tape_dev = cfg.get('default', 'emulator_dir')
else:
    drive_class = taped.TapeDrive
    # NOTE(review): the third positional argument of ConfigParser.get() is
    # ``raw``, not a fallback value, so a missing 'tape_device' option still
    # raises NoOptionError here.  Kept as-is pending a decision on whether a
    # None device is meaningful to TapeDrive.
    tape_dev = cfg.get('default', 'tape_device', None)
td = drive_class(tape_dev, ssh_cmd)

db = TapeDB(db_file)
# Both values describe the tape currently marked active in the database;
# tape_name is None when no tape is active (checked further down).
tape_id, tape_name = db.get_active_tape()
# Process exit statuses used by the commands below:
#   EXIT_NO_DATA - nothing left to do
#   EXIT_REQUEST - stop was requested (Ctrl-C flag or the exit file)
#   EXIT_TROUBLE - the archive transfer reported a failure
EXIT_NO_DATA, EXIT_REQUEST, EXIT_TROUBLE = 40, 41, 42

# If a file with this name exists in the working directory, refuse to
# start any work at all.
exit_file = 'exit_file'
if os.path.exists(exit_file):
    print('Exiting with prejudice because of presence of %s' % exit_file)
    sys.exit(EXIT_REQUEST)
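#
# SIGINT (Ctrl-C) handling. The first Ctrl-C only records a timestamp
# that the archive and confirm loops check between jobs; a second Ctrl-C
# within one second raises an exception immediately.
# (Original author's caveat: this doesn't really work.)
#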
import signal
# Timestamp (from time.time()) of the most recent Ctrl-C, or 0 if none has
# been received.  The archive and confirm loops only keep running while
# this is 0, and the script exits with EXIT_REQUEST at the end if it is set.
last_exit_flag = 0
def ctrlc_handler(signum, frame):
    """Handle SIGINT by requesting a deferred exit.

    The first Ctrl-C stores the current time in the module-level
    ``last_exit_flag``; the long-running loops poll that flag and stop at
    the next job boundary.  A second Ctrl-C arriving less than one second
    after the previous one aborts immediately.

    Parameters follow the ``signal.signal`` handler convention
    (signal number, current stack frame); neither is used.

    Raises:
        RuntimeError: when pressed twice within one second.
    """
    global last_exit_flag
    print('Ctrl-C detected. Will exit at next convenient opportunity')
    print('(press again within 1 second will raise an immediate exception).')
    now = time.time()
    if now - last_exit_flag < 1.:
        # Call form of raise: the legacy "raise E, msg" statement is a
        # syntax error on Python 3 and discouraged on Python 2.
        raise RuntimeError("user requested immediate exit.")
    last_exit_flag = now
# Install ctrlc_handler for SIGINT so that Ctrl-C goes through the
# deferred-exit logic instead of Python's default KeyboardInterrupt.
signal.signal(signal.SIGINT, ctrlc_handler)
def print_tape_report(db, tape_name):
    """Print the layout report that ``db`` holds for ``tape_name``.

    Each report row is printed as "<row[1]> - <row[2]> : <row[0]>",
    followed by one blank line.
    """
    print('Current layout of work on tape "%s":' % tape_name)
    for label, first, last in (
            (row[0], row[1], row[2]) for row in db.get_tape_report(tape_name)):
        print(' %5i - %5i : %s' % (first, last, label))
    print('')
def print_usage_summary(db, tape_name):
    """Print job counts and sizes for a tape, then the unassigned count.

    When ``tape_name`` is None the per-tape section is skipped and only the
    number of unassigned targets is shown.  Sizes are the summed
    ``size_kb`` of each job's target divided by 1e6 and labelled GB.
    """
    print('')
    if tape_name is not None:
        print('Work on "%s"' % tape_name)
        running_total = 0
        for state in ('confirmed', 'recorded', 'assigned'):
            work = db.get_tape_work(tape_name, state)
            state_size = sum(
                item.get_target_info().size_kb for item in work) / 1e6
            print(' type %-20s: %6i (%9.3f GB)' % (state, len(work), state_size))
            running_total += state_size
        print(' total usage: %.3f GB' % running_total)
        print('')
    pending = db.get_unassigned_targets()
    print('There are %i unassigned targets.' % len(pending))
    print('')
#
# Possibly perform an action, such as writing a new archive or confirming
# an archive.
#
# Without an active tape the default action is the tape listing; otherwise
# it is the status report.
command = 'status'
if tape_name is None:
    print('No tape marked as active.')
    command = 'tapes'

# First positional word overrides the default command; the second, if any,
# is that command's argument ("token").
command = args[0] if len(args) > 0 else command
token = args[1] if len(args) > 1 else None
if command == 'status':
    # status [tape_name]: an explicit name replaces the active tape.
    tape_name = tape_name if token is None else token
    for show in (print_usage_summary, print_tape_report):
        show(db, tape_name)
    sys.exit(0)
elif command == 'unassigned':
targets = db.get_unassigned_targets(get_sizes=True)
size_gb = sum([t[2] for t in targets if t[2] is not None]) / 1e6
print 'There are %i unassigned targets (%.3f GB).' % (len(targets), size_gb)
print
elif command == 'tapes':
targets = db.get_unassigned_targets()
print '\nThere are %i unassigned targets. (Use "unassigned" to see size.)\n' % len(targets)
print 'All tapes in database:'
tape_infos = db.get_tape_info(token)
nlen = max([4]+[len(r['name']) for r in tape_infos])
slen = max([7]+[len(r['serial']) for r in tape_infos])
fmt = '{name:%i} {online_flag:4} {serial:%i} {id:>5} {status:10}' % (nlen, slen)
if opts.verbose:
fmt += ' {size_gb:>10}'
header = fmt.format(name='Name', id='ID', status='Status', serial='SerialNo',
online_flag='ON', size_gb='WroteGB')
print header
print '-'*len(header)
for info in tape_infos:
info['online_flag'] = {0: '.', 1: '*'}[info['online']]
if opts.verbose:
jobs = db.get_tape_work(info['name'], ['recorded', 'confirmed'])
size_gb = sum([j.get_target_info().size_kb for j in jobs]) / 1e6
info['size_gb'] = '%.3f' % size_gb
print fmt.format(**info)
print
sys.exit(0)
elif command == 'open_tape':
assert(token is not None) # Name for new tape!
tape_name = token
print 'This action will create a new tape entry with name "%s".' % tape_name
yn = raw_input('Do you wish to proceed? [yn] ')
if not yn in ['y', 'yes', 'Y']:
print 'Aborted.'
sys.exit(1)
print
serno = raw_input('Serial number? [blank is ok] ')
print
online = raw_input('Put the tape online now? [yn] ')
for v,L in [(True, ('y', 'yes', 'Y')),
(False, ('n', 'no', 'N'))]:
if online in L:
online = v
break
else:
print 'Invalid response.'
sys.exit(1)
print 'Creating tape entry...'
db.create_tape(tape_name, serno, status='open', online=online)
elif command == 'activate_tape':
assert(token is not None) # name of an existing tape
tape_name = token
print 'Setting %s as the online tape.' % tape_name
db.set_active_tape(tape_name)
print
elif command == 'close_tape':
if token is not None:
tape_name = token
print 'This action will apply to tape "%s".' % tape_name
print
print 'This action will mark the tape as full and dissociate all '\
'non-archived targets from this tape.'
print
yn = raw_input('Do you wish to proceed? [yn] ')
if not yn in ['y', 'yes', 'Y']:
print 'Aborted.'
sys.exit(1)
jobs = db.get_tape_work(tape_name, ['assigned'])
print 'Found %i jobs to dissociate.' % len(jobs)
c = db.conn.cursor()
for j in jobs:
j.destroy(cursor=c)
db.conn.commit()
db.close_tape(tape_name)
elif command == 'tape_detail':
# Args can be [], [tape_name], or [tape_name, file_num].
if token is not None:
tape_name = token
if len(args) > 2:
file_num = int(args[2])
else:
file_num = None
tape_info = db.get_tape_info(tape_name)
if len(tape_info) == 0:
raise RuntimeError, 'No tape found with name="%s"' % tape_name
assert len(tape_info) == 1
print '# Tape name="{name}" serial="{serial}"'.format(**tape_info[0])
jobs = db.get_tape_work(tape_name, ['recorded', 'confirmed'])
fmt = '{file_num:5} {status:10} {size:>12} {name}'
print fmt.format(file_num='#FNum', status='Status',
size='Size_GB', name='Target_name')
for j in jobs:
if file_num is not None and file_num != j.file_number:
continue
info = j.get_target_info()
print fmt.format(file_num=j.file_number, status=j.status,
size='%.3f' % (info.size_kb/1e6), name=info.name)
if opts.verbose:
for row in info.files:
fname, size_kb, md5 = row
print 'file %s %10.6f %s' % (md5, size_kb/1e6, fname)
sys.exit(0)
elif command == 'where_is':
nlen = max([len(r['name']) for r in db.get_tape_info(None)])
fmt = '{tape_name:%i} {tape_filenum:>5} {backup_status:10} {md5sum:32} {target_name}{filename}' % nlen
print fmt.format(tape_name='#tape_name', tape_filenum='Fnum', backup_status='status',
md5sum='md5sum', target_name='', filename='filename')
for row in db.find_file(token):
print fmt.format(**row)
sys.exit(0)
elif command == 'archive':
transfer_rate_kbs = None
while last_exit_flag == 0:
jobs = db.get_tape_work(tape_name, 'assigned')
if len(jobs) == 0:
print('No jobs found.')
sys.exit(EXIT_NO_DATA) # no jobs left.
job = jobs[0]
info = job.get_target_info()
if transfer_rate_kbs:
all_size_kb = sum([j.get_target_info().size_kb for j in jobs])
rate_string = '(%.2f hours)' % (all_size_kb / transfer_rate_kbs / 3600)
print 'Total data remaining: %.3f GB %s' % (all_size_kb / 1e6, rate_string)
report = db.get_tape_report(tape_name)
if len(report) == 0:
next_file_number = 0
else:
next_file_number = report[-1][-1]+1
print 'Seeking to file_number=%i' % next_file_number
td.goto(next_file_number)
print 'Copying %.3f GB from network to tape...' % (info.size_kb / 1e6)
start_time = time.time()
excluded = db.get_excluded_subdirs(info.name)
code, out, err = td.archive_remote(info.name, excluded)
updated = False
if code == 0:
if opts.no_db_update:
print 'Archive job succeeded, but DB will not be updated.'
else:
print '... success.'
print 'Marking record as archived.'
job.status = 'recorded'
job.file_number = next_file_number
job.commit()
updated = True
else:
print '... exit code=%i' % code
print out, err
sys.exit(EXIT_TROUBLE)
elapsed = time.time() - start_time
transfer_rate_kbs = info.size_kb / elapsed
print(' -- completed %.3f GB in %.1f minutes; rate is %.3f GB/min' % (
info.size_kb/1e6, elapsed / 60, transfer_rate_kbs / 1e6 * 60))
if not updated:
print
print 'Database not updated. If you choose to do so manually, the command is:'
print
print ('update backups set file_number=%i,status=\'recorded\' '
'where target_id=%i;' % (next_file_number, job.target_id))
print
if not opts.repeat:
break
print('Exiting ... pass --repeat to keep doing this.')
elif command == 'confirm':
# get file number...
job_mask = ['recorded']
if opts.retry:
job_mask.append('confirmed')
transfer_rate_kbs = None
while last_exit_flag == 0:
jobs = db.get_tape_work(tape_name, job_mask)
print 'Found %i jobs to confirm.' % len(jobs)
if len(jobs) == 0:
sys.exit(EXIT_NO_DATA)
if token is None or token == 'next':
all_size_kb = sum([j.get_target_info().size_kb for j in jobs])
rate_string = '(? hours)'
if transfer_rate_kbs:
rate_string = '(%.2f hours)' % (all_size_kb / transfer_rate_kbs / 3600)
print 'Total data remaining: %.3f GB %s' % (all_size_kb / 1e6, rate_string)
j = jobs[0]
else:
all_size_kb = None
for j in jobs:
if int(token) == j.file_number:
print 'Matched file_number=%i' % j.file_number
break
else:
print 'Could not find outstanding job at file_number=%s' % token
sys.exit(1)
# Run the confirmation.
info = j.get_target_info()
assert(j.file_number >= 0)
print 'Seeking to file_number %i...' % j.file_number
td.goto(j.file_number)
print 'Checksumming %.3f GB from tape...' % (info.size_kb / 1e6)
start_time = time.time()
CD = td.tape_checksums()
CD1 = {}
for row in CD:
# Check prefix match; strip it off.
assert(row[1].startswith(info.name[1:] + '/'))
fname = row[1][len(info.name):]
CD1[fname] = row[0]
# If this archive has symlinks, then we need list the contents of
# the archive and record the presence of those pseudo-files.
n_symlinks = sum([f[2] == 'symlink' for f in info.files])
if n_symlinks:
print 'Re-seeking to file_number %i...' % j.file_number
td.goto(j.file_number)
print 'Confirming %i symlinks...' % n_symlinks
all_files = td.tape_files()
for fname in all_files:
if fname.endswith('/'):
continue
# Check prefix match; strip it off.
assert(fname.startswith(info.name[1:] + '/'))
fname = fname[len(info.name):]
if fname not in CD1:
CD1[fname] = 'symlink'
ok = True
for name, size_kb, md5 in info.files:
if opts.verbose:
print '%s [%s]...' % (name, md5),
if md5 != CD1.pop(name):
print 'Failed md5sum: %s' % name
ok = False
elif opts.verbose:
print 'ok'
if len(CD1):
ok = False
print('Backup contained %i more files than expected!' % len(CD1))
print('For example:')
for k, v in CD1.items()[:5]:
print ' %s [%s]' % (k, v)
print()
elapsed = time.time() - start_time
transfer_rate_kbs = info.size_kb / elapsed
print(' -- completed %.3f GB in %.1f minutes; rate is %.3f GB/min' % (
info.size_kb/1e6, elapsed / 60, transfer_rate_kbs / 1e6 * 60))
if ok:
if opts.no_db_update:
print 'Confirm job succeeded, but DB will not be updated.'
else:
print 'Marking record as confirmed.'
j.status = 'confirmed'
j.commit()
else:
sys.exit(1)
elif command == 'import':
print 'Reading file %s...' % token
targets = [os.path.normpath(line.strip()) for line in open(token)]
print
print ('You have proposed to import %i targets from %s. '
'They will not yet be assigned to a specific tape.' % (len(targets), token))
print
yn = raw_input('Do you wish to proceed? [yn] ')
if not yn in ['y', 'yes', 'Y']:
print 'Aborted.'
sys.exit(1)
print 'Creating target entries...'
for target in targets:
# Is this target already scanned?
target_id = db.target_create(target,
ignore_duplicate=True,
assign_parent=True)
# Check for target existence...
recs = BackupItem.for_target(db, target)
if len(recs):
#print 'Backup already exists.'
if recs[0].status != 'assigned':
print target, recs
if opts.retry:
continue
raise RuntimeError(
'Inconsistent database state. (If you are continuing '
'an interrupted import, try with --retry.)')
if opts.no_scan:
print('\n *** The no-scan option has been selected ***')
print('\n *** Checksumming for data integrity will not be supported;')
print(' All targets will report 0 size (but will archive in full).')
for target in targets:
db.set_target_scanned(target)
sys.exit(0)
print 'Scanning targets on remote filesystem...'
for target in targets:
info = db.get_target_info(target)
if info['scanned']:
print('Skipping %s because it is already scanned.' % target)
continue
exd = db.get_excluded_subdirs(target)
print time.asctime(), 'Getting files and checksums for target:\n'\
'%s (excluding %i sub-targets) ...' % (target, len(exd))
info = td.remote_target_info(target, exd, verbosity=int(opts.verbose))
print ' ... adding %i files to local database.' % len(info)
print
db.add_files(info, target)
db.set_target_scanned(target)
elif command == 'assign':
if token is not None:
tape_name = token
tape_id = db.get_tape_id(tape_name)
targets = db.get_unassigned_targets()
print '\nThere are %i unassigned targets.\n' % len(targets)
if len(targets) == 0:
sys.exit(EXIT_NO_DATA)
yn = raw_input('Do you wish to assign them to tape "%s"? [yn] ' % tape_name)
if not yn in ['y', 'yes', 'Y']:
print 'Aborted.'
sys.exit(1)
cursor = db.conn.cursor()
for target_id, name in targets:
backup = BackupItem.new(db, target_id, commit=False)
backup.tape_id = tape_id
backup.status = 'assigned'
backup.commit(cursor=cursor)
db.conn.commit()
print 'Done.\n'
else:
o.error('Unknown command "%s"' % command)
# Final status for commands that fall through to here: report a requested
# stop, signalled either by Ctrl-C during the run or by the exit file.
stop_requested = last_exit_flag != 0
if stop_requested:
    sys.exit(EXIT_REQUEST)
if os.path.exists(exit_file):
    print('Exiting with prejudice because of presence of %s' % exit_file)
    sys.exit(EXIT_REQUEST)