-
Notifications
You must be signed in to change notification settings - Fork 2
/
caption_converter.py
100 lines (87 loc) · 3.19 KB
/
caption_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import optparse
import codecs
import pycaption
def main():
    """Convert one caption file to the formats selected on the command line.

    The first positional argument is the path of the caption file. Its text
    is handed to ``read_captions`` and the result to ``write_captions``,
    together with the parsed options.

    Raises:
        Exception: if no caption file path was given on the command line.
    """
    parser = optparse.OptionParser("usage: %prog [options]")
    parser.add_option("--sami",
                      action='store_true',
                      dest='sami',
                      help="write captions in SAMI format",
                      default=False,)
    parser.add_option("--dfxp",
                      action='store_true',
                      dest='dfxp',
                      help="write captions in DFXP format",
                      default=False,)
    parser.add_option("--srt",
                      action='store_true',
                      dest='srt',
                      help="write captions in SRT format",
                      default=False,)
    parser.add_option("--webvtt",
                      action='store_true',
                      dest='webvtt',
                      help="write captions in webvtt format",
                      default=False,)
    parser.add_option("--transcript",
                      action='store_true',
                      dest='transcript',
                      help="write transcript for captions",
                      default=False,)
    parser.add_option("--scc_lang",
                      dest='lang',
                      help="choose override language for input",
                      default='',)
    parser.add_option("--scc_offset",
                      dest='offset',
                      help="choose offset for SCC file; measured in seconds",
                      default=0)
    (options, args) = parser.parse_args()

    try:
        filename = args[0]
    except IndexError:
        # Adjacent literals are concatenated into ONE message; the previous
        # comma between them made the exception argument a tuple.
        raise Exception(
            'Expected usage: python caption_converter.py '
            '<path to caption file> '
            '[--sami --dfxp --srt --webvtt --transcript]')

    try:
        # Strict UTF-8 first; the context manager closes the handle.
        with codecs.open(filename, encoding='utf-8', mode='r') as caption_file:
            captions = caption_file.read()
    except UnicodeDecodeError:
        # Not valid UTF-8: re-read the raw bytes and decode leniently so that
        # only the undecodable bytes are substituted with U+FFFD.
        with open(filename, 'rb') as caption_file:
            captions = caption_file.read().decode('utf-8', 'replace')

    content = read_captions(captions, options)
    write_captions(content, options)
def read_captions(captions, options):
    """Parse *captions* with the first pycaption reader that recognises it.

    Readers are tried in the order SCC, SRT, SAMI, DFXP, WebVTT. Only the SCC
    reader receives extra arguments: ``offset`` (``options.offset`` converted
    with ``int``) and, when ``options.lang`` is non-empty, ``lang``.

    Returns:
        Whatever the matching reader's ``read`` method returns.

    Raises:
        Exception: if no reader detects the content.
    """
    # All readers are constructed up front, in the same order they are tried.
    scc = pycaption.SCCReader()
    fallback_readers = [
        pycaption.SRTReader(),
        pycaption.SAMIReader(),
        pycaption.DFXPReader(),
        pycaption.WebVTTReader(),
    ]

    if scc.detect(captions):
        scc_kwargs = {}
        if options.lang:
            scc_kwargs['lang'] = options.lang
        scc_kwargs['offset'] = int(options.offset)
        return scc.read(captions, **scc_kwargs)

    for reader in fallback_readers:
        if reader.detect(captions):
            return reader.read(captions)

    raise Exception('No caption format detected :(')
def write_captions(content, options):
    """Print *content* once for every output format flagged in *options*.

    Formats are emitted in the fixed order SAMI, DFXP, WebVTT, SRT,
    transcript; each selected writer's output is UTF-8 encoded before being
    printed. Nothing is printed when no format flag is set.
    """
    # (option attribute, pycaption writer class name), in emission order.
    outputs = (
        ('sami', 'SAMIWriter'),
        ('dfxp', 'DFXPWriter'),
        ('webvtt', 'WebVTTWriter'),
        ('srt', 'SRTWriter'),
        ('transcript', 'TranscriptWriter'),
    )
    for flag, writer_name in outputs:
        if not getattr(options, flag):
            continue
        # Look the class up only when requested, so unselected writers are
        # never touched.
        writer = getattr(pycaption, writer_name)()
        # A single parenthesised argument is the same Python 2 print
        # statement as the unparenthesised form.
        print(writer.write(content).encode("utf-8"))
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()