# forced_subtitles.py (109 lines, 90 loc, 3.8 KB)
from gdata.spreadsheet.service import SpreadsheetsService
import pickle
import time
import re, string
import os
import glob
from xbmcCommand import sendRequest
DEBUG = True
class SheetQuerier:
    """Looks up forced-subtitle requirements for movie titles from a public
    Google Spreadsheet, caching the sheet contents locally via pickle."""

    # Key of the public Google Spreadsheet document to read.
    key = "0AkGO8UqErL6idDhYYjg1ZXlORnRaM3ZhTks4Z3FrYlE"
    # Worksheet title -> name of the column that holds the movie title.
    worksheets = {"DVD" : "movietitle", "Blu-ray" : "movietitle"}
    # Local pickle cache file and its maximum age (one week, in seconds).
    cache = "spreadsheet_cache.p"
    cache_timeout = 60*60*24*7
    # Leading articles that sheet titles store as a trailing ", The" / ", A".
    prefixes = ["The", "A"]
    # Matches every character that is NOT alphanumeric or a space.
    whitespace_pattern = re.compile('[^a-zA-Z0-9 ]+', re.UNICODE)

    def __init__(self, force_reload = False):
        """Load title data from the local cache, refreshing it from the
        spreadsheet when the cache is missing, stale, or force_reload is True.

        force_reload -- when True, refetch the spreadsheet even if the
                        cache is fresh.
        """
        try:
            with open(self.cache, "rb") as f:
                pickle_date, self.data = pickle.load(f)
            if DEBUG:
                print("Loaded cache successfully")
            if force_reload or time.time() - pickle_date > self.cache_timeout:
                if DEBUG:
                    if force_reload:
                        print("Cache update forced.")
                    else:
                        print("Cache too old, reloading.")
                self._refresh_cache()
        except Exception:
            # Best-effort cache: any problem (missing file, corrupt or
            # incompatible pickle) simply triggers a full reload.
            if DEBUG:
                print("Problem loading cache, reloading.")
            self._refresh_cache()

    def _refresh_cache(self):
        # Refetch the spreadsheet and persist (timestamp, data) to disk.
        self.reload_data()
        with open(self.cache, "wb") as f:
            pickle.dump((time.time(), self.data), f)

    def reload_data(self):
        """Fetch every recognized worksheet and rebuild self.data from it."""
        self.client = SpreadsheetsService()
        feed = self.client.GetWorksheetsFeed(self.key, visibility='public', projection='basic')
        self.data = {}
        for entry in feed.entry:
            if entry.title.text in self.worksheets:
                # The worksheet key is the last path component of the entry id.
                bad_rows, total_rows = self.process_sheet(entry.id.text.split("/")[-1], self.worksheets[entry.title.text])
                print("Skipped %d / %d rows in sheet \"%s\"" % (bad_rows, total_rows, entry.title.text))
            elif DEBUG:
                print("Skipped sheet \"%s\"" % entry.title.text)

    def process_sheet(self, sheet_key, movie_row_key, type_row_key = "forcedsubtitletype"):
        """Read one worksheet into self.data (cleaned title -> subtitle type).

        sheet_key     -- worksheet identifier within the document.
        movie_row_key -- column key containing the movie title.
        type_row_key  -- column key containing the forced-subtitle type.
        Returns (bad_rows, total_rows), where bad_rows counts rows skipped
        because the title or type cell was missing/empty.
        """
        if DEBUG:
            print("Document: %s" % self.key)
            print("Sheet: %s" % sheet_key)
        rows = self.client.GetListFeed(self.key, sheet_key, visibility='public', projection='values').entry
        bad_rows = 0
        for row in rows:
            try:
                title = row.custom[movie_row_key].text.strip()
                self.data[SheetQuerier.clean_title(title)] = row.custom[type_row_key].text.strip()
            except Exception:
                # Incomplete row (empty cell -> .text is None, or key absent).
                bad_rows += 1
        return bad_rows, len(rows)

    def query_exact(self, title):
        """Return the forced-subtitle type for title, or False if unknown."""
        return self.data.get(SheetQuerier.clean_title(title), False)

    @staticmethod
    def clean_title(title):
        """Normalize a movie title for lookup: move a trailing ", The"/", A"
        article to the front, strip non-alphanumerics, and lowercase."""
        # Move prefixes. Require the full ", <prefix>" suffix -- matching the
        # bare prefix would mangle titles that merely end in "A" or "The".
        for prefix in SheetQuerier.prefixes:
            suffix = ", %s" % prefix
            if title.endswith(suffix):
                title = "%s %s" % (prefix, title[:-len(suffix)])
                break
        # Strip all non alpha-numeric characters
        title = SheetQuerier.whitespace_pattern.sub('', title)
        # Return the lowercase version
        return title.lower()
def checkAllMovies():
    """Scan the XBMC movie library and print every movie that needs a forced
    subtitle track but has none on disk (and is not marked hardcoded)."""
    q = SheetQuerier()
    movies = sendRequest("VideoLibrary.GetMovies", {"properties" : ["trailer", "year", "file"]})
    for movie in movies["result"]["movies"]:
        hit = q.query_exact(movie["label"])
        # "None" in the sheet means no forced subs are needed; False means the
        # title was not found. NOTE: must compare with ==, not `is` -- string
        # identity against a literal is an interning accident, not equality.
        if hit is False or hit == "None":
            continue
        movie_dir = os.path.dirname(movie["file"])
        base = os.path.splitext(os.path.basename(movie["file"]))[0]
        # Skip if an English forced subtitle file already exists.
        sub_name_pattern = os.path.join(movie_dir, "Subtitles", "%s.en.forced.*" % base)
        if glob.glob(sub_name_pattern):
            continue
        # Skip if a marker file says the subs are burned into the video.
        if os.path.exists(os.path.join(movie_dir, "Subtitles", ".hardcoded")):
            continue
        # No subs found, and subs needed
        print("%s => %s" % (movie["label"], hit))
# Entry point: run the library scan when executed as a script.
if __name__ == "__main__":
    checkAllMovies()