-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsm2anki.py
196 lines (167 loc) · 7.44 KB
/
sm2anki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""This module converts SuperMemo collections to Anki."""
class Converter:
"""
This class converts a dictionary of Element objects into a format
that can be imported into an Anki deck.
"""
def __init__(self, elements, path_to_media_directory):
"""
elements:
A dictionary of Element objects indexed by ID.
path_to_media_directory:
Full path to the directory that contains the
media files for the collection, e.g.:
c:/sm/systems/your_collection/elements/
The path can use either slashes or backslashes
but must include a trailing slash or backslash.
"""
self.elements = elements
self.full_path_to_media = path_to_media_directory
def get_tags(self, element):
"""
Return the tags of an element as a string.
SuperMemo organizes elements into categories but Anki uses tags to
organize facts. To convert, elements are tagged with the names of all
categories they belong to in the original collection.
"""
tags = []
while element.properties['Parent'] != '0':
parent_id = int(element.properties['Parent'])
element = self.elements[parent_id]
# Cleanup of category names. Anki uses spaces to separate tags.
tag = (element.element_info['Title']
.replace(' & ', '&')
.replace('] ', ']')
.replace(',', '')
.replace(' ', '_'))
tags.append(tag)
return ' '.join(tags[::-1])
def get_relative_path_to_media_file(self, path_to_file, path_to_media_directory):
"""
Return the relative path to a media file.
SuperMemo stores full paths to media files but in Anki media files
are relative to the location of the deck.
"""
path_to_file = path_to_file.replace('\\', '/')
path_to_media_directory = path_to_media_directory.replace('\\', '/')
return path_to_file.replace(path_to_media_directory, '')
def convert(self, element):
"""Return the element in a format compatible with Anki."""
question_text = element.get_question() or ''
answer_text = element.get_answer() or ''
try:
answer_sound = self.get_relative_path_to_media_file(
element.get_answer_sound(), self.full_path_to_media)
except AttributeError:
answer_sound = ''
try:
question_sound = self.get_relative_path_to_media_file(
element.get_question_sound(), self.full_path_to_media)
except AttributeError:
question_sound = ''
out = "{0}[sound:{1}]\t{2}[sound:{3}]\t{4}".format(
question_text, question_sound, answer_text, answer_sound,
self.get_tags(element))
return out
def convert_all(self):
"""Return all elements in a format compatible with Anki."""
exportable_elements = [self.convert(e) for e
in self.elements.values() if e.is_item()]
return "\n".join(exportable_elements)
class Element:
"""
This class represents a SuperMemo element such as a topic or an item.
Accessing individual pieces of information about an element:
ComponentNo -> self.properties['ComponentNo']
Status -> self.element_info['Status']
Type of component #2 -> self.components[1]['Type']
(Note that SuperMemo component numbers start at 1.)
"""
# Bits that determine if a component is a question or an answer.
question = 0b00100000
answer = 0b01000000
def __init__(self, element_body):
"""
Instantiate an Element from its textual representation.
The textual representation of an element starts with
"Begin Element #N" and ends with "End Element #N"
where N is the ID of the element.
"""
self.properties = {}
self.element_info = {}
self.components = []
self.id = 0
# Extract information from the element body line by line.
inside_element_info = False
inside_component = False
for line in element_body.strip().split("\n"):
line = line.strip()
if line == "":
pass
if line.startswith("Begin Element "):
self.id = int(line.split(' #')[-1])
elif line.startswith("End Element "):
pass
elif line.startswith("Begin ElementInfo"):
inside_element_info = True
elif line.startswith("End ElementInfo"):
inside_element_info = False
elif line.startswith("Begin Component"):
self.components.append({})
inside_component = True
elif line.startswith("End Component"):
inside_component = False
else:
key, value = line.split('=', 1)
if not inside_element_info and not inside_component:
self.properties[key] = value
elif inside_component:
# There are other properties besides PlayAt and DisplayAt
# that could be converted to integers, but those two are
# the only ones currently used by the converter.
if key in ['PlayAt', 'DisplayAt']:
value = int(value)
self.components[-1][key] = value
elif inside_element_info:
self.element_info[key] = value
def get_question(self):
"""Return the text of the question."""
for c in self.components:
if (c['Type'] == 'Text' and c['DisplayAt'] & self.question
and c['DisplayAt'] & self.answer):
return c['Text']
def get_answer(self):
"""Return the text of the answer."""
for c in self.components:
if (c['Type'] == 'Text' and not c['DisplayAt'] & self.question
and c['DisplayAt'] & self.answer):
return c['Text']
def get_question_sound(self):
"""Return the full path to the audio file associated with the question."""
for c in self.components:
if c['Type'] == 'Sound' and c['PlayAt'] & self.question:
return c['SoundFile']
def get_answer_sound(self):
"""Return the full path to the audio file associated with the answer."""
for c in self.components:
if c['Type'] == 'Sound' and c['PlayAt'] & self.answer:
return c['SoundFile']
def is_item(self):
"""Return True if the element represents an item rather than a topic."""
return self.element_info['Type'] == 'Item'
def __str__(self):
str = "Type: {} Id: {} Title: {}\n".format(
self.element_info['Type'], self.id, self.element_info['Title'])
for c in self.components:
try:
str += "Component -> Type: {} {} PlayAt {}\n".format(
c['Type'], c['SoundFile'], c['PlayAt'])
except KeyError:
str += "Component -> Type: {} {} DisplayAt {}\n".format(
c['Type'], c['Text'], c['DisplayAt'])
return str
def read_sm_file(sm_file_contents):
"""Return a dictionary of Element objects indexed by ID."""
elements = [Element(element_body) for element_body
in sm_file_contents.strip().split("\n\n")]
return dict((e.id, e) for e in elements)