Skip to content

Commit

Permalink
Merge pull request #209 from transifex/docx_hyperlinks_after_text_check
Browse files Browse the repository at this point in the history
Check hyperlinks after detecting text content
  • Loading branch information
igavriil authored Mar 12, 2021
2 parents 8d5b032 + 3222a22 commit fcfe304
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 13 deletions.
26 changes: 13 additions & 13 deletions openformats/formats/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,31 +329,31 @@ def compile(self, template, stringset, **kwargs):
leading_spaces = 0

for index, text_element in enumerate(text_elements):
hyperlink_url = self.get_hyperlink_url(
text_element, rels_soup
)
# the text parts of the translation are fewer than the
# text parts of the document, so we will just remove
# any excess parts from the document
if len(translation_soup) == 0:
if hyperlink_url:
text_element.find_parent('w:hyperlink').decompose()
else:
text_element.find_parent('w:r').decompose()
continue

text = six.text_type(text_element.text)
# detect text elements that contain no text
# and remove leading whitespace from the next string
if not text.strip():
leading_spaces = len(text) - len(text.strip())
continue
else:
hyperlink_url = self.get_hyperlink_url(
text_element, rels_soup
)
# the text parts of the translation are fewer than the
# text parts of the document, so we will just remove
# any excess parts from the document
if len(translation_soup) == 0:
if hyperlink_url:
text_element.find_parent('w:hyperlink').decompose()
else:
text_element.decompose()
continue
translation_part = translation_soup.pop(0)
translation = six.text_type(translation_part)
if not translation[:leading_spaces].strip():
translation = translation[leading_spaces:]
leading_spaces = 0


# the text parts of the translation are more than the
# text parts of the document, so we will compress the
Expand Down
Binary file not shown.
Binary file not shown.
40 changes: 40 additions & 0 deletions openformats/tests/formats/docx/test_docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,46 @@
class DocxTestCase(unittest.TestCase):
TESTFILE_BASE = 'openformats/tests/formats/docx/files'

def test_broken_file(self):
    # Round-trip check against the `missing_wr_parent.docx` fixture:
    # parse it, verify the single extracted string, compile a
    # translation into the template, then re-parse the compiled output
    # and verify that the translated string comes back.
    source_text = u'Foo bar baz'
    translated_text = u'Φου βαρ βαζ'

    fixture_path = '{}/missing_wr_parent.docx'.format(self.TESTFILE_BASE)
    with open(fixture_path, 'rb') as fixture:
        raw_docx = fixture.read()

    # Instantiated exactly as before; the resulting object is not
    # inspected by this test.
    docx_file = DocxFile(raw_docx)

    # First pass: extract the source string from the original document.
    parser = DocxHandler()
    template, source_strings = parser.parse(raw_docx)
    self.assertEqual(len(source_strings), 1)

    source_string = source_strings[0]
    self.assertEqual(source_string.order, 0)
    self.assertEqual(source_string.string, source_text)
    self.assertEqual(source_string.string, source_string.key)

    # Compile the translation into the template, keyed by the source
    # string's key.
    translated_strings = [
        OpenString(source_string.key, translated_text, order=1),
    ]
    compiled_docx = parser.compile(template, translated_strings)

    # Second pass: a fresh handler must read the translation back out of
    # the compiled document.
    reparser = DocxHandler()
    _, compiled_strings = reparser.parse(compiled_docx)
    self.assertEqual(len(compiled_strings), 1)

    compiled_string = compiled_strings[0]
    self.assertEqual(compiled_string.order, 0)
    self.assertEqual(compiled_string.string, translated_text)


def test_docx_file(self):
path = '{}/hello_world.docx'.format(self.TESTFILE_BASE)
with open(path, 'rb') as f:
Expand Down

0 comments on commit fcfe304

Please sign in to comment.