diff --git a/openformats/formats/github_markdown_v2.py b/openformats/formats/github_markdown_v2.py index 0815e2ec..f0bf1fb1 100644 --- a/openformats/formats/github_markdown_v2.py +++ b/openformats/formats/github_markdown_v2.py @@ -19,22 +19,16 @@ class GithubMarkdownHandlerV2(OrderedCompilerMixin, Handler): BACKSLASH = u'\\' DOUBLE_QUOTES = u'"' + SINGLE_QUOTE = u"'" NEWLINE = u'\n' COLON = u':' ASTERISK = u'*' AMPERSAND = u'&' DASH = u'-' HASHTAG = u'#' - - def _should_wrap_in_quotes(self, tr_string): - return any([ - self.NEWLINE in tr_string[:-1], - self.COLON in tr_string, - self.HASHTAG in tr_string, - tr_string.lstrip().startswith(self.ASTERISK), - tr_string.lstrip().startswith(self.AMPERSAND), - tr_string.lstrip().startswith(self.DASH), - ]) + AT_SIGN = u'@' # reserved character in the YAML spec + BRACKET_LEFT = u'[' + BRACKET_RIGHT = u']' def compile(self, template, stringset, **kwargs): # assume stringset is ordered within the template @@ -43,25 +37,8 @@ def compile(self, template, stringset, **kwargs): for string in stringset: tr_string = string.string - try: - # if string's key is int this is a markdown string - int(string.key) - except ValueError: - if self._should_wrap_in_quotes(tr_string): - # escape double quotes inside strings - tr_string = string.string.replace( - self.DOUBLE_QUOTES, - (self.BACKSLASH + self.DOUBLE_QUOTES) - ) - # surround string with double quotes - tr_string = (self.DOUBLE_QUOTES + tr_string + - self.DOUBLE_QUOTES) - # this is to ensure that if the style is literal or folded - # http://www.yaml.org/spec/1.2/spec.html#id2795688 - # a new line always follows the string - if (string.flags and string.flags[-1] in '|>' and - tr_string[-1] != self.NEWLINE): - tr_string = tr_string + self.NEWLINE + if self._is_yaml_string(string): + tr_string = self._transform_yaml_string(string) hash_position = template.index(string.template_replacement) transcriber.copy_until(hash_position) @@ -135,3 +112,132 @@ def parse(self, content, **kwargs): template = yaml_template + seperator + md_template return force_newline_type(template, newline_type), stringset + + def _is_yaml_string(self, string): + """Return True if the given open string is in YAML format, False otherwise. + + :param OpenString string: the string object to check + :return: whether or not the string is in a YAML-formatted block + :rtype: bool + """ + # If string's key is of type `int` (e.g. 4), it is a markdown string + # Strings inside YAML blocks have a string key (e.g. 'root_dict.another_dict') + try: + int(string.key) + except ValueError: + return True + + return False + + def _transform_yaml_string(self, openstring): + """Transform the given YAML-formatted string to make it valid for compilation. + + :param OpenString openstring: the string object to use for the transformation + :return: a string that is valid for exporting + :rtype: str + """ + should_wrap, quote_char = self._should_wrap_in_quotes(openstring.string) + if should_wrap: + string = self._wrap_in_quotes(openstring.string, quote_char) + else: + string = openstring.string + + # this is to ensure that if the style is literal or folded + # http://www.yaml.org/spec/1.2/spec.html#id2795688 + # a new line always follows the string + if (openstring.flags and openstring.flags[-1] in '|>' + and string[-1] != self.NEWLINE): + string = string + self.NEWLINE + + return string + + def _wrap_in_quotes(self, string, quote_char): + """Wrap the given string in quotes, if necessary. + + :param unicode string: the string to check for wrapping + :param str quote_char: the character to use for wrapping, + one of `"` or `'` + :return: the new string, wrapped in quotes if needed + :rtype: unicode + :raise ValueError: if `quote_char` is not one of the valid values + """ + if quote_char == u'"': + string = string.replace(quote_char, self.BACKSLASH + quote_char) + elif quote_char == u"'": + string = string.replace(quote_char, quote_char * 2) + else: + raise ValueError( + 'Invalid character ({}) given for wrapping in quotes, ' + 'supported values are single quotes (\') ' + 'and double quotes (").'.format(quote_char) + ) + + # wrap string with quotes + return u'{}{}{}'.format(quote_char, string, quote_char) + + def _should_wrap_in_quotes(self, string): + """Check if the given string should be wrapped in quotes. + + In order to decide if wrapping is necessary, it takes into account + various parameters, such as what character the string starts and ends with, + whether or not it contains special characters, etc. + + :param unicode string: the string to check + :return: a tuple that shows if wrapping is needed, as well as + the character that should be used for wrapping + :rtype: tuple (bool, str) + """ + string = string.strip() + + # If wrapped already in double quotes, don't wrap again + wrapped_in_double_quotes = ( + string.startswith(self.DOUBLE_QUOTES) + and string.endswith(self.DOUBLE_QUOTES) + ) + if wrapped_in_double_quotes: + return False, None + + # If wrapped already in single quotes, don't wrap again + wrapped_in_single_quotes = ( + string.startswith(self.SINGLE_QUOTE) + and string.endswith(self.SINGLE_QUOTE) + ) + if wrapped_in_single_quotes: + return False, None + + # If starts with a double quote but does not end in a double quote, + # wrap in single quotes + should_wrap = ( + string.startswith(self.DOUBLE_QUOTES) + and not wrapped_in_double_quotes + ) + if should_wrap: + return should_wrap, self.SINGLE_QUOTE + + # If starts with a single quote but does not end in a single quote, + # wrap in double quotes + should_wrap = ( + string.startswith(self.SINGLE_QUOTE) + and not wrapped_in_single_quotes + ) + if should_wrap: + return should_wrap, self.DOUBLE_QUOTES + + # If needs wrapping due to special characters, wrap in double quotes + should_wrap = any([ + self.NEWLINE in string[:-1], + self.COLON in string, + self.HASHTAG in string, + string.startswith(self.ASTERISK), + string.startswith(self.AMPERSAND), + string.startswith(self.DASH), + string.startswith(self.AT_SIGN), + ( + string.startswith(self.BRACKET_LEFT) + and not string.endswith(self.BRACKET_RIGHT) + ), + ]) + if should_wrap: + return True, self.DOUBLE_QUOTES + + return False, None diff --git a/openformats/tests/formats/github_markdown_v2/files/1_el.md b/openformats/tests/formats/github_markdown_v2/files/1_el.md index ae597f08..ea9014d0 100644 --- a/openformats/tests/formats/github_markdown_v2/files/1_el.md +++ b/openformats/tests/formats/github_markdown_v2/files/1_el.md @@ -32,6 +32,10 @@ key.with.dots: "el:dot value" 1: "el:integer key" +wrapping: + at: "el:@something" + brackets: "el:[Something] else" + --- # el:Markdown stuff diff --git a/openformats/tests/formats/github_markdown_v2/files/1_en.md b/openformats/tests/formats/github_markdown_v2/files/1_en.md index c965f0ad..990972cd 100644 --- a/openformats/tests/formats/github_markdown_v2/files/1_en.md +++ b/openformats/tests/formats/github_markdown_v2/files/1_en.md @@ -35,6 +35,10 @@ key.with.dots: dot value 1: integer key +wrapping: + at: "@something" + brackets: "[Something] else" + --- # Markdown stuff diff --git a/openformats/tests/formats/github_markdown_v2/files/1_en_export.md b/openformats/tests/formats/github_markdown_v2/files/1_en_export.md index 5c77e44a..d0a53a1d 100644 --- a/openformats/tests/formats/github_markdown_v2/files/1_en_export.md +++ b/openformats/tests/formats/github_markdown_v2/files/1_en_export.md @@ -31,6 +31,10 @@ key.with.dots: dot value 1: integer key +wrapping: + at: "@something" + brackets: "[Something] else" + --- # Markdown stuff diff --git a/openformats/tests/formats/github_markdown_v2/files/1_tpl.md b/openformats/tests/formats/github_markdown_v2/files/1_tpl.md index b0436a75..d33ab0c7 100644 --- a/openformats/tests/formats/github_markdown_v2/files/1_tpl.md +++ b/openformats/tests/formats/github_markdown_v2/files/1_tpl.md @@ -26,52 +26,51 @@ key.with.dots: ddbc5d72cdd64acc258ab3446b6466ed_tr 1: 9354e4f807073d6a146138fe6d81e4ef_tr ---- - -# ecb3257d21a429f4e49462e8639f1494_tr +wrapping: + at: c9c07c7124a5f4774c6f7b130cc4c1fa_tr + brackets: cb544bc6f97fe540c7a4db0c9126f83e_tr -## 26101031d5115f57d3e8c34f2ac1f741_tr +--- -ac435aa88d7932bc7c26dbe0277119a3_tr +# ac435aa88d7932bc7c26dbe0277119a3_tr -### 311a0516b1c6eb11a28e8deaa5c64c78_tr +## 311a0516b1c6eb11a28e8deaa5c64c78_tr a3575631fc76819748da773b5b0087a4_tr -###### 2040af1756f3e6429f9b8abab90fb258_tr +### 2040af1756f3e6429f9b8abab90fb258_tr 691513953ea19e01a0e7881a339ce106_tr +###### 867da7e2af8e6b2f3aa7213a4080edb3_tr -## 867da7e2af8e6b2f3aa7213a4080edb3_tr +a6c1243676462dae35ed0c9122125d4e_tr -### a6c1243676462dae35ed0c9122125d4e_tr -* 6994d1415b92982eb7cf57740b15b949_tr -* 061c16448aee6be07e20ab574ef27ea4_tr - * ea8a5a340cab600d88eea120273c4022_tr - * 944024165d37855a16b48158a491e0a5_tr +## 6994d1415b92982eb7cf57740b15b949_tr + +### 061c16448aee6be07e20ab574ef27ea4_tr + +* ea8a5a340cab600d88eea120273c4022_tr +* 944024165d37855a16b48158a491e0a5_tr + * d51c2a7526e97b2e9ba2d86ac075af7c_tr + * 99cd002aedd5014650d2d43ca2967c2a_tr * ``` Item 2c ``` -### d51c2a7526e97b2e9ba2d86ac075af7c_tr +### 662a0b32c5caab5e3b8aa9a3f7bcedfc_tr -1. 99cd002aedd5014650d2d43ca2967c2a_tr -1. 662a0b32c5caab5e3b8aa9a3f7bcedfc_tr 1. f1c3d12b026ece333653621093d9abe3_tr - 1. bb74ac206332955a7b4f5076f55b2e26_tr - 1. bc3bd100ff8a56464a05d8f7f8bb7a6d_tr +1. bb74ac206332955a7b4f5076f55b2e26_tr +1. bc3bd100ff8a56464a05d8f7f8bb7a6d_tr + 1. 28e677da01f23106bbe6f32634edae27_tr + 1. 656cad764b3cb08b8a76fe22ce8cad9a_tr 1. ``` Item 3c ``` -## 28e677da01f23106bbe6f32634edae27_tr - -656cad764b3cb08b8a76fe22ce8cad9a_tr - - ## eb8f40de442bebf3cfcbe2e513add63b_tr c78e069659a818bab095c6c8976bf3ae_tr @@ -81,88 +80,93 @@ c78e069659a818bab095c6c8976bf3ae_tr 938c0de7edf02915e5f1cb8c3a8da6db_tr -61a2eceadafd120aa94c67278d7a7334_tr +## 61a2eceadafd120aa94c67278d7a7334_tr -## 48d1f08b2408fe8a97fa0bc36b7c4400_tr +48d1f08b2408fe8a97fa0bc36b7c4400_tr 3c951e08898efd90188c74084373af08_tr -# ef70b397e869e6fd08714e2f9edd3f8c_tr +## ef70b397e869e6fd08714e2f9edd3f8c_tr -## 0dcd35bbb040482717ba0d8763cf24f0_tr +0dcd35bbb040482717ba0d8763cf24f0_tr + + +# 4d22974b0d27df9918a8b922d921587c_tr + +## 5eb74b106793ebcd6eeee358aa4d25ac_tr -4d22974b0d27df9918a8b922d921587c_tr | 5eb74b106793ebcd6eeee358aa4d25ac_tr ------------- | ------------- 72e17437d94b8aaaa7ca9ff670fb2c42_tr | 4f95c94d8503a15557174036f4f7947a_tr +------------ | ------------- 6cb8da4ccdafbed92fd1c4b5888091f9_tr | 0b912bd88f2f0678a52616328ed01a63_tr +f093e61658553bbd86214bffe9a663a8_tr | d8a4fb88944aeb2b305e4e94e9d18283_tr -## f093e61658553bbd86214bffe9a663a8_tr +## 78e10a5ea41eae9ea05dc265a5c8cd66_tr -d8a4fb88944aeb2b305e4e94e9d18283_tr +d6c6d7c6ae13b3b656339b51dbeb85ff_tr -78e10a5ea41eae9ea05dc265a5c8cd66_tr +9441c042cb0b839e13ed95f55de0ee5c_tr -# d6c6d7c6ae13b3b656339b51dbeb85ff_tr -- 9441c042cb0b839e13ed95f55de0ee5c_tr -- a4ad6fda3cab93ac6479a0c30dd96b0c_tr -257f3f448e7ac5f2fe13b374496309f6_tr +# a4ad6fda3cab93ac6479a0c30dd96b0c_tr +- 257f3f448e7ac5f2fe13b374496309f6_tr +- 38d12337337477ece3edbf687c60f45b_tr +39a9c02a305227b4f9c62bc3d839500c_tr -## 38d12337337477ece3edbf687c60f45b_tr +## 00ccddea80e2b57095f5c536410069b5_tr -- 39a9c02a305227b4f9c62bc3d839500c_tr -- 00ccddea80e2b57095f5c536410069b5_tr - f592797318508cb692be49a04b5fe0a3_tr - 1958a4139b0dc6d506cfe08e1553199d_tr +- 8228238dcd753647ede755c671c9988b_tr +- fd90f0cdc5f649dd84d4e21ff33b5774_tr -## 8228238dcd753647ede755c671c9988b_tr +## a69644fd392aad985de61e166dc8e8fd_tr -fd90f0cdc5f649dd84d4e21ff33b5774_tr +ed226cb95f3b637703501d9f4ddd3d68_tr -# a69644fd392aad985de61e166dc8e8fd_tr +# 18546b56cfde1b046b0372d31f6e7293_tr -## ed226cb95f3b637703501d9f4ddd3d68_tr +## f240c90767be4aac25128f5fd8cd5716_tr -18546b56cfde1b046b0372d31f6e7293_tr +66c3fee70e0ec54cbbba4c8912c8b1dc_tr {% if version <= '2.6' %} -### f240c90767be4aac25128f5fd8cd5716_tr +### 1b125d3881f320a3e0ebc9f35e8707c5_tr -66c3fee70e0ec54cbbba4c8912c8b1dc_tr +aa5072eebea6f4364b4444a8a0f0f868_tr {% endif %} -## 1b125d3881f320a3e0ebc9f35e8707c5_tr - -### aa5072eebea6f4364b4444a8a0f0f868_tr +## f98c3b2040ea74375cdaa2759f0b86f7_tr -- "[f98c3b2040ea74375cdaa2759f0b86f7_tr](/articles/basic-writing-and-formatting-syntax)" -- [33aa6de370798fadf155bbc16f7540c7_tr](/articles/working-with-advanced-formatting) +### 33aa6de370798fadf155bbc16f7540c7_tr -### 709c93c4918ef02a126bec2a7044dca2_tr - -e4429365fc4b359487f20dc8957bdc97_tr +- "[709c93c4918ef02a126bec2a7044dca2_tr](/articles/basic-writing-and-formatting-syntax)" +- [e4429365fc4b359487f20dc8957bdc97_tr](/articles/working-with-advanced-formatting) ### a55e54225390bca0cdaa0c1d0a8d07d4_tr d175d8a8a13209341e4a0e3066113b3b_tr -[1]: http://example.com/ -[537e77e8b2aa7aac6607c173439d164e_tr]: http://example.com/ -"[42de416048295492aa089c64b44a4f61_tr]: http://example.com/" +### 537e77e8b2aa7aac6607c173439d164e_tr +42de416048295492aa089c64b44a4f61_tr -# c20f2c418bdcd3361f55a9da99810fe1_tr -c8e2e649deb2b744127c5fef84f14f46_tr +[1]: http://example.com/ +[c20f2c418bdcd3361f55a9da99810fe1_tr]: http://example.com/ +"[c8e2e649deb2b744127c5fef84f14f46_tr]: http://example.com/" -47fd1ed4c5a1115c35a6ab8342f0c145_tr +# 47fd1ed4c5a1115c35a6ab8342f0c145_tr 211ce627f55aff0238a6a4775e9d087a_tr + +5062613d56978c76710cd0c20d6db2a0_tr + +7e0a29ea7700b69c207521b160d32086_tr diff --git a/openformats/tests/formats/github_markdown_v2/test_github_markdown.py b/openformats/tests/formats/github_markdown_v2/test_github_markdown.py index 53d3c899..bd273209 100644 --- a/openformats/tests/formats/github_markdown_v2/test_github_markdown.py +++ b/openformats/tests/formats/github_markdown_v2/test_github_markdown.py @@ -1,7 +1,9 @@ +# -*- coding: utf-8 -*- import unittest from os import path +from openformats.strings import OpenString from openformats.tests.formats.common import CommonFormatTestMixin from openformats.formats.github_markdown_v2 import GithubMarkdownHandlerV2 @@ -10,6 +12,7 @@ class GithubMarkdownV2TestCase(CommonFormatTestMixin, unittest.TestCase): + """Tests the basic functionality of GithubMarkdownHandlerV2.""" HANDLER_CLASS = GithubMarkdownHandlerV2 TESTFILE_BASE = "openformats/tests/formats/github_markdown_v2/files" @@ -29,3 +32,109 @@ def test_parse(self): content_with_tab = self.handler.parse(content=u"# foo bar") content_with_spaces = self.handler.parse(content=u"# foo bar") self.assertEqual(content_with_tab[0], content_with_spaces[0]) + + +class GithubMarkdownV2CustomTestCase(unittest.TestCase): + """Tests some additional functionality of GithubMarkdownHandlerV2. + + More specifically, it tests various helper methods, to ensure + full coverage and cover edge cases. + """ + + def setUp(self): + self.handler = GithubMarkdownHandlerV2() + + def test_is_yaml_string_false_for_ints(self): + openstring = OpenString('4', 'something') + self.assertFalse(self.handler._is_yaml_string(openstring)) + + def test_is_yaml_string_true_for_strings(self): + openstring = OpenString('some.string.key', 'something') + self.assertTrue(self.handler._is_yaml_string(openstring)) + + def test_should_wrap_in_quotes_false_if_no_special_case(self): + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u' Απλό case') + self.assertFalse(should_wrap) + self.assertIsNone(wrap_char) + + def test_should_wrap_in_quotes_false_if_already_wrapped(self): + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u' "Κάτι άλλο "') + self.assertFalse(should_wrap) + self.assertIsNone(wrap_char) + + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u" 'Κάτι άλλο' ") + self.assertFalse(should_wrap) + self.assertIsNone(wrap_char) + + def test_should_wrap_in_quotes_if_starts_but_not_ends_with_quote(self): + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u' " Κάτι άλλο ') + self.assertTrue(should_wrap) + self.assertEqual(wrap_char, u"'") + + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u" ' Κάτι άλλο ") + self.assertTrue(should_wrap) + self.assertEqual(wrap_char, u'"') + + def test_should_wrap_in_quotes_if_starts_with_special_char(self): + starting_chars = [ + GithubMarkdownHandlerV2.ASTERISK, + GithubMarkdownHandlerV2.AMPERSAND, + GithubMarkdownHandlerV2.DASH, + GithubMarkdownHandlerV2.AT_SIGN, + ] + for char in starting_chars: + should_wrap, wrap_char = self.handler._should_wrap_in_quotes( + u' {} Κάτι άλλο '.format(char) + ) + self.assertTrue(should_wrap) + self.assertEqual(wrap_char, u'"') + + def test_should_wrap_in_quotes_if_has_special_char(self): + special_chars = [ + GithubMarkdownHandlerV2.NEWLINE, + GithubMarkdownHandlerV2.COLON, + GithubMarkdownHandlerV2.HASHTAG, + ] + for char in special_chars: + should_wrap, wrap_char = self.handler._should_wrap_in_quotes( + u' Κάτι άλλο {} -'.format(char) + ) + self.assertTrue(should_wrap) + self.assertEqual(wrap_char, u'"') + + def test_should_wrap_in_quotes_if_starts_but_not_ends_with_bracket(self): + should_wrap, wrap_char = self.handler._should_wrap_in_quotes(u' [Κάτι] άλλο ') + self.assertTrue(should_wrap) + self.assertEqual(wrap_char, u'"') + + def test_wrap_in_quotes(self): + """Make sure that the string is wrapped and that any existing quote chars + are escaped.""" + wrapped = self.handler._wrap_in_quotes(u"To '21", "'") + self.assertEqual(wrapped, u"'To ''21'") + wrapped = self.handler._wrap_in_quotes(u'Αυτό είναι "ΟΚ"', '"') + self.assertEqual(wrapped, u'"Αυτό είναι \\"ΟΚ\\""') + + def test_wrap_in_quotes_exception_for_wrong_quote(self): + """Make sure that the a ValueError is raised if a wrong quote char is given.""" + self.assertRaises(ValueError, self.handler._wrap_in_quotes, u"To '21", "*") + + def test_transform_yaml_string_left_as_is(self): + openstring = OpenString('k', u' Δεν θέλω τίποτα') + string = self.handler._transform_yaml_string(openstring) + self.assertEqual(string, openstring.string) + + def test_transform_yaml_string_wrapped_for_brackets(self): + openstring = OpenString('k', u' [Θέλω] quotes') + string = self.handler._transform_yaml_string(openstring) + self.assertEqual(string, u'" [Θέλω] quotes"') + + def test_transform_yaml_string_wrapped_for_double_quotes(self): + openstring = OpenString('k', u' "Θέλω" quotes') + string = self.handler._transform_yaml_string(openstring) + self.assertEqual(string, u'\' "Θέλω" quotes\'') + + def test_transform_yaml_string_wrapped_for_at_sign(self): + openstring = OpenString('k', u' @κάπου') + string = self.handler._transform_yaml_string(openstring) + self.assertEqual(string, u'" @κάπου"')