diff --git a/mig/shared/base.py b/mig/shared/base.py index 64f12b370..b21d4ae6f 100644 --- a/mig/shared/base.py +++ b/mig/shared/base.py @@ -36,6 +36,7 @@ import re # IMPORTANT: do not import any other MiG modules here - to avoid import loops +from mig.shared.compat import PY2 from mig.shared.defaults import default_str_coding, default_fs_coding, \ keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \ _user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \ @@ -496,7 +497,7 @@ def is_unicode(val): return (type(u"") == type(val)) -def force_utf8(val, highlight=''): +def _force_utf8_py2(val, highlight=''): """Internal helper to encode unicode strings to utf8 version. Actual changes are marked out with the highlight string if given. """ @@ -507,6 +508,31 @@ def force_utf8(val, highlight=''): return val return "%s%s%s" % (highlight, val.encode("utf8"), highlight) +def _force_utf8_py3(val, highlight='', stringify=True): + """Internal helper to encode unicode strings to utf8 version. Actual + changes are marked out with the highlight string if given. + The optional stringify turns ALL values including numbers into string. + """ + # We run into all kind of nasty encoding problems if we mix + if not isinstance(val, basestring): + if stringify: + val = "%s" % val + else: + return val + if not is_unicode(val): + return val + if is_unicode(highlight): + hl_utf = highlight.encode("utf8") + else: + hl_utf = highlight + return (b"%s%s%s" % (hl_utf, val.encode("utf8"), hl_utf)) + + +if PY2: + force_utf8 = _force_utf8_py2 +else: + force_utf8 = _force_utf8_py3 + def force_utf8_rec(input_obj, highlight=''): """Recursive object conversion from unicode to utf8: useful to convert e.g. 
diff --git a/mig/shared/safeinput.py b/mig/shared/safeinput.py index 592250755..e91937d8c 100644 --- a/mig/shared/safeinput.py +++ b/mig/shared/safeinput.py @@ -58,7 +58,7 @@ from html import escape as escape_html assert escape_html is not None -from mig.shared.base import force_unicode, force_utf8 +from mig.shared.base import force_unicode, force_native_str from mig.shared.defaults import src_dst_sep, username_charset, \ username_max_length, session_id_charset, session_id_length, \ subject_id_charset, subject_id_min_length, subject_id_max_length, \ @@ -2294,7 +2294,7 @@ def __init__(self, value): def __str__(self): """Return string representation""" - return force_utf8(force_unicode(self.value)) + return force_native_str(self.value) def main(_exit=sys.exit, _print=print): diff --git a/tests/test_mig_shared_base.py b/tests/test_mig_shared_base.py new file mode 100644 index 000000000..82145cb20 --- /dev/null +++ b/tests/test_mig_shared_base.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# +# --- BEGIN_HEADER --- +# +# test_mig_shared_base - unit test of the corresponding mig shared module +# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH +# +# This file is part of MiG. +# +# MiG is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# MiG is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +# USA. 
#
# --- END_HEADER ---
#

"""Unit test base functions"""

import binascii
import codecs
import os
import sys

from tests.support import PY2, MigTestCase, testmain

from mig.shared.base import force_utf8

DUMMY_STRING = "foo bÆr baz"
DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'


class MigSharedBase(MigTestCase):
    """Unit tests of functions within the mig.shared.base module."""

    def test_force_utf8_on_string(self):
        # The plain string literal must come back as bytes matching the
        # hex dump of its utf8 encoding.
        expected_hex = b'666f6f2062c386722062617a'
        encoded = force_utf8(DUMMY_STRING)

        self.assertIsInstance(encoded, bytes)
        self.assertEqual(binascii.hexlify(encoded), expected_hex)

    def test_force_utf8_on_unicode(self):
        # The unicode literal must come back as the same bytes that the
        # codecs module produces for a utf8 encoding.
        expected = codecs.encode(DUMMY_UNICODE, 'utf8')
        encoded = force_utf8(DUMMY_UNICODE)

        self.assertIsInstance(encoded, bytes)
        self.assertEqual(encoded, expected)


if __name__ == '__main__':
    testmain()