Skip to content

Commit

Permalink
Use the correct force_utf8 function based on Python version.
Browse files Browse the repository at this point in the history
Import the experimental branch version of force_utf8 wholesale, adding a
_py2/_py3 suffix to each variant, and expose the correct implementation
depending on PY2.

Include forcing InputException messages to a native string as is done in
experimental (also taken directly from that branch) which ensures the
exception message, which may be unicode, becomes a string everywhere.
  • Loading branch information
albu-diku committed Nov 7, 2024
1 parent a146e1b commit 7d56ec9
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 3 deletions.
28 changes: 27 additions & 1 deletion mig/shared/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import re

# IMPORTANT: do not import any other MiG modules here - to avoid import loops
from mig.shared.compat import PY2
from mig.shared.defaults import default_str_coding, default_fs_coding, \
keyword_all, keyword_auto, sandbox_names, _user_invisible_files, \
_user_invisible_dirs, _vgrid_xgi_scripts, cert_field_order, csrf_field, \
Expand Down Expand Up @@ -496,7 +497,7 @@ def is_unicode(val):
return (type(u"") == type(val))


def force_utf8(val, highlight=''):
def _force_utf8_py2(val, highlight=''):
"""Internal helper to encode unicode strings to utf8 version. Actual
changes are marked out with the highlight string if given.
"""
Expand All @@ -507,6 +508,31 @@ def force_utf8(val, highlight=''):
return val
return "%s%s%s" % (highlight, val.encode("utf8"), highlight)

def _force_utf8_py3(val, highlight='', stringify=True):
    """Internal helper returning the utf8-encoded bytes version of unicode
    strings. Actual changes are marked out by wrapping the result in the
    highlight string, if given.

    Values that are not strings are first formatted with "%s" when the
    optional stringify is set, and otherwise handed back untouched. Any
    resulting value that is_unicode does not recognize as unicode is returned
    as-is without highlight.

    NOTE(review): basestring is not a Python 3 builtin - presumably it is
    provided by a compatibility import elsewhere in this module; confirm.
    """
    # Only unicode values are encoded below, everything else passes through
    # unchanged so that the caller never gets a half-converted mix.
    if not isinstance(val, basestring):
        if not stringify:
            return val
        val = "%s" % val
    if not is_unicode(val):
        return val
    # The highlight marker must be bytes as well to fit the bytes template
    marker = highlight.encode("utf8") if is_unicode(highlight) else highlight
    return b"%s%s%s" % (marker, val.encode("utf8"), marker)


# Expose the implementation selected by the PY2 flag under the public name
# (PY2 is imported from mig.shared.compat - presumably true on Python 2 only)
force_utf8 = _force_utf8_py2 if PY2 else _force_utf8_py3


def force_utf8_rec(input_obj, highlight=''):
"""Recursive object conversion from unicode to utf8: useful to convert e.g.
Expand Down
4 changes: 2 additions & 2 deletions mig/shared/safeinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
from html import escape as escape_html
assert escape_html is not None

from mig.shared.base import force_unicode, force_utf8
from mig.shared.base import force_unicode, force_native_str
from mig.shared.defaults import src_dst_sep, username_charset, \
username_max_length, session_id_charset, session_id_length, \
subject_id_charset, subject_id_min_length, subject_id_max_length, \
Expand Down Expand Up @@ -2294,7 +2294,7 @@ def __init__(self, value):
def __str__(self):
"""Return string representation"""

return force_utf8(force_unicode(self.value))
return force_native_str(self.value)


def main(_exit=sys.exit, _print=print):
Expand Down
60 changes: 60 additions & 0 deletions tests/test_mig_shared_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
#
# --- BEGIN_HEADER ---
#
# test_mig_shared_base - unit test of the corresponding mig shared module
# Copyright (C) 2003-2024 The MiG Project by the Science HPC Center at UCPH
#
# This file is part of MiG.
#
# MiG is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# MiG is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
# --- END_HEADER ---
#

"""Unit test base functions"""

import binascii
import codecs
import os
import sys

from tests.support import PY2, MigTestCase, testmain

from mig.shared.base import force_utf8

# Plain (unprefixed) string literal with a single non-ASCII character; the
# tests below pin its utf8 byte sequence in hex.
DUMMY_STRING = "foo bÆr baz"
# Explicitly unicode-prefixed literal mixing ASCII with non-ASCII symbols
DUMMY_UNICODE = u'UniCode123½¾µßðþđŋħĸþł@ª€£$¥©®'


class MigSharedBase(MigTestCase):
    """Unit tests of functions within the mig.shared.base module."""

    def test_force_utf8_on_string(self):
        # The plain string literal must come back as bytes holding exactly
        # the utf8 sequence spelled out in hex here.
        expected_hex = b'666f6f2062c386722062617a'

        encoded = force_utf8(DUMMY_STRING)

        self.assertIsInstance(encoded, bytes)
        self.assertEqual(binascii.hexlify(encoded), expected_hex)

    def test_force_utf8_on_unicode(self):
        # The unicode literal must match what the codecs module produces
        # for the same utf8 encoding.
        expected = codecs.encode(DUMMY_UNICODE, 'utf8')

        encoded = force_utf8(DUMMY_UNICODE)

        self.assertIsInstance(encoded, bytes)
        self.assertEqual(encoded, expected)


# Hand over to the testmain helper from tests.support when run as a script
if __name__ == '__main__':
testmain()

0 comments on commit 7d56ec9

Please sign in to comment.