From f4da1a268ab23086bedfd11d56e784bb5cbb7601 Mon Sep 17 00:00:00 2001 From: Jesse van den Kieboom Date: Mon, 1 Dec 2014 08:46:13 +0100 Subject: [PATCH] More robust conversion to unicode Fixes #72. --- cldoc/utf8.py | 9 ++++++--- tests/input/utf8.hh | 4 ++++ tests/output/utf8-A.xml | 4 ++++ tests/output/utf8-index.xml | 6 ++++++ 4 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 tests/input/utf8.hh create mode 100644 tests/output/utf8-A.xml create mode 100644 tests/output/utf8-index.xml diff --git a/cldoc/utf8.py b/cldoc/utf8.py index 263b78e..78fb3a0 100644 --- a/cldoc/utf8.py +++ b/cldoc/utf8.py @@ -5,7 +5,10 @@ def makeutf8(s): if not isinstance(s, unicode): if hasattr(s, '__unicode__'): - return unicode(s) + if isinstance(s, str) or isinstance(s, buffer): + return unicode(s, 'utf-8') + else: + return unicode(s) return str(s).decode('utf-8') @@ -25,8 +28,8 @@ def makeutf8(s): string = basecls class utf8(string): - def __init__(self, s): - super(utf8, self).__init__(makeutf8(s)) + def __new__(cls, s): + return super(utf8, cls).__new__(cls, makeutf8(s)) def __str__(self): if not isinstance(self, str): diff --git a/tests/input/utf8.hh b/tests/input/utf8.hh new file mode 100644 index 0000000..f865af0 --- /dev/null +++ b/tests/input/utf8.hh @@ -0,0 +1,4 @@ +/* Copyright © */ +class A +{ +}; \ No newline at end of file diff --git a/tests/output/utf8-A.xml b/tests/output/utf8-A.xml new file mode 100644 index 0000000..50aaa60 --- /dev/null +++ b/tests/output/utf8-A.xml @@ -0,0 +1,4 @@ + + + Copyright © + diff --git a/tests/output/utf8-index.xml b/tests/output/utf8-index.xml new file mode 100644 index 0000000..5520e2f --- /dev/null +++ b/tests/output/utf8-index.xml @@ -0,0 +1,6 @@ + + + + Copyright © + +