Skip to content
This repository was archived by the owner on Apr 4, 2018. It is now read-only.

Commit 1d18884

Browse files
committed
Python 3
2 parents f940ee9 + 9bdd887 commit 1d18884

File tree

6 files changed

+62
-62
lines changed

6 files changed

+62
-62
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
argparse==1.2.1
22
PyYAML==3.10
33
beautifulsoup4==4.2.0
4+
six==1.10.0

tests.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
# encoding=utf-8
22

3+
import six
34
import twitter_text, sys, os, json, argparse, re
45
from twitter_text.unicode import force_unicode
56

67
narrow_build = True
78
try:
8-
unichr(0x20000)
9+
six.unichr(0x20000)
910
narrow_build = False
1011
except:
1112
pass
@@ -177,4 +178,4 @@ def assert_equal(result, test):
177178

178179
sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
179180
sys.stdout.flush()
180-
sys.exit(os.EX_OK)
181+
sys.exit(os.EX_OK)

twitter_text/autolink.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# encoding=utf-8
2-
2+
from __future__ import unicode_literals
33
import re, cgi
44

55
from twitter_text.regex import REGEXEN
@@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
113113
return self.text
114114

115115
# NOTE deprecate these attributes not options keys in options hash, then use html_attrs
116-
options = dict(DEFAULT_OPTIONS.items() + options.items())
116+
options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
117117
options['html_attrs'] = self._extract_html_attrs_from_options(options)
118118
if not options.get('suppress_no_follow', False):
119119
options['html_attrs']['rel'] = "nofollow"
@@ -306,12 +306,12 @@ def _link_url_with_entity(self, entity, options = {}):
306306
expanded_url = entity.get('expanded_url')
307307
invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)
308308

309-
display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
309+
display_url_sans_ellipses = re.sub(r'…', '', display_url)
310310

311311
if expanded_url.find(display_url_sans_ellipses) > -1:
312312
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
313-
preceding_ellipsis = re.search(ur'\A…', display_url)
314-
following_ellipsis = re.search(ur'…\z', display_url)
313+
preceding_ellipsis = re.search(r'\A…', display_url)
314+
following_ellipsis = re.search(r'…\z', display_url)
315315
if preceding_ellipsis is not None:
316316
preceding_ellipsis = preceding_ellipsis.group()
317317
else:
@@ -356,13 +356,13 @@ def _link_to_hashtag(self, entity, chars, options = {}):
356356
if REGEXEN['rtl_chars'].search(hashtag):
357357
hashtag_class += ' rtl'
358358

359-
href = options.get('hashtag_url_transform', lambda ht: u'%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
359+
href = options.get('hashtag_url_transform', lambda ht: '%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
360360

361361
html_attrs = {}
362362
html_attrs.update(options.get('html_attrs', {}))
363363
html_attrs = {
364364
'class': hashtag_class,
365-
'title': u'#%s' % hashtag,
365+
'title': '#%s' % hashtag,
366366
}
367367

368368
link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href, html_attrs, options)
@@ -372,19 +372,19 @@ def _link_to_cashtag(self, entity, chars, options = {}):
372372
dollar = chars[entity['indices'][0]]
373373
cashtag = entity['cashtag']
374374

375-
href = options.get('cashtag_url_transform', lambda ct: u'%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
375+
href = options.get('cashtag_url_transform', lambda ct: '%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
376376

377377
html_attrs = {
378378
'class': options.get('cashtag_class'),
379-
'title': u'$%s' % cashtag
379+
'title': '$%s' % cashtag
380380
}
381381
html_attrs.update(options.get('html_attrs', {}))
382382

383383
link = self._link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
384384
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
385385

386386
def _link_to_screen_name(self, entity, chars, options = {}):
387-
name = u'%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
387+
name = '%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
388388
chunk = options.get('link_text_transform', default_transform)(entity, name)
389389
name = name.lower()
390390

@@ -395,30 +395,30 @@ def _link_to_screen_name(self, entity, chars, options = {}):
395395
del(html_attrs['title'])
396396

397397
if entity.get('list_slug') and not options.get('supress_lists'):
398-
href = options.get('list_url_transform', lambda sn: u'%s%s' % (options.get('list_url_base'), sn))(name)
398+
href = options.get('list_url_transform', lambda sn: '%s%s' % (options.get('list_url_base'), sn))(name)
399399
html_attrs['class'] = options.get('list_class')
400400
else:
401-
href = options.get('username_url_transform', lambda sn: u'%s%s' % (options.get('username_url_base'), sn))(name)
401+
href = options.get('username_url_transform', lambda sn: '%s%s' % (options.get('username_url_base'), sn))(name)
402402
html_attrs['class'] = options.get('username_class')
403403

404404
link = self._link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
405405
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
406406

407407
def _link_to_text_with_symbol(self, entity, symbol, text, href, attributes = {}, options = {}):
408-
tagged_symbol = u'<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
408+
tagged_symbol = '<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
409409
text = self._html_escape(text)
410-
tagged_text = u'<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
410+
tagged_text = '<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
411411
if options.get('username_include_symbol') or not REGEXEN['at_signs'].match(symbol):
412-
return u'%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
412+
return '%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
413413
else:
414-
return u'%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
414+
return '%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
415415

416416
def _link_to_text(self, entity, text, href, attributes = {}, options = {}):
417417
attributes['href'] = href
418-
if options.get('link_attributes_transform'):
419-
attributes = options.get('link_attributes_transform')(entity, attributes)
418+
if options.get('link_attribute_transform'):
419+
attributes = options.get('link_attribute_transform')(entity, attributes)
420420
text = options.get('link_text_transform', default_transform)(entity, text)
421-
return u'<a %s>%s</a>' % (self._tag_attrs(attributes), text)
421+
return '<a %s>%s</a>' % (self._tag_attrs(attributes), text)
422422

423423
def _tag_attrs(self, attributes = {}):
424424
attrs = []
@@ -428,7 +428,7 @@ def _tag_attrs(self, attributes = {}):
428428
attrs.append(key)
429429
continue
430430
if type(value) == list:
431-
value = u' '.join(value)
432-
attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))
431+
value = ' '.join(value)
432+
attrs.append('%s="%s"' % (self._html_escape(key), self._html_escape(value)))
433433

434-
return u' '.join(attrs)
434+
return ' '.join(attrs)

twitter_text/highlighter.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
# encoding=utf-8
22

33
import re
4-
from HTMLParser import HTMLParser
4+
from six.moves import html_parser
55

66
from twitter_text.regex import UNICODE_SPACES
77
from twitter_text.unicode import force_unicode
88

99
DEFAULT_HIGHLIGHT_TAG = 'em'
1010

1111
# from http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
12-
class MLStripper(HTMLParser):
12+
class MLStripper(html_parser.HTMLParser):
1313
def __init__(self):
1414
self.reset()
1515
self.fed = []
@@ -80,4 +80,4 @@ def hit_highlight(self, hits = [], **kwargs):
8080
else:
8181
result.append(chunk)
8282
self.text = u''.join(result)
83-
return self.text
83+
return self.text

twitter_text/regex.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,35 +4,36 @@
44
# list is frozen at load time to ensure immutability. These regular expressions are
55
# used throughout the Twitter classes. Special care has been taken to make
66
# sure these regular expressions work with Tweets in all languages.
7-
import re, string
7+
import re, string, six
8+
from six.moves import reduce
89

910
REGEXEN = {} # :nodoc:
1011

1112
def regex_range(start, end = None):
1213
if end:
13-
return u'%s-%s' % (unichr(start), unichr(end))
14+
return u'%s-%s' % (six.unichr(start), six.unichr(end))
1415
else:
15-
return u'%s' % unichr(start)
16+
return u'%s' % six.unichr(start)
1617

1718
# Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand
1819
# to access both the list of characters and a pattern suitable for use with String#split
1920
# Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE
2021
UNICODE_SPACES = []
2122
for space in reduce(lambda x,y: x + y if type(y) == list else x + [y], [
22-
range(0x0009, 0x000D), # White_Space # Cc [5] <control-0009>..<control-000D>
23+
list(range(0x0009, 0x000D)), # White_Space # Cc [5] <control-0009>..<control-000D>
2324
0x0020, # White_Space # Zs SPACE
2425
0x0085, # White_Space # Cc <control-0085>
2526
0x00A0, # White_Space # Zs NO-BREAK SPACE
2627
0x1680, # White_Space # Zs OGHAM SPACE MARK
2728
0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
28-
range(0x2000, 0x200A), # White_Space # Zs [11] EN QUAD..HAIR SPACE
29+
list(range(0x2000, 0x200A)), # White_Space # Zs [11] EN QUAD..HAIR SPACE
2930
0x2028, # White_Space # Zl LINE SEPARATOR
3031
0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
3132
0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
3233
0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
3334
0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
3435
]):
35-
UNICODE_SPACES.append(unichr(space))
36+
UNICODE_SPACES.append(six.unichr(space))
3637
REGEXEN['spaces'] = re.compile(ur''.join(UNICODE_SPACES))
3738

3839
# Characters not allowed in Tweets
@@ -41,7 +42,7 @@ def regex_range(start, end = None):
4142
0xFFFF, # Special
4243
0x202A, 0x202B, 0x202C, 0x202D, 0x202E, # Directional change
4344
]
44-
REGEXEN['invalid_control_characters'] = [unichr(x) for x in INVALID_CHARACTERS]
45+
REGEXEN['invalid_control_characters'] = [six.unichr(x) for x in INVALID_CHARACTERS]
4546

4647
REGEXEN['list_name'] = re.compile(ur'^[a-zA-Z][a-zA-Z0-9_\-\u0080-\u00ff]{0,24}$')
4748

twitter_text/unicode.py

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
import types, datetime
1+
import types, datetime, six
22
from decimal import Decimal
33

4+
45
# borrowed from django.utils.encoding
56
class TwitterTextUnicodeDecodeError(UnicodeDecodeError):
67
def __init__(self, obj, *args):
@@ -18,46 +19,41 @@ def is_protected_type(obj):
1819
Objects of protected types are preserved as-is when passed to
1920
force_unicode(strings_only=True).
2021
"""
21-
return isinstance(obj, (
22-
types.NoneType,
23-
int, long,
22+
return isinstance(obj, six.integer_types + (
23+
type(None),
2424
datetime.datetime, datetime.date, datetime.time,
2525
float, Decimal)
2626
)
2727

2828
def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
2929
"""
30-
Similar to smart_unicode, except that lazy instances are resolved to
30+
Similar to smart_text, except that lazy instances are resolved to
3131
strings, rather than kept as lazy objects.
3232
3333
If strings_only is True, don't convert (some) non-string-like objects.
3434
"""
35+
# Handle the common case first for performance reasons.
36+
if issubclass(type(s), six.text_type):
37+
return s
3538
if strings_only and is_protected_type(s):
3639
return s
3740
try:
38-
if not isinstance(s, basestring,):
39-
if hasattr(s, '__unicode__'):
40-
s = unicode(s)
41+
if not issubclass(type(s), six.string_types):
42+
if six.PY3:
43+
if isinstance(s, bytes):
44+
s = six.text_type(s, encoding, errors)
45+
else:
46+
s = six.text_type(s)
47+
elif hasattr(s, '__unicode__'):
48+
s = six.text_type(s)
4149
else:
42-
try:
43-
s = unicode(str(s), encoding, errors)
44-
except UnicodeEncodeError:
45-
if not isinstance(s, Exception):
46-
raise
47-
# If we get to here, the caller has passed in an Exception
48-
# subclass populated with non-ASCII data without special
49-
# handling to display as a string. We need to handle this
50-
# without raising a further exception. We do an
51-
# approximation to what the Exception's standard str()
52-
# output should be.
53-
s = ' '.join([force_unicode(arg, encoding, strings_only,
54-
errors) for arg in s])
55-
elif not isinstance(s, unicode):
56-
# Note: We use .decode() here, instead of unicode(s, encoding,
57-
# errors), so that if s is a SafeString, it ends up being a
58-
# SafeUnicode at the end.
50+
s = six.text_type(bytes(s), encoding, errors)
51+
else:
52+
# Note: We use .decode() here, instead of six.text_type(s, encoding,
53+
# errors), so that if s is a SafeBytes, it ends up being a
54+
# SafeText at the end.
5955
s = s.decode(encoding, errors)
60-
except UnicodeDecodeError, e:
56+
except UnicodeDecodeError as e:
6157
if not isinstance(s, Exception):
6258
raise TwitterTextUnicodeDecodeError(s, *e.args)
6359
else:
@@ -66,6 +62,7 @@ def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
6662
# working unicode method. Try to handle this without raising a
6763
# further exception by individually forcing the exception args
6864
# to unicode.
69-
s = ' '.join([force_unicode(arg, encoding, strings_only,
70-
errors) for arg in s])
65+
s = ' '.join(force_unicode(arg, encoding, strings_only, errors)
66+
for arg in s)
7167
return s
68+

0 commit comments

Comments
 (0)