Skip to content
This repository was archived by the owner on Apr 4, 2018. It is now read-only.

Commit d431fce

Browse files
committed
Python 3
2 parents f940ee9 + 9bdd887 commit d431fce

File tree

7 files changed

+156
-150
lines changed

7 files changed

+156
-150
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
argparse==1.2.1
22
PyYAML==3.10
33
beautifulsoup4==4.2.0
4+
six==1.10.0

tests.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
# encoding=utf-8
22

3+
import six
34
import twitter_text, sys, os, json, argparse, re
45
from twitter_text.unicode import force_unicode
56

67
narrow_build = True
78
try:
8-
unichr(0x20000)
9+
six.unichr(0x20000)
910
narrow_build = False
1011
except:
1112
pass
@@ -177,4 +178,4 @@ def assert_equal(result, test):
177178

178179
sys.stdout.write(u'\033[0m-------\n\033[92m%d tests passed.\033[0m\n' % attempted)
179180
sys.stdout.flush()
180-
sys.exit(os.EX_OK)
181+
sys.exit(os.EX_OK)

twitter_text/autolink.py

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
# encoding=utf-8
2-
2+
from __future__ import unicode_literals
33
import re, cgi
44

55
from twitter_text.regex import REGEXEN
@@ -113,7 +113,7 @@ def auto_link_entities(self, entities = [], options = {}):
113113
return self.text
114114

115115
# NOTE: deprecate passing HTML attributes as extra (non-option) keys in the options hash; use options['html_attrs'] instead
116-
options = dict(DEFAULT_OPTIONS.items() + options.items())
116+
options = dict(list(DEFAULT_OPTIONS.items()) + list(options.items()))
117117
options['html_attrs'] = self._extract_html_attrs_from_options(options)
118118
if not options.get('suppress_no_follow', False):
119119
options['html_attrs']['rel'] = "nofollow"
@@ -302,16 +302,16 @@ def _link_url_with_entity(self, entity, options = {}):
302302
For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
303303
For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine.
304304
"""
305-
display_url = entity.get('display_url').decode('utf-8')
305+
display_url = entity.get('display_url')
306306
expanded_url = entity.get('expanded_url')
307307
invisible_tag_attrs = options.get('invisible_tag_attrs', DEFAULT_INVISIBLE_TAG_ATTRS)
308308

309-
display_url_sans_ellipses = re.sub(ur'…', u'', display_url)
309+
display_url_sans_ellipses = re.sub(r'…', '', display_url)
310310

311311
if expanded_url.find(display_url_sans_ellipses) > -1:
312312
before_display_url, after_display_url = expanded_url.split(display_url_sans_ellipses, 2)
313-
preceding_ellipsis = re.search(ur'\A…', display_url)
314-
following_ellipsis = re.search(ur'…\z', display_url)
313+
preceding_ellipsis = re.search(r'\A…', display_url)
314+
following_ellipsis = re.search(r'…\Z', display_url)
315315
if preceding_ellipsis is not None:
316316
preceding_ellipsis = preceding_ellipsis.group()
317317
else:
@@ -344,7 +344,7 @@ def _link_url_with_entity(self, entity, options = {}):
344344
# …
345345
# </span>
346346

347-
return u"<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
347+
return "<span class='tco-ellipsis'>%s<span %s>&nbsp;</span></span><span %s>%s</span><span class='js-display-url'>%s</span><span %s>%s</span><span class='tco-ellipsis'><span %s>&nbsp;</span>%s</span>" % (preceding_ellipsis, invisible_tag_attrs, invisible_tag_attrs, self._html_escape(before_display_url), self._html_escape(display_url_sans_ellipses), invisible_tag_attrs, self._html_escape(after_display_url), invisible_tag_attrs, following_ellipsis)
348348
else:
349349
return self._html_escape(display_url)
350350

@@ -356,13 +356,13 @@ def _link_to_hashtag(self, entity, chars, options = {}):
356356
if REGEXEN['rtl_chars'].search(hashtag):
357357
hashtag_class += ' rtl'
358358

359-
href = options.get('hashtag_url_transform', lambda ht: u'%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
359+
href = options.get('hashtag_url_transform', lambda ht: '%s%s' % (options.get('hashtag_url_base'), ht))(hashtag)
360360

361361
html_attrs = {}
362362
html_attrs.update(options.get('html_attrs', {}))
363363
html_attrs = {
364364
'class': hashtag_class,
365-
'title': u'#%s' % hashtag,
365+
'title': '#%s' % hashtag,
366366
}
367367

368368
link = self._link_to_text_with_symbol(entity, hashchar, hashtag, href, html_attrs, options)
@@ -372,19 +372,19 @@ def _link_to_cashtag(self, entity, chars, options = {}):
372372
dollar = chars[entity['indices'][0]]
373373
cashtag = entity['cashtag']
374374

375-
href = options.get('cashtag_url_transform', lambda ct: u'%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
375+
href = options.get('cashtag_url_transform', lambda ct: '%s%s' % (options.get('cashtag_url_base'), ct))(cashtag)
376376

377377
html_attrs = {
378378
'class': options.get('cashtag_class'),
379-
'title': u'$%s' % cashtag
379+
'title': '$%s' % cashtag
380380
}
381381
html_attrs.update(options.get('html_attrs', {}))
382382

383383
link = self._link_to_text_with_symbol(entity, dollar, cashtag, href, html_attrs, options)
384384
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
385385

386386
def _link_to_screen_name(self, entity, chars, options = {}):
387-
name = u'%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
387+
name = '%s%s' % (entity['screen_name'], entity.get('list_slug') or '')
388388
chunk = options.get('link_text_transform', default_transform)(entity, name)
389389
name = name.lower()
390390

@@ -395,30 +395,30 @@ def _link_to_screen_name(self, entity, chars, options = {}):
395395
del(html_attrs['title'])
396396

397397
if entity.get('list_slug') and not options.get('supress_lists'):
398-
href = options.get('list_url_transform', lambda sn: u'%s%s' % (options.get('list_url_base'), sn))(name)
398+
href = options.get('list_url_transform', lambda sn: '%s%s' % (options.get('list_url_base'), sn))(name)
399399
html_attrs['class'] = options.get('list_class')
400400
else:
401-
href = options.get('username_url_transform', lambda sn: u'%s%s' % (options.get('username_url_base'), sn))(name)
401+
href = options.get('username_url_transform', lambda sn: '%s%s' % (options.get('username_url_base'), sn))(name)
402402
html_attrs['class'] = options.get('username_class')
403403

404404
link = self._link_to_text_with_symbol(entity, at, chunk, href, html_attrs, options)
405405
return chars[:entity['indices'][0]] + link + chars[entity['indices'][1]:]
406406

407407
def _link_to_text_with_symbol(self, entity, symbol, text, href, attributes = {}, options = {}):
408-
tagged_symbol = u'<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
408+
tagged_symbol = '<%s>%s</%s>' % (options.get('symbol_tag'), symbol, options.get('symbol_tag')) if options.get('symbol_tag') else symbol
409409
text = self._html_escape(text)
410-
tagged_text = u'<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
410+
tagged_text = '<%s>%s</%s>' % (options.get('text_with_symbol_tag'), text, options.get('text_with_symbol_tag')) if options.get('text_with_symbol_tag') else text
411411
if options.get('username_include_symbol') or not REGEXEN['at_signs'].match(symbol):
412-
return u'%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
412+
return '%s' % self._link_to_text(entity, tagged_symbol + tagged_text, href, attributes, options)
413413
else:
414-
return u'%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
414+
return '%s%s' % (tagged_symbol, self._link_to_text(entity, tagged_text, href, attributes, options))
415415

416416
def _link_to_text(self, entity, text, href, attributes = {}, options = {}):
417417
attributes['href'] = href
418-
if options.get('link_attributes_transform'):
419-
attributes = options.get('link_attributes_transform')(entity, attributes)
418+
if options.get('link_attribute_transform'):
419+
attributes = options.get('link_attribute_transform')(entity, attributes)
420420
text = options.get('link_text_transform', default_transform)(entity, text)
421-
return u'<a %s>%s</a>' % (self._tag_attrs(attributes), text)
421+
return '<a %s>%s</a>' % (self._tag_attrs(attributes), text)
422422

423423
def _tag_attrs(self, attributes = {}):
424424
attrs = []
@@ -428,7 +428,7 @@ def _tag_attrs(self, attributes = {}):
428428
attrs.append(key)
429429
continue
430430
if type(value) == list:
431-
value = u' '.join(value)
432-
attrs.append(u'%s="%s"' % (self._html_escape(key), self._html_escape(value)))
431+
value = ' '.join(value)
432+
attrs.append('%s="%s"' % (self._html_escape(key), self._html_escape(value)))
433433

434-
return u' '.join(attrs)
434+
return ' '.join(attrs)

twitter_text/highlighter.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
11
# encoding=utf-8
2-
2+
from __future__ import unicode_literals
33
import re
4-
from HTMLParser import HTMLParser
4+
from six.moves import html_parser
55

66
from twitter_text.regex import UNICODE_SPACES
77
from twitter_text.unicode import force_unicode
88

99
DEFAULT_HIGHLIGHT_TAG = 'em'
1010

1111
# from http://stackoverflow.com/questions/753052/strip-html-from-strings-in-python
12-
class MLStripper(HTMLParser):
12+
class MLStripper(html_parser.HTMLParser):
1313
def __init__(self):
1414
self.reset()
1515
self.fed = []
@@ -34,14 +34,14 @@ def hit_highlight(self, hits = [], **kwargs):
3434

3535
if not hits and kwargs.get('query'):
3636
stripped_text = strip_tags(self.text)
37-
for match in re.finditer(ur'%s' % kwargs.get('query'), stripped_text):
37+
for match in re.finditer(r'%s' % kwargs.get('query'), stripped_text):
3838
hits.append(match.span())
3939

4040
if hits and not type(hits) == list:
4141
raise Exception('The syntax for the hit_highlight method has changed. You must pass in a list of lists containing the indices of the strings you want to match.')
4242

4343
tag_name = kwargs.get('tag', DEFAULT_HIGHLIGHT_TAG)
44-
tags = [u'<%s>' % tag_name, u'</%s>' % tag_name]
44+
tags = ['<%s>' % tag_name, '</%s>' % tag_name]
4545

4646
text = self.text
4747
chunks = re.split(r'[<>]', text)
@@ -58,7 +58,7 @@ def hit_highlight(self, hits = [], **kwargs):
5858
if index % 2:
5959
# we're inside a <tag>
6060
continue
61-
chunk_start = len(u''.join(text_chunks[0:index / 2]))
61+
chunk_start = len(''.join(text_chunks[0:index / 2]))
6262
chunk_end = chunk_start + len(chunk)
6363
if hit_start >= chunk_start and hit_start < chunk_end:
6464
chunk = chunk[:hit_start - chunk_start] + tags[0] + chunk[hit_start - chunk_start:]
@@ -76,8 +76,8 @@ def hit_highlight(self, hits = [], **kwargs):
7676
for index, chunk in enumerate(chunks):
7777
if index % 2:
7878
# we're inside a <tag>
79-
result.append(u'<%s>' % chunk)
79+
result.append('<%s>' % chunk)
8080
else:
8181
result.append(chunk)
82-
self.text = u''.join(result)
83-
return self.text
82+
self.text = ''.join(result)
83+
return self.text

0 commit comments

Comments
 (0)