Skip to content

Commit

Permalink
[script.module.unidecode] 1.3.6 (#2494)
Browse files Browse the repository at this point in the history
  • Loading branch information
L2501 authored Aug 18, 2023
1 parent d332aa9 commit cce14b0
Show file tree
Hide file tree
Showing 135 changed files with 5,719 additions and 5,484 deletions.
33 changes: 16 additions & 17 deletions script.module.unidecode/addon.xml
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<addon id="script.module.unidecode" name="unidecode" version="1.1.1+matrix.2" provider-name="Tomaz Solc ([email protected])">
<requires>
<import addon="xbmc.python" version="3.0.0"/>
</requires>
<extension point="xbmc.python.module" library="lib" />
<extension point="xbmc.addon.metadata">
<platform>all</platform>
<summary lang="en_GB">ASCII transliterations of Unicode text by Sean M. Burke and Tomaz Solc</summary>
<description lang="en_GB">ASCII transliterations of Unicode text by Sean M. Burke and Tomaz Solc</description>
<disclaimer lang="en_GB">Code taken from https://pypi.org/project/Unidecode</disclaimer>
<license>GPL-2.0-or-later</license>
<website>https://pypi.org/project/Unidecode</website>
<source>https://pypi.org/project/Unidecode</source>
<assets>
<icon>icon.png</icon>
</assets>
</extension>
<addon id="script.module.unidecode" name="Unidecode" version="1.3.6" provider-name="Tomaz Solc">
<requires>
<import addon="xbmc.python" version="3.0.0"/>
</requires>
<extension point="xbmc.python.module" library="lib" />
<extension point="xbmc.addon.metadata">
<summary lang="en_GB">ASCII transliterations of Unicode text</summary>
<description lang="en_GB">Unidecode, lossy ASCII transliterations of Unicode text</description>
<license>GPL-2.0-or-later</license>
<platform>all</platform>
<website>https://pypi.org/project/Unidecode</website>
<source>https://pypi.org/project/Unidecode</source>
<assets>
<icon>resources/icon.png</icon>
</assets>
</extension>
</addon>
137 changes: 86 additions & 51 deletions script.module.unidecode/lib/unidecode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,101 +3,136 @@
"""Transliterate Unicode text into plain 7-bit ASCII.
Example usage:
>>> from unidecode import unidecode
>>> unidecode(u"\u5317\u4EB0")
>>> unidecode("\u5317\u4EB0")
"Bei Jing "
The transliteration uses a straightforward map, and doesn't have alternatives
for the same character based on language, position, or anything else.
In Python 3, a standard string object will be returned. If you need bytes, use:
A standard string object will be returned. If you need bytes, use:
>>> unidecode("Κνωσός").encode("ascii")
b'Knosos'
"""
import warnings
from sys import version_info
from typing import Dict, Optional, Sequence

Cache = {}
Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]

class UnidecodeError(ValueError):
def __init__(self, message: str, index: Optional[int] = None) -> None:
"""Raised for Unidecode-related errors.
def _warn_if_not_unicode(string):
if version_info[0] < 3 and not isinstance(string, unicode):
warnings.warn( "Argument %r is not an unicode object. "
"Passing an encoded string will likely have "
"unexpected results." % (type(string),),
RuntimeWarning, 2)
The index attribute contains the index of the character that caused
the error.
"""
super(UnidecodeError, self).__init__(message)
self.index = index


def unidecode_expect_ascii(string):
def unidecode_expect_ascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
"""Transliterate a Unicode object into an ASCII string
>>> unidecode(u"\u5317\u4EB0")
>>> unidecode("\u5317\u4EB0")
"Bei Jing "
This function first tries to convert the string using the ASCII codec.
If it fails (because of non-ASCII characters), it falls back to
transliteration using the character tables.
This is approx. five times faster if the string only contains ASCII
characters, but slightly slower than using unidecode directly if non-ASCII
chars are present.
characters, but slightly slower than unidecode_expect_nonascii if
non-ASCII characters are present.
errors specifies what to do with characters that have not been
found in replacement tables. The default is 'ignore' which ignores
the character. 'strict' raises a UnidecodeError. 'replace'
substitutes the character with replace_str (default is '?').
'preserve' keeps the original character.
Note that if 'preserve' is used the returned string might not be
ASCII!
"""

_warn_if_not_unicode(string)
try:
bytestring = string.encode('ASCII')
except UnicodeEncodeError:
return _unidecode(string)
if version_info[0] >= 3:
return string
pass
else:
return bytestring
return string

def unidecode_expect_nonascii(string):
return _unidecode(string, errors, replace_str)

def unidecode_expect_nonascii(string: str, errors: str = 'ignore', replace_str: str = '?') -> str:
"""Transliterate a Unicode object into an ASCII string
>>> unidecode(u"\u5317\u4EB0")
>>> unidecode("\u5317\u4EB0")
"Bei Jing "
See unidecode_expect_ascii.
"""

_warn_if_not_unicode(string)
return _unidecode(string)
return _unidecode(string, errors, replace_str)

unidecode = unidecode_expect_ascii

def _unidecode(string):
retval = []
def _get_repl_str(char: str) -> Optional[str]:
codepoint = ord(char)

for char in string:
codepoint = ord(char)
if codepoint < 0x80:
# Already ASCII
return str(char)

if codepoint < 0x80: # Basic ASCII
retval.append(str(char))
continue

if codepoint > 0xeffff:
continue # Characters in Private Use Area and above are ignored
if codepoint > 0xeffff:
# No data on characters in Private Use Area and above.
return None

if 0xd800 <= codepoint <= 0xdfff:
warnings.warn( "Surrogate character %r will be ignored. "
"You might be using a narrow Python build." % (char,),
RuntimeWarning, 2)
if 0xd800 <= codepoint <= 0xdfff:
warnings.warn( "Surrogate character %r will be ignored. "
"You might be using a narrow Python build." % (char,),
RuntimeWarning, 2)

section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits
section = codepoint >> 8 # Chop off the last two hex digits
position = codepoint % 256 # Last two hex digits

try:
table = Cache[section]
except KeyError:
try:
table = Cache[section]
except KeyError:
try:
mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
except ImportError:
Cache[section] = None
continue # No match: ignore this character and carry on.

Cache[section] = table = mod.data

if table and len(table) > position:
retval.append( table[position] )
mod = __import__('unidecode.x%03x'%(section), globals(), locals(), ['data'])
except ImportError:
# No data on this character
Cache[section] = None
return None

Cache[section] = table = mod.data

if table and len(table) > position:
return table[position]
else:
return None

def _unidecode(string: str, errors: str, replace_str:str) -> str:
retval = []

for index, char in enumerate(string):
repl = _get_repl_str(char)

if repl is None:
if errors == 'ignore':
repl = ''
elif errors == 'strict':
raise UnidecodeError('no replacement found for character %r '
'in position %d' % (char, index), index)
elif errors == 'replace':
repl = replace_str
elif errors == 'preserve':
repl = char
else:
raise UnidecodeError('invalid value for errors parameter %r' % (errors,))

retval.append(repl)

return ''.join(retval)
3 changes: 0 additions & 3 deletions script.module.unidecode/lib/unidecode/__main__.py

This file was deleted.

58 changes: 0 additions & 58 deletions script.module.unidecode/lib/unidecode/util.py

This file was deleted.

6 changes: 3 additions & 3 deletions script.module.unidecode/lib/unidecode/x000.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@
'1', # 0xb9
'o', # 0xba
'>>', # 0xbb
' 1/4 ', # 0xbc
' 1/2 ', # 0xbd
' 3/4 ', # 0xbe
' 1/4', # 0xbc
' 1/2', # 0xbd
' 3/4', # 0xbe
'?', # 0xbf
'A', # 0xc0
'A', # 0xc1
Expand Down
36 changes: 18 additions & 18 deletions script.module.unidecode/lib/unidecode/x002.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
'T', # 0x3e
's', # 0x3f
'z', # 0x40
'[?]', # 0x41
'[?]', # 0x42
None, # 0x41
None, # 0x42
'B', # 0x43
'U', # 0x44
'^', # 0x45
Expand Down Expand Up @@ -238,20 +238,20 @@
'V', # 0xec
'=', # 0xed
'"', # 0xee
'[?]', # 0xef
'[?]', # 0xf0
'[?]', # 0xf1
'[?]', # 0xf2
'[?]', # 0xf3
'[?]', # 0xf4
'[?]', # 0xf5
'[?]', # 0xf6
'[?]', # 0xf7
'[?]', # 0xf8
'[?]', # 0xf9
'[?]', # 0xfa
'[?]', # 0xfb
'[?]', # 0xfc
'[?]', # 0xfd
'[?]', # 0xfe
None, # 0xef
None, # 0xf0
None, # 0xf1
None, # 0xf2
None, # 0xf3
None, # 0xf4
None, # 0xf5
None, # 0xf6
None, # 0xf7
None, # 0xf8
None, # 0xf9
None, # 0xfa
None, # 0xfb
None, # 0xfc
None, # 0xfd
None, # 0xfe
)
Loading

0 comments on commit cce14b0

Please sign in to comment.