From cb3846d52ed375a5a166b906046cac5c15d08818 Mon Sep 17 00:00:00 2001 From: luoliyan Date: Mon, 29 Mar 2021 11:43:17 +0930 Subject: [PATCH] Update gTTS; fix failing unit tests --- Makefile | 2 +- chinese/_version.py | 2 +- chinese/about.py | 1 + chinese/config.json | 4 +- chinese/gui.py | 4 +- chinese/lib/gtts/lang.py | 151 ++++------ chinese/lib/gtts/langs.py | 64 +++++ chinese/lib/gtts/tests/__init__.py | 0 .../tests/input_files/test_cli_test_ascii.txt | 2 + .../tests/input_files/test_cli_test_utf8.txt | 5 + chinese/lib/gtts/tests/test_cli.py | 264 ++++++++++++++++++ chinese/lib/gtts/tests/test_lang.py | 23 ++ chinese/lib/gtts/tests/test_tts.py | 181 ++++++++++++ chinese/lib/gtts/tests/test_utils.py | 62 ++++ chinese/lib/gtts/tokenizer/tests/test_core.py | 73 +++++ .../tokenizer/tests/test_pre_processors.py | 30 ++ .../tokenizer/tests/test_tokenizer_cases.py | 44 +++ chinese/lib/gtts/tts.py | 55 ++-- chinese/lib/gtts/version.py | 2 +- chinese/tts.py | 4 +- tests/__init__.py | 1 + 21 files changed, 840 insertions(+), 134 deletions(-) create mode 100644 chinese/lib/gtts/langs.py create mode 100644 chinese/lib/gtts/tests/__init__.py create mode 100644 chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt create mode 100644 chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt create mode 100644 chinese/lib/gtts/tests/test_cli.py create mode 100644 chinese/lib/gtts/tests/test_lang.py create mode 100644 chinese/lib/gtts/tests/test_tts.py create mode 100644 chinese/lib/gtts/tests/test_utils.py create mode 100644 chinese/lib/gtts/tokenizer/tests/test_core.py create mode 100644 chinese/lib/gtts/tokenizer/tests/test_pre_processors.py create mode 100644 chinese/lib/gtts/tokenizer/tests/test_tokenizer_cases.py diff --git a/Makefile b/Makefile index 7a3d8a1..a8f8271 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ PROJECT_SHORT = chinese PROJECT_LONG = chinese-support-redux -VERSION = 0.14.0 +VERSION = 0.14.2 XDG_DATA_HOME ?= $(HOME)/.local/share ADDON_PATH = "$(XDG_DATA_HOME)/Anki2/addons21/$(PROJECT_LONG)" ZIP_NAME = $(PROJECT_LONG)-v$(VERSION).zip diff --git a/chinese/_version.py b/chinese/_version.py index ef91994..c41af0b 100644 --- a/chinese/_version.py +++ b/chinese/_version.py @@ -1 +1 @@ -__version__ = '0.14.0' +__version__ = '0.14.2' diff --git a/chinese/about.py b/chinese/about.py index 2520994..aa198b3 100644 --- a/chinese/about.py +++ b/chinese/about.py @@ -33,6 +33,7 @@ def showAbout(): contributors = [ 'Alex Griffin', 'Chris Hatch', + 'Joe Minicucci', 'Roland Sieker', 'Thomas TEMPÉ', ] diff --git a/chinese/config.json b/chinese/config.json index 726ea93..5147a67 100644 --- a/chinese/config.json +++ b/chinese/config.json @@ -1,8 +1,8 @@ { "firstRun": true, - "version": "0.14.0", + "version": "0.14.2", "enabledModels": [], - "speech": "google|zh-cn", + "speech": "google|zh-CN", "target": "pinyin", "max_examples": -1, "fields": { diff --git a/chinese/gui.py b/chinese/gui.py index 775c4a0..64faea3 100644 --- a/chinese/gui.py +++ b/chinese/gui.py @@ -41,8 +41,8 @@ SPEECH_ENGINES = { 'Baidu Translate': 'baidu|zh', - 'Google Mandarin (PRC)': 'google|zh-cn', - 'Google Mandarin (Taiwan)': 'google|zh-tw', + 'Google Mandarin (PRC)': 'google|zh-CN', + 'Google Mandarin (Taiwan)': 'google|zh-TW', 'Amazon Polly' : 'aws|Zhiyu', 'Disabled': None, } diff --git a/chinese/lib/gtts/lang.py b/chinese/lib/gtts/lang.py index 089c84c..fbb5e1f 100644 --- a/chinese/lib/gtts/lang.py +++ b/chinese/lib/gtts/lang.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +from gtts.langs import _main_langs +from 
warnings import warn import logging __all__ = ['tts_langs'] @@ -14,13 +16,13 @@ def tts_langs(): Returns: dict: A dictionary of the type `{ '': ''}` - Where `` is an IETF language tag such as `en` or `pt-br`, + Where `` is an IETF language tag such as `en` or `zh-TW`, and `` is the full English name of the language, such as - `English` or `Portuguese (Brazil)`. + `English` or `Chinese (Mandarin/Taiwan)`. The dictionary returned combines languages from two origins: - - Languages fetched from Google Translate + - Languages fetched from Google Translate (pre-generated in :mod:`gtts.langs`) - Languages that are undocumented variations that were observed to work and present different dialects or accents. @@ -32,112 +34,65 @@ def tts_langs(): return langs -def _main_langs(): - """Define the main languages. +def _extra_langs(): + """Define extra languages. Returns: - dict: A dictionnary of the main languages extracted from - Google Translate. + dict: A dictionnary of extra languages manually defined. + + Variations of the ones generated in `_main_langs`, + observed to provide different dialects or accents or + just simply accepted by the Google Translate Text-to-Speech API. """ return { - 'af': 'Afrikaans', - 'ar': 'Arabic', - 'bn': 'Bengali', - 'bs': 'Bosnian', - 'ca': 'Catalan', - 'cs': 'Czech', - 'cy': 'Welsh', - 'da': 'Danish', - 'de': 'German', - 'el': 'Greek', - 'en': 'English', - 'eo': 'Esperanto', - 'es': 'Spanish', - 'et': 'Estonian', - 'fi': 'Finnish', - 'fr': 'French', - 'gu': 'Gujarati', - 'hi': 'Hindi', - 'hr': 'Croatian', - 'hu': 'Hungarian', - 'hy': 'Armenian', - 'id': 'Indonesian', - 'is': 'Icelandic', - 'it': 'Italian', - 'ja': 'Japanese', - 'jw': 'Javanese', - 'km': 'Khmer', - 'kn': 'Kannada', - 'ko': 'Korean', - 'la': 'Latin', - 'lv': 'Latvian', - 'mk': 'Macedonian', - 'ml': 'Malayalam', - 'mr': 'Marathi', - 'my': 'Myanmar (Burmese)', - 'ne': 'Nepali', - 'nl': 'Dutch', - 'no': 'Norwegian', - 'pl': 'Polish', - 'pt': 'Portuguese', - 'ro': 'Romanian', - 'ru': 'Russian', - 'si': 'Sinhala', - 'sk': 'Slovak', - 'sq': 'Albanian', - 'sr': 'Serbian', - 'su': 'Sundanese', - 'sv': 'Swedish', - 'sw': 'Swahili', - 'ta': 'Tamil', - 'te': 'Telugu', - 'th': 'Thai', - 'tl': 'Filipino', - 'tr': 'Turkish', - 'uk': 'Ukrainian', - 'ur': 'Urdu', - 'vi': 'Vietnamese', - 'zh-CN': 'Chinese' + # Chinese + 'zh-TW': 'Chinese (Mandarin/Taiwan)', + 'zh': 'Chinese (Mandarin)' } -def _extra_langs(): - """Define extra languages. +def _fallback_deprecated_lang(lang): + """Languages Google Text-to-Speech used to support. + + Language tags that don't work anymore, but that can + fallback to a more general language code to maintain + compatibility. + + Args: + lang (string): The language tag. Returns: - dict: A dictionnary of extra languages manually defined. + string: The language tag, as-is if not deprecated, + or a fallack if it exits. - Variations of the ones fetched by `_main_langs`, - observed to provide different dialects or accents or - just simply accepted by the Google Translate Text-to-Speech API. + Example: + ``en-GB`` returns ``en``. + ``en-gb`` returns ``en``. 
""" - return { - # Chinese - 'zh-cn': 'Chinese (Mandarin/China)', - 'zh-tw': 'Chinese (Mandarin/Taiwan)', - # English - 'en-us': 'English (US)', - 'en-ca': 'English (Canada)', - 'en-uk': 'English (UK)', - 'en-gb': 'English (UK)', - 'en-au': 'English (Australia)', - 'en-gh': 'English (Ghana)', - 'en-in': 'English (India)', - 'en-ie': 'English (Ireland)', - 'en-nz': 'English (New Zealand)', - 'en-ng': 'English (Nigeria)', - 'en-ph': 'English (Philippines)', - 'en-za': 'English (South Africa)', - 'en-tz': 'English (Tanzania)', - # French - 'fr-ca': 'French (Canada)', - 'fr-fr': 'French (France)', - # Portuguese - 'pt-br': 'Portuguese (Brazil)', - 'pt-pt': 'Portuguese (Portugal)', - # Spanish - 'es-es': 'Spanish (Spain)', - 'es-us': 'Spanish (United States)' + + deprecated = { + # '': [] + 'en': ['en-us', 'en-ca', 'en-uk', 'en-gb', 'en-au', 'en-gh', 'en-in', + 'en-ie', 'en-nz', 'en-ng', 'en-ph', 'en-za', 'en-tz'], + 'fr': ['fr-ca', 'fr-fr'], + 'pt': ['pt-br', 'pt-pt'], + 'es': ['es-es', 'es-us'], + 'zh-CN': ['zh-cn'], + 'zh-TW': ['zh-tw'], } + + for fallback_lang, deprecated_langs in deprecated.items(): + if lang.lower() in deprecated_langs: + msg = ( + "'{}' has been deprecated, falling back to '{}'. " + "This fallback will be removed in a future version." + ).format(lang, fallback_lang) + + warn(msg, DeprecationWarning) + log.warning(msg) + + return fallback_lang + + return lang \ No newline at end of file diff --git a/chinese/lib/gtts/langs.py b/chinese/lib/gtts/langs.py new file mode 100644 index 0000000..449f039 --- /dev/null +++ b/chinese/lib/gtts/langs.py @@ -0,0 +1,64 @@ +# Note: this file is generated +_langs = { + "af": "Afrikaans", + "ar": "Arabic", + "bn": "Bengali", + "bs": "Bosnian", + "ca": "Catalan", + "cs": "Czech", + "cy": "Welsh", + "da": "Danish", + "de": "German", + "el": "Greek", + "en": "English", + "eo": "Esperanto", + "es": "Spanish", + "et": "Estonian", + "fi": "Finnish", + "fr": "French", + "gu": "Gujarati", + "hi": "Hindi", + "hr": "Croatian", + "hu": "Hungarian", + "hy": "Armenian", + "id": "Indonesian", + "is": "Icelandic", + "it": "Italian", + "ja": "Japanese", + "jw": "Javanese", + "km": "Khmer", + "kn": "Kannada", + "ko": "Korean", + "la": "Latin", + "lv": "Latvian", + "mk": "Macedonian", + "ml": "Malayalam", + "mr": "Marathi", + "my": "Myanmar (Burmese)", + "ne": "Nepali", + "nl": "Dutch", + "no": "Norwegian", + "pl": "Polish", + "pt": "Portuguese", + "ro": "Romanian", + "ru": "Russian", + "si": "Sinhala", + "sk": "Slovak", + "sq": "Albanian", + "sr": "Serbian", + "su": "Sundanese", + "sv": "Swedish", + "sw": "Swahili", + "ta": "Tamil", + "te": "Telugu", + "th": "Thai", + "tl": "Filipino", + "tr": "Turkish", + "uk": "Ukrainian", + "ur": "Urdu", + "vi": "Vietnamese", + "zh-CN": "Chinese" +} + +def _main_langs(): + return _langs diff --git a/chinese/lib/gtts/tests/__init__.py b/chinese/lib/gtts/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt b/chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt new file mode 100644 index 0000000..ce0019b --- /dev/null +++ b/chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt @@ -0,0 +1,2 @@ +Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger? +How much will it cost the website doesn't have the theme i was going for. 
\ No newline at end of file diff --git a/chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt b/chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt new file mode 100644 index 0000000..5bde1bc --- /dev/null +++ b/chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt @@ -0,0 +1,5 @@ +这是一个三岁的小孩 +在讲述她从一系列照片里看到的东西。 +对这个世界, 她也许还有很多要学的东西, +但在一个重要的任务上, 她已经是专家了: +去理解她所看到的东西。 diff --git a/chinese/lib/gtts/tests/test_cli.py b/chinese/lib/gtts/tests/test_cli.py new file mode 100644 index 0000000..b801bda --- /dev/null +++ b/chinese/lib/gtts/tests/test_cli.py @@ -0,0 +1,264 @@ +# -*- coding: utf-8 -*- +import pytest +import re +import os +from click.testing import CliRunner +from gtts.cli import tts_cli + +# Need to look into gTTS' log output to test proper instantiation +# - Use testfixtures.LogCapture() b/c TestCase.assertLogs() needs py3.4+ +# - Clear 'gtts' logger handlers (set in gtts.cli) to reduce test noise +import logging +from testfixtures import LogCapture +logger = logging.getLogger('gtts') +logger.handlers = [] + + +"""Test options and arguments""" + + +def runner(args, input=None): + return CliRunner().invoke(tts_cli, args, input) + + +def runner_debug(args, input=None): + return CliRunner().invoke(tts_cli, args + ['--debug'], input) + + +# tests +def test_text_no_text_or_file(): + """One of (arg) and should be set""" + result = runner_debug([]) + + assert " required" in result.output + assert result.exit_code != 0 + + +def test_text_text_and_file(tmp_path): + """ (arg) and should not be set together""" + filename = tmp_path / 'test_and_file.txt' + filename.touch() + + result = runner_debug(['--file', str(filename), 'test']) + + assert " can't be used together" in result.output + assert result.exit_code != 0 + + +def test_text_empty(tmp_path): + """Exit on no text to speak (via )""" + filename = tmp_path / 'text_empty.txt' + filename.touch() + + result = runner_debug(['--file', str(filename)]) + + assert "No text to speak" in result.output + assert result.exit_code != 0 + + +# tests +def test_file_not_exists(): + """ should exist""" + result = runner_debug(['--file', 'notexist.txt', 'test']) + + assert "No such file or directory" in result.output + assert result.exit_code != 0 + + +# tests +@pytest.mark.net +def test_all(): + """Option should return a list of languages""" + result = runner(['--all']) + + # One or more of " xy: name" (\n optional to match the last) + # Ex. 
" xx: xxxxx\n xx-yy: xxxxx\n xx: xxxxx" + + assert re.match(r"^(?:\s{2}(\w{2}|\w{2}-\w{2}): .+\n?)+$", result.output) + assert result.exit_code == 0 + + +# tests +@pytest.mark.net +def test_lang_not_valid(): + """Invalid should display an error""" + result = runner(['--lang', 'xx', 'test']) + + assert "xx' not in list of supported languages" in result.output + assert result.exit_code != 0 + + +@pytest.mark.net +def test_lang_nocheck(): + """Invalid (with ) should display an error message from gtts""" + with LogCapture() as lc: + result = runner_debug(['--lang', 'xx', '--nocheck', 'test']) + + log = str(lc) + + assert 'lang: xx' in log + assert 'lang_check: False' in log + assert "Unsupported language 'xx'" in result.output + assert result.exit_code != 0 + +# Param set tests +@pytest.mark.net +def test_params_set(): + """Options should set gTTS instance arguments (read from debug log)""" + with LogCapture() as lc: + result = runner_debug(['--lang', 'fr', '--tld', 'es', '--slow', '--nocheck', 'test']) + + log = str(lc) + + assert 'lang: fr' in log + assert 'tld: es' in log + assert 'lang_check: False' in log + assert 'slow: True' in log + assert 'text: test' in log + assert result.exit_code == 0 + + +# Test all input methods +pwd = os.path.dirname(__file__) + +# Text for stdin ('-' for or ) +textstdin = """stdin +test +123""" + +# Text for stdin ('-' for or ) (Unicode) +textstdin_unicode = u"""你吃饭了吗? +你最喜欢哪部电影? +我饿了,我要去做饭了。""" + +# Text for and +text = """Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger? +How much will it cost the website doesn't have the theme i was going for.""" + +textfile_ascii = os.path.join(pwd, 'input_files', 'test_cli_test_ascii.txt') + +# Text for and (Unicode) +text_unicode = u"""这是一个三岁的小孩 +在讲述她从一系列照片里看到的东西。 +对这个世界, 她也许还有很多要学的东西, +但在一个重要的任务上, 她已经是专家了: +去理解她所看到的东西。""" + +textfile_utf8 = os.path.join(pwd, 'input_files', 'test_cli_test_utf8.txt') + +""" +Method that mimics's LogCapture's __str__ method to make +the string in the comprehension a unicode literal for P2.7 +https://github.com/Simplistix/testfixtures/blob/32c87902cb111b7ede5a6abca9b597db551c88ef/testfixtures/logcapture.py#L149 +""" + + +def logcapture_str(lc): + if not lc.records: + return 'No logging captured' + + return '\n'.join([u"%s %s\n %s" % r for r in lc.actual()]) + + +@pytest.mark.net +def test_stdin_text(): + with LogCapture() as lc: + result = runner_debug(['-'], textstdin) + log = logcapture_str(lc) + + assert 'text: %s' % textstdin in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_stdin_text_unicode(): + with LogCapture() as lc: + result = runner_debug(['-'], textstdin_unicode) + log = logcapture_str(lc) + + assert u'text: %s' % textstdin_unicode in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_stdin_file(): + with LogCapture() as lc: + result = runner_debug(['--file', '-'], textstdin) + log = logcapture_str(lc) + + assert 'text: %s' % textstdin in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_stdin_file_unicode(): + with LogCapture() as lc: + result = runner_debug(['--file', '-'], textstdin_unicode) + log = logcapture_str(lc) + + assert 'text: %s' % textstdin_unicode in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_text(): + with LogCapture() as lc: + result = runner_debug([text]) + log = logcapture_str(lc) + + assert "text: %s" % text in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_text_unicode(): + with LogCapture() as lc: 
+ result = runner_debug([text_unicode]) + log = logcapture_str(lc) + + assert "text: %s" % text_unicode in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_file_ascii(): + with LogCapture() as lc: + result = runner_debug(['--file', textfile_ascii]) + log = logcapture_str(lc) + + assert "text: %s" % text in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_file_utf8(): + with LogCapture() as lc: + result = runner_debug(['--file', textfile_utf8]) + log = logcapture_str(lc) + + assert "text: %s" % text_unicode in log + assert result.exit_code == 0 + + +@pytest.mark.net +def test_stdout(): + result = runner(['test']) + + # The MP3 encoding (LAME 3.99.5) used to leave a signature in the raw output + # This no longer appears to be the case + assert result.exit_code == 0 + + +@pytest.mark.net +def test_file(tmp_path): + filename = tmp_path / 'out.mp3' + + result = runner(['test', '--output', str(filename)]) + + # Check if files created is > 2k + assert filename.stat().st_size > 2000 + assert result.exit_code == 0 + + +if __name__ == '__main__': + pytest.main(['-x', __file__]) diff --git a/chinese/lib/gtts/tests/test_lang.py b/chinese/lib/gtts/tests/test_lang.py new file mode 100644 index 0000000..bedc2d6 --- /dev/null +++ b/chinese/lib/gtts/tests/test_lang.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +import pytest +from gtts.lang import tts_langs, _extra_langs, _fallback_deprecated_lang +from gtts.langs import _main_langs + +"""Test language list""" + + +def test_main_langs(): + """Fetch languages successfully""" + # Safe to assume 'en' (English) will always be there + scraped_langs = _main_langs() + assert 'en' in scraped_langs + + +def test_deprecated_lang(): + """Test language deprecation fallback""" + with pytest.deprecated_call(): + assert _fallback_deprecated_lang('en-gb') == 'en' + + +if __name__ == '__main__': + pytest.main(['-x', __file__]) diff --git a/chinese/lib/gtts/tests/test_tts.py b/chinese/lib/gtts/tests/test_tts.py new file mode 100644 index 0000000..d2d2849 --- /dev/null +++ b/chinese/lib/gtts/tests/test_tts.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +import os +import pytest +from mock import Mock +from six.moves import urllib + +from gtts.tts import gTTS, gTTSError +from gtts.langs import _main_langs +from gtts.lang import _extra_langs + +# Testing all languages takes some time. +# Set TEST_LANGS envvar to choose languages to test. +# * 'main': Languages extracted from the Web +# * 'extra': Languagee set in Languages.EXTRA_LANGS +# * 'all': All of the above +# * : Languages tags list to test +# Unset TEST_LANGS to test everything ('all') +# See: langs_dict() + + +"""Construct a dict of suites of languages to test. 
+{ '' : } + +ex.: { 'fetch' : {'en': 'English', 'fr': 'French'}, + 'extra' : {'en': 'English', 'fr': 'French'} } +ex.: { 'environ' : ['en', 'fr'] } +""" +env = os.environ.get('TEST_LANGS') +if not env or env == 'all': + langs = _main_langs() + langs.update(_extra_langs()) +elif env == 'main': + langs = _main_langs() +elif env == 'extra': + langs = _extra_langs() +else: + env_langs = {l: l for l in env.split(',') if l} + langs = env_langs + + +@pytest.mark.net +@pytest.mark.parametrize('lang', langs.keys(), ids=list(langs.values())) +def test_TTS(tmp_path, lang): + """Test all supported languages and file save""" + + text = "This is a test" + """Create output .mp3 file successfully""" + for slow in (False, True): + filename = tmp_path / 'test_{}_.mp3'.format(lang) + # Create gTTS and save + tts = gTTS(text=text, lang=lang, slow=slow, lang_check=False) + tts.save(filename) + + # Check if files created is > 1.5 + assert filename.stat().st_size > 1500 + + +@pytest.mark.net +def test_unsupported_language_check(): + """Raise ValueError on unsupported language (with language check)""" + lang = 'xx' + text = "Lorem ipsum" + check = True + with pytest.raises(ValueError): + gTTS(text=text, lang=lang, lang_check=check) + + +def test_empty_string(): + """Raise AssertionError on empty string""" + text = "" + with pytest.raises(AssertionError): + gTTS(text=text) + + +def test_no_text_parts(tmp_path): + """Raises AssertionError on no content to send to API (no text_parts)""" + text = " ..,\n" + with pytest.raises(AssertionError): + filename = tmp_path / 'no_content.txt' + tts = gTTS(text=text) + tts.save(filename) + + +# Test write_to_fp()/save() cases not covered elsewhere in this file + +def test_bad_fp_type(): + """Raise TypeError if fp is not a file-like object (no .write())""" + # Create gTTS and save + tts = gTTS(text='test') + with pytest.raises(TypeError): + tts.write_to_fp(5) + + +@pytest.mark.net +def test_save(tmp_path): + """Save .mp3 file successfully""" + filename = tmp_path / 'save.mp3' + # Create gTTS and save + tts = gTTS(text='test') + tts.save(filename) + + # Check if file created is > 2k + assert filename.stat().st_size > 2000 + + +@pytest.mark.net +def test_get_bodies(): + """get request bodies list""" + tts = gTTS(text='test', tld='com', lang='en') + body = tts.get_bodies()[0] + assert 'test' in body + # \"en\" url-encoded + assert '%5C%22en%5C%22' in body + + +def test_msg(): + """Test gTTsError internal exception handling + Set exception message successfully""" + error1 = gTTSError('test') + assert 'test' == error1.msg + + error2 = gTTSError() + assert error2.msg is None + + +def test_infer_msg(): + """Infer message sucessfully based on context""" + + # Without response: + + # Bad TLD + ttsTLD = Mock(tld='invalid') + errorTLD = gTTSError(tts=ttsTLD) + assert errorTLD.msg == "Failed to connect. Probable cause: Host 'https://translate.google.invalid/' is not reachable" + + # With response: + + # 403 + tts403 = Mock() + response403 = Mock(status_code=403, reason='aaa') + error403 = gTTSError(tts=tts403, response=response403) + assert error403.msg == "403 (aaa) from TTS API. Probable cause: Bad token or upstream API changes" + + # 200 (and not lang_check) + tts200 = Mock(lang='xx', lang_check=False) + response404 = Mock(status_code=200, reason='bbb') + error200 = gTTSError(tts=tts200, response=response404) + assert error200.msg == "200 (bbb) from TTS API. Probable cause: No audio stream in response. 
Unsupported language 'xx'" + + # >= 500 + tts500 = Mock() + response500 = Mock(status_code=500, reason='ccc') + error500 = gTTSError(tts=tts500, response=response500) + assert error500.msg == "500 (ccc) from TTS API. Probable cause: Uptream API error. Try again later." + + # Unknown (ex. 100) + tts100 = Mock() + response100 = Mock(status_code=100, reason='ddd') + error100 = gTTSError(tts=tts100, response=response100) + assert error100.msg == "100 (ddd) from TTS API. Probable cause: Unknown" + + +@pytest.mark.net +def test_WebRequest(tmp_path): + """Test Web Requests""" + + text = "Lorem ipsum" + + """Raise gTTSError on unsupported language (without language check)""" + lang = 'xx' + check = False + + with pytest.raises(gTTSError): + filename = tmp_path / 'xx.txt' + # Create gTTS + tts = gTTS(text=text, lang=lang, lang_check=check) + tts.save(filename) + + +if __name__ == '__main__': + pytest.main(['-x', __file__]) diff --git a/chinese/lib/gtts/tests/test_utils.py b/chinese/lib/gtts/tests/test_utils.py new file mode 100644 index 0000000..e41c1c5 --- /dev/null +++ b/chinese/lib/gtts/tests/test_utils.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +import pytest +from gtts.utils import _minimize, _len, _clean_tokens, _translate_url + +delim = ' ' +Lmax = 10 + + +def test_ascii(): + _in = "Bacon ipsum dolor sit amet" + _out = ["Bacon", "ipsum", "dolor sit", "amet"] + assert _minimize(_in, delim, Lmax) == _out + + +def test_ascii_no_delim(): + _in = "Baconipsumdolorsitametflankcornedbee" + _out = ["Baconipsum", "dolorsitam", "etflankcor", "nedbee"] + assert _minimize(_in, delim, Lmax) == _out + + +def test_unicode(): + _in = u"这是一个三岁的小孩在讲述他从一系列照片里看到的东西。" + _out = [u"这是一个三岁的小孩在", u"讲述他从一系列照片里", u"看到的东西。"] + assert _minimize(_in, delim, Lmax) == _out + + +def test_startwith_delim(): + _in = delim + "test" + _out = ["test"] + assert _minimize(_in, delim, Lmax) == _out + + +def test_len_ascii(): + text = "Bacon ipsum dolor sit amet flank corned beef." 
+ assert _len(text) == 45 + + +def test_len_unicode(): + text = u"但在一个重要的任务上" + assert _len(text) == 10 + + +def test_only_space_and_punc(): + _in = [",(:)?", "\t ", "\n"] + _out = [] + assert _clean_tokens(_in) == _out + + +def test_strip(): + _in = [" Bacon ", "& ", "ipsum\r", "."] + _out = ["Bacon", "&", "ipsum"] + assert _clean_tokens(_in) == _out + + +def test_translate_url(): + _in = {"tld": "qwerty", "path": "asdf"} + _out = "https://translate.google.qwerty/asdf" + assert _translate_url(**_in) == _out + + +if __name__ == '__main__': + pytest.main(['-x', __file__]) diff --git a/chinese/lib/gtts/tokenizer/tests/test_core.py b/chinese/lib/gtts/tokenizer/tests/test_core.py new file mode 100644 index 0000000..8c89ecd --- /dev/null +++ b/chinese/lib/gtts/tokenizer/tests/test_core.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +import unittest +import re +from gtts.tokenizer.core import RegexBuilder, PreProcessorRegex, PreProcessorSub, Tokenizer + +# Tests based on classes usage examples +# See class documentation for details + + +class TestRegexBuilder(unittest.TestCase): + def test_regexbuilder(self): + rb = RegexBuilder('abc', lambda x: "{}".format(x)) + self.assertEqual(rb.regex, re.compile('a|b|c')) + + +class TestPreProcessorRegex(unittest.TestCase): + def test_preprocessorregex(self): + pp = PreProcessorRegex('ab', lambda x: "{}".format(x), 'c') + self.assertEqual(len(pp.regexes), 2) + self.assertEqual(pp.regexes[0].pattern, 'a') + self.assertEqual(pp.regexes[1].pattern, 'b') + + +class TestPreProcessorSub(unittest.TestCase): + def test_proprocessorsub(self): + sub_pairs = [('Mac', 'PC'), ('Firefox', 'Chrome')] + pp = PreProcessorSub(sub_pairs) + _in = "I use firefox on my mac" + _out = "I use Chrome on my PC" + self.assertEqual(pp.run(_in), _out) + + +class TestTokenizer(unittest.TestCase): + # tokenizer case 1 + def case1(self): + return re.compile(r"\,") + + # tokenizer case 2 + def case2(self): + return RegexBuilder('abc', lambda x: r"{}\.".format(x)).regex + + def test_tokenizer(self): + t = Tokenizer([self.case1, self.case2]) + _in = "Hello, my name is Linda a. Call me Lin, b. I'm your friend" + _out = [ + 'Hello', + ' my name is Linda ', + ' Call me Lin', + ' ', + " I'm your friend"] + self.assertEqual(t.run(_in), _out) + + def test_bad_params_not_list(self): + # original exception: TypeError + with self.assertRaises(TypeError): + Tokenizer(self.case1) + + def test_bad_params_not_callable(self): + # original exception: TypeError + with self.assertRaises(TypeError): + Tokenizer([100]) + + def test_bad_params_not_callable_returning_regex(self): + # original exception: AttributeError + def not_regex(): + return 1 + + with self.assertRaises(TypeError): + Tokenizer([not_regex]) + + +if __name__ == '__main__': + unittest.main() diff --git a/chinese/lib/gtts/tokenizer/tests/test_pre_processors.py b/chinese/lib/gtts/tokenizer/tests/test_pre_processors.py new file mode 100644 index 0000000..8c6a428 --- /dev/null +++ b/chinese/lib/gtts/tokenizer/tests/test_pre_processors.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +import unittest +from gtts.tokenizer.pre_processors import tone_marks, end_of_line, abbreviations, word_sub + + +class TestPreProcessors(unittest.TestCase): + def test_tone_marks(self): + _in = "lorem!ipsum?" + _out = "lorem! ipsum? " + self.assertEqual(tone_marks(_in), _out) + + def test_end_of_line(self): + _in = """test- +ing""" + _out = "testing" + self.assertEqual(end_of_line(_in), _out) + + def test_abbreviations(self): + _in = "jr. sr. dr." 
+ _out = "jr sr dr" + self.assertEqual(abbreviations(_in), _out) + + def test_word_sub(self): + _in = "Esq. Bacon" + _out = "Esquire Bacon" + self.assertEqual(word_sub(_in), _out) + + +if __name__ == '__main__': + unittest.main() diff --git a/chinese/lib/gtts/tokenizer/tests/test_tokenizer_cases.py b/chinese/lib/gtts/tokenizer/tests/test_tokenizer_cases.py new file mode 100644 index 0000000..13e63f2 --- /dev/null +++ b/chinese/lib/gtts/tokenizer/tests/test_tokenizer_cases.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +import unittest +from gtts.tokenizer.tokenizer_cases import tone_marks, period_comma, colon, other_punctuation, legacy_all_punctuation +from gtts.tokenizer import Tokenizer, symbols + + +class TestPreTokenizerCases(unittest.TestCase): + def test_tone_marks(self): + t = Tokenizer([tone_marks]) + _in = "Lorem? Ipsum!" + _out = ['Lorem?', 'Ipsum!'] + self.assertEqual(t.run(_in), _out) + + def test_period_comma(self): + t = Tokenizer([period_comma]) + _in = "Hello, it's 24.5 degrees in the U.K. today. $20,000,000." + _out = ['Hello', "it's 24.5 degrees in the U.K. today", '$20,000,000.'] + self.assertEqual(t.run(_in), _out) + + def test_colon(self): + t = Tokenizer([colon]) + _in = "It's now 6:30 which means: morning missing:space" + _out = ["It's now 6:30 which means", ' morning missing', 'space'] + self.assertEqual(t.run(_in), _out) + + def test_other_punctuation(self): + # String of the unique 'other punctuations' + other_punc_str = ''.join( + set(symbols.ALL_PUNC) - + set(symbols.TONE_MARKS) - + set(symbols.PERIOD_COMMA) - + set(symbols.COLON)) + + t = Tokenizer([other_punctuation]) + self.assertEqual(len(t.run(other_punc_str)) - 1, len(other_punc_str)) + + def test_legacy_all_punctuation(self): + t = Tokenizer([legacy_all_punctuation]) + self.assertEqual(len(t.run(symbols.ALL_PUNC)) - + 1, len(symbols.ALL_PUNC)) + + +if __name__ == '__main__': + unittest.main() diff --git a/chinese/lib/gtts/tts.py b/chinese/lib/gtts/tts.py index 3945dc6..43e3678 100644 --- a/chinese/lib/gtts/tts.py +++ b/chinese/lib/gtts/tts.py @@ -1,11 +1,15 @@ # -*- coding: utf-8 -*- from gtts.tokenizer import pre_processors, Tokenizer, tokenizer_cases from gtts.utils import _minimize, _len, _clean_tokens, _translate_url -from gtts.lang import tts_langs +from gtts.lang import tts_langs, _fallback_deprecated_lang from six.moves import urllib -from urllib.parse import quote -import urllib3 +try: + from urllib.parse import quote + import urllib3 +except ImportError: + from urllib import quote + import urllib2 import requests import logging import json @@ -38,10 +42,11 @@ class gTTS: Args: text (string): The text to be read. tld (string): Top-level domain for the Google Translate host, - i.e `https://translate.google.`. This is useful - when ``google.com`` might be blocked within a network but - a local or different Google host (e.g. ``google.cn``) is not. - Default is ``com``. + i.e `https://translate.google.`. Different Google domains + can produce different localized 'accents' for a given + language. This is also useful when ``google.com`` might be blocked + within a network but a local or different Google host + (e.g. ``google.cn``) is not. Default is ``com``. lang (string, optional): The language (IETF language tag) to read the text in. Default is ``en``. slow (bool, optional): Reads text more slowly. Defaults to ``False``. 
@@ -130,18 +135,21 @@ def __init__( self.tld = tld # Language - if lang_check: + self.lang_check = lang_check + self.lang = lang + + if self.lang_check: + # Fallback lang in case it is deprecated + self.lang = _fallback_deprecated_lang(lang) + try: langs = tts_langs() - if lang.lower() not in langs: - raise ValueError("Language not supported: %s" % lang) + if self.lang not in langs: + raise ValueError("Language not supported: %s" % lang) except RuntimeError as e: log.debug(str(e), exc_info=True) log.warning(str(e)) - self.lang_check = lang_check - self.lang = lang.lower() - # Read speed if slow: self.speed = Speed.SLOW @@ -220,18 +228,6 @@ def _package_rpc(self, text): espaced_rpc = json.dumps(rpc, separators=(',', ':')) return "f.req={}&".format(quote(espaced_rpc)) - def get_urls(self): - """Get TTS API request URL(s) that would be sent to the TTS API. - - Returns: - list: A list of TTS API request URLs to make. - - This is particularly useful to get the list of URLs generated - by ``gTTS`` but not yet fullfilled, - for example to be used by an external program. - """ - return [pr.url for pr in self._prepare_requests()] - def get_bodies(self): """Get TTS API request bodies(s) that would be sent to the TTS API. @@ -253,7 +249,12 @@ def write_to_fp(self, fp): """ # When disabling ssl verify in requests (for proxies and firewalls), # urllib3 prints an insecure warning on stdout. We disable that. - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + try: + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + except: + pass + + prepared_requests = self._prepare_requests() for idx, pr in enumerate(prepared_requests): @@ -356,4 +357,4 @@ def infer_msg(self, tts, rsp=None): elif status >= 500: cause = "Uptream API error. Try again later." - return "{}. Probable cause: {}".format(premise, cause) \ No newline at end of file + return "{}. Probable cause: {}".format(premise, cause) diff --git a/chinese/lib/gtts/version.py b/chinese/lib/gtts/version.py index 36a511e..f1edb19 100644 --- a/chinese/lib/gtts/version.py +++ b/chinese/lib/gtts/version.py @@ -1 +1 @@ -__version__ = '2.2.1' +__version__ = '2.2.2' diff --git a/chinese/tts.py b/chinese/tts.py index d8cf290..5517cef 100644 --- a/chinese/tts.py +++ b/chinese/tts.py @@ -22,7 +22,7 @@ class AudioDownloader: - def __init__(self, text, source='google|zh-cn'): + def __init__(self, text, source='google|zh-CN'): self.text = text self.service, self.lang = source.split('|') self.path = self.get_path() @@ -53,7 +53,7 @@ def download(self): return basename(self.path) def get_google(self): - tts = gTTS(self.text, lang=self.lang) + tts = gTTS(self.text, lang=self.lang, tld='cn') try: tts.save(self.path) except gTTSError as e: diff --git a/tests/__init__.py b/tests/__init__.py index 62e6012..35367fb 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -48,6 +48,7 @@ else: media_dir = 'collection.media' modules['gtts'] = MagicMock() + modules['gtts.tts'] = MagicMock() modules['requests'] = MagicMock() patch.dict('sys.modules', modules).start()
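
The add-on side of this change is the switch from the lowercase 'zh-cn'/'zh-tw' tags to the canonical 'zh-CN'/'zh-TW' forms used by gTTS 2.2.2; the old tags now go through _fallback_deprecated_lang, which warns and falls back to the canonical code. Below is a minimal sketch (not part of the patch) of the new behaviour, assuming the vendored gtts package under chinese/lib is on sys.path and that save() can reach the Translate endpoint; the lang='zh-CN', tld='cn' call shape mirrors AudioDownloader.get_google in chinese/tts.py.

    # Sketch: confirm the zh-cn -> zh-CN deprecation fallback and the new
    # gTTS call shape used by chinese/tts.py. save() needs network access.
    import warnings

    from gtts.lang import _fallback_deprecated_lang
    from gtts.tts import gTTS

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        # Old lowercase tag still resolves, but with a DeprecationWarning
        assert _fallback_deprecated_lang('zh-cn') == 'zh-CN'
        assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    tts = gTTS('你好', lang='zh-CN', tld='cn')  # translate.google.cn host
    tts.save('nihao.mp3')

The bundled test files mark every case that talks to the Translate API with @pytest.mark.net, so the offline subset can be selected with a "not net" marker expression. A sketch, assuming pytest plus the test dependencies seen in the imports above (click, mock, six, testfixtures) are installed and the working directory is the repository root:

    # Run only the offline subset of the vendored gTTS tests; cases that hit
    # the Translate API are tagged @pytest.mark.net and are deselected here.
    import sys

    import pytest

    sys.path.insert(0, 'chinese/lib')  # make `import gtts` resolve to the vendored copy
    pytest.main(['-m', 'not net',
                 'chinese/lib/gtts/tests',
                 'chinese/lib/gtts/tokenizer/tests'])

Registering the net marker (for example in a pytest.ini markers section) avoids pytest's unknown-mark warning; the -m expression above works either way.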