- Added "convert_on_find" setting for converting the text in Find Results view (defaults to false). (#39, thanks to @songenhin)
- A better handle on Codecs plugin
- Change all setting options to boolean type (true or false)
- Encode selection for empty file (#44 and #45, thanks to @knight9999 and @fyears)
- Sync chardet to version 2.3.0
seanliang · Nov 27, 2014 · b75c440 · b75c440
1 parent 4ddabbe
commit b75c440
Show file tree

Hide file tree

Showing 15 changed files with 229 additions and 117 deletions.
diff --git a/ConvertToUTF8.py b/ConvertToUTF8.py
@@ -84,6 +84,12 @@ def save(self):
 		fp.close()
 		self.dirty = False
 
+	def get(self, file_name):
+		"""Return the value cached for `file_name`, or None if it is not cached.
+
+		Scans the entries of `self.cache` in order and returns the value of
+		the first entry that contains `file_name` as a key. The cache itself
+		is not modified by this lookup.
+		"""
+		for item in self.cache:
+			if file_name in item:
+				return item.get(file_name)
+		return None
+
 	def pop(self, file_name):
 		for item in self.cache:
 			if file_name in item:
@@ -103,6 +109,15 @@ def set(self, file_name, encoding):
 
 encoding_cache = None
 
+# Maps setting values to booleans. The string keys are the option names that
+# were accepted before the settings became true/false; booleans map to
+# themselves. Looking up any other value with OPT_MAP.get() yields None.
+OPT_MAP = {
+	'convert_and_open': True,
+	'no_action': False,
+	'always': True,
+	'never': False,
+	True: True,
+	False: False
+}
+
 def get_settings():
 	global ENCODINGS_NAME, ENCODINGS_CODE
 	settings = sublime.load_settings('ConvertToUTF8.sublime-settings')
@@ -111,11 +126,12 @@ def get_settings():
 	ENCODINGS_CODE = [pair[1] for pair in encoding_list]
 	encoding_cache.set_max_size(settings.get('max_cache_size', 100))
 	SETTINGS['max_detect_lines'] = settings.get('max_detect_lines', 600)
-	SETTINGS['preview_action'] = settings.get('preview_action', 'no_action')
+	SETTINGS['preview_action'] = OPT_MAP.get(settings.get('preview_action', False))
 	SETTINGS['default_encoding_on_create'] = settings.get('default_encoding_on_create', '')
-	SETTINGS['convert_on_load'] = settings.get('convert_on_load', 'always')
-	SETTINGS['convert_on_save'] = settings.get('convert_on_save', 'always')
+	SETTINGS['convert_on_load'] = OPT_MAP.get(settings.get('convert_on_load', True))
+	SETTINGS['convert_on_save'] = OPT_MAP.get(settings.get('convert_on_save', True))
 	SETTINGS['lazy_reload'] = settings.get('lazy_reload', True)
+	SETTINGS['convert_on_find'] = settings.get('convert_on_find', False)
 
 def get_setting(view, key):
 	# read project specific settings first
@@ -165,7 +181,7 @@ def setup_views():
 	# check existing views
 	for win in sublime.windows():
 		for view in win.views():
-			if get_setting(view, 'convert_on_load') == 'never':
+			if not get_setting(view, 'convert_on_load'):
 				break
 			if view.is_dirty() or view.settings().get('origin_encoding'):
 				show_encoding_status(view)
@@ -189,7 +205,7 @@ def wait_for_ready():
 	wait_for_ready()
 
 def detect(view, file_name, cnt):
-	if not file_name or not os.path.exists(file_name):
+	if not file_name or not os.path.exists(file_name) or os.path.getsize(file_name) == 0:
 		return
 	encoding = encoding_cache.pop(file_name)
 	if encoding:
@@ -338,20 +354,25 @@ def get_branch(self, platform, arch):
 			'osx-x64': 'osx',
 		}][ST3].get(platform + '-' + arch)
 
-	def run(self, edit, encoding, file_name):
+	def run(self, edit, encoding, file_name, need_codecs):
 		self.view.set_name('ConvertToUTF8 Instructions')
 		self.view.set_scratch(True)
 		self.view.settings().set("word_wrap", True)
-		msg = 'Oops! The file {0} is detected as {1} which is not supported by your Sublime Text.\n\nPlease check whether it is in the list of Python\'s Standard Encodings (http://docs.python.org/library/codecs.html#standard-encodings) or not.\n\nIf yes, '.format(file_name, encoding)
-		branch = self.get_branch(sublime.platform(), sublime.arch())
-		if branch:
-			ver = '33' if ST3 else '26'
-			msg = msg + 'please install Codecs{0} (https://github.com/seanliang/Codecs{0}/tree/{1}) and restart Sublime Text to make ConvertToUTF8 work properly. If it is still not working, '.format(ver, branch)
-
-		import platform
-		msg = msg + 'please kindly send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'.format(
-			sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
-		)
+		msg = 'File: {0}\nEncoding: {1}\nError: '.format(file_name, encoding)
+		if need_codecs:
+			msg = msg + 'Codecs missing\n\n'
+			branch = self.get_branch(sublime.platform(), sublime.arch())
+			if branch:
+				ver = '33' if ST3 else '26'
+				msg = msg + 'Please install Codecs{0} plugin (https://github.com/seanliang/Codecs{0}/tree/{1}).\n'.format(ver, branch)
+			else:
+				import platform
+				msg = msg + 'Please send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'.format(
+					sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
+				)
+		else:
+			msg = msg + 'Unsupported encoding, see http://docs.python.org/library/codecs.html#standard-encodings\n\nPlease try other tools such as iconv.\n'
+
 		self.view.insert(edit, 0, msg)
 		self.view.set_read_only(True)
 		self.view.window().focus_view(self.view)
@@ -384,8 +405,17 @@ def run(self, edit, encoding=None, stamp=None, detect_on_fail=False):
 			fp = codecs.open(file_name, 'rb', encoding, errors='strict')
 			contents = fp.read()
 		except LookupError as e:
-			clean_encoding_vars(view)
-			view.window().new_file().run_command('py_instruction', {'encoding': encoding, 'file_name': file_name})
+			try:
+				# reload codecs
+				import _multibytecodec, imp, encodings
+				imp.reload(encodings)
+				imp.reload(codecs)
+				codecs.getencoder(encoding)
+				view.run_command('reload_with_encoding', {'encoding': encoding})
+			except (ImportError, LookupError) as e:
+				need_codecs = (type(e) == ImportError)
+				clean_encoding_vars(view)
+				view.window().new_file().run_command('py_instruction', {'encoding': encoding, 'file_name': file_name, 'need_codecs': need_codecs})
 			return
 		except UnicodeDecodeError as e:
 			if detect_on_fail:
@@ -483,7 +513,69 @@ def description(self):
 			return
 		return 'UTF8 -> {0}'.format(encoding)
 
+class ConvertTextToUtf8Command(sublime_plugin.TextCommand):
+	"""Re-decode a range of lines of the view with a given or detected encoding.
+
+	Run as 'convert_text_to_utf8' with 'begin_line', 'end_line' and an
+	optional 'encoding'. The line at `begin_line` itself is never touched;
+	conversion covers the lines after it, up to but excluding `end_line`.
+	Work is done in chunks of at most 50 lines, each chunk re-running the
+	command for the remainder.
+	"""
+
+	def get_text(self, region):
+		"""Return the text of `region` encoded as CP1252 bytes, or None.
+
+		None is returned when the text cannot be encoded as CP1252.
+		NOTE(review): presumably this recovers the raw bytes of text that the
+		view displayed as CP1252 -- confirm against how Find Results are built.
+		"""
+		content = self.view.substr(region)
+		try:
+			return content.encode('CP1252')
+		except (UnicodeError, LookupError):
+			return None
+
+	def detect(self, begin_line, end_line):
+		"""Detect the encoding of the line range and re-run the command with it.
+
+		Does nothing when the text is empty or not CP1252-encodable, when the
+		detector reports no encoding, when confidence is below 0.95, or when
+		the detected encoding is listed in SKIP_ENCODINGS.
+		"""
+		begin_line = int(begin_line)
+		end_line = int(end_line)
+		begin_point = self.view.text_point(begin_line + 1, 0)
+		end_point = self.view.text_point(end_line, 0) - 1
+		region = sublime.Region(begin_point, end_point)
+		content = self.get_text(region)
+		if not content:
+			return
+		detector = UniversalDetector()
+		detector.feed(content)
+		detector.close()
+		encoding = detector.result['encoding']
+		confidence = detector.result['confidence']
+		# the detector reports None when it cannot determine an encoding;
+		# calling .upper() on it would raise AttributeError
+		if not encoding:
+			return
+		encoding = encoding.upper()
+		if confidence < 0.95 or encoding in SKIP_ENCODINGS:
+			return
+		self.view.run_command('convert_text_to_utf8', {'begin_line': begin_line, 'end_line': end_line, 'encoding': encoding})
+
+	def run(self, edit, begin_line, end_line, encoding = None):
+		"""Convert one chunk of lines and schedule the rest.
+
+		begin_line -- row preceding the first row to convert (exclusive)
+		end_line   -- row following the last row to convert (exclusive)
+		encoding   -- encoding to decode with; when empty, detection is
+		              scheduled via detect() and this call returns at once
+		"""
+		begin_line = int(begin_line)
+		end_line = int(end_line)
+		if not encoding:
+			# detect the encoding
+			sublime.set_timeout(lambda: self.detect(begin_line, end_line), 0)
+			return
+		view = self.view
+		# process at most 50 rows per invocation
+		last_line = min(begin_line + 50, end_line)
+		begin_point = view.text_point(begin_line + 1, 0)
+		end_point = view.text_point(last_line, 0) - 1
+		region = sublime.Region(begin_point, end_point)
+		text = self.get_text(region)
+		if text is None:
+			# text is not CP1252-encodable, so there are no bytes to decode
+			return
+		# try the encoding, then fall back through its supersets
+		while encoding:
+			try:
+				text = text.decode(encoding)
+				break
+			except UnicodeDecodeError:
+				encoding = SUPERSETS.get(encoding)
+		else:
+			# ran out of candidate encodings
+			return
+		view.replace(edit, region, text)
+		if last_line < end_line:
+			# This chunk stopped just before row `last_line`, and the next run
+			# starts at begin_line + 1, so pass last_line - 1 to make the next
+			# chunk begin exactly at row `last_line` instead of skipping it.
+			view.run_command('convert_text_to_utf8', {'begin_line': last_line - 1, 'end_line': end_line, 'encoding': encoding})
+
+	def is_enabled(self):
+		"""Return the 'convert_on_find' setting for this view."""
+		return get_setting(self.view, 'convert_on_find')
+
 class ConvertToUTF8Listener(sublime_plugin.EventListener):
+	def is_find_results(self, view):
+		"""Return True when the view's 'syntax' setting is the Find Results syntax."""
+		return view.settings().get('syntax') == 'Packages/Default/Find Results.hidden-tmLanguage'
+
 	def check_clones(self, view):
 		clone_numbers = view.settings().get('clone_numbers', 0)
 		if clone_numbers:
@@ -495,6 +587,9 @@ def check_clones(self, view):
 		return False
 
 	def on_new(self, view):
+		if self.is_find_results(view):
+			view.settings().set('last_lines', 0)
+			return
 		if get_setting(view, 'default_encoding_on_create'):
 			init_encoding_vars(view, get_setting(view, 'default_encoding_on_create'), False)
 
@@ -511,6 +606,8 @@ def on_close(self, view):
 			view.settings().set('clone_numbers', clone_numbers - 1)
 		else:
 			remove_reverting(view.file_name())
+			if self.is_find_results(view):
+				view.settings().erase('last_lines')
 
 	def on_load(self, view):
 		encoding = view.encoding()
@@ -536,7 +633,7 @@ def on_load(self, view):
 					return
 			else:
 				return
-		if get_setting(view, 'convert_on_load') == 'never':
+		if not get_setting(view, 'convert_on_load'):
 			return
 		self.perform_action(view, file_name, 5)
 
@@ -563,7 +660,7 @@ def clean_reload(self, view, file_name):
 		threading.Thread(target=lambda: detect(view, file_name, cnt)).start()
 
 	def perform_action(self, view, file_name, times):
-		if get_setting(view, 'preview_action') != 'convert_and_open' and self.is_preview(view):
+		if not get_setting(view, 'preview_action') and self.is_preview(view):
 			if times > 0:
 				# give it another chance before everything is ready
 				sublime.set_timeout(lambda: self.perform_action(view, file_name, times - 1), 100)
@@ -580,6 +677,24 @@ def on_modified(self, view):
 			return
 		file_name = view.file_name()
 		if not file_name or view.is_loading():
+			if get_setting(view, 'convert_on_find') and self.is_find_results(view):
+				begin_line = view.settings().get('last_lines', 0)
+				end_line = view.rowcol(view.size())[0]
+				if end_line > begin_line:
+					view.settings().set('last_lines', end_line)
+					begin_point = view.text_point(begin_line, 0)
+					line = view.line(begin_point)
+					text = view.substr(line)
+					if text.endswith(':'):
+						# find the file name
+						file_name = text[:-1]
+						# skip opened file
+						if view.window().find_open_file(file_name):
+							return
+						encoding = encoding_cache.get(file_name)
+						if encoding in SKIP_ENCODINGS:
+							return
+						sublime.set_timeout(lambda: view.run_command('convert_text_to_utf8', {'begin_line': begin_line, 'end_line': end_line, 'encoding': encoding}), 0)
 			return
 		if not view.settings().get('in_converting'):
 			if view.settings().get('is_preview'):
@@ -657,7 +772,7 @@ def on_post_save(self, view):
 		file_name = view.file_name()
 		if file_name in stamps:
 			del stamps[file_name]
-		if get_setting(view, 'convert_on_save') == 'never':
+		if not get_setting(view, 'convert_on_save'):
 			return
 		# file was saved with other encoding
 		if view_encoding != 'UTF-8':

diff --git a/ConvertToUTF8.sublime-settings b/ConvertToUTF8.sublime-settings
@@ -16,17 +16,20 @@
 	// Maximum lines to detect, 0 means unlimited
 	"max_detect_lines" : 600,
 
-	// Action when previewing file: no_action or convert_and_open
-	"preview_action" : "no_action",
+	// Convert when previewing file: true or false
+	"preview_action" : false,
 
 	// Encoding for new file, empty means using sublime text's "default_encoding" setting
 	"default_encoding_on_create" : "",
 
 	// Set this option to true will cause Sublime Text reload the saved file when losing focus
 	"lazy_reload": false,
 
-	// Convert encoding options when loading/saving: always, never
-	"convert_on_load" : "always",
-	"convert_on_save" : "always"
+	// Convert in Find Results view
+	"convert_on_find": false,
+
+	// Convert when loading/saving a file
+	"convert_on_load" : true,
+	"convert_on_save" : true
 
 }
diff --git a/README.md b/README.md
@@ -11,11 +11,7 @@ If you want to support this plugin, you can donate via PayPal or Alipay. Thanks!
 
 Note
 ------------------
-** For Linux users: ConvertToUTF8 relies on several dynamic libraries which are missing in embedded version of Python of Sublime Text 2 and 3. This plugin can not work fully until you install them manully.
-
-** For OS X users: Sublime Text 3 uses an embedded Python which is facing the same problem as Linux version.
-
-** I've reported this problem to Jon but did not get any response yet, so I created extra plugins to solve it. ConvertToUTF8 will show you the instructions when necessary.
+** If the plugin does not work properly, you may need to install an extra plugin: [Codecs26](https://github.com/seanliang/Codecs26) for Sublime Text 2 or [Codecs33](https://github.com/seanliang/Codecs33) for Sublime Text 3.
 
 Installation
 ------------------
@@ -34,11 +30,12 @@ Please check ConvertToUTF8.sublime-settings file for details. You should save yo
 * encoding_list: encoding selection list when detection is failed
 * max_cache_size: maximum encoding cache size, 0 means no cache (default: 100)
 * max_detect_lines: maximum detection lines, 0 means unlimited (default: 600)
-* preview_action: specific the action when previewing a file, available options: no_action, convert_and_open (default: no_action)
-* default_encoding_on_create: specific the default encoding for newly created file (such as "GBK"), empty value means using sublime text's "default_encoding" setting (default: empty)
-* convert_on_load: enable/disable convert file content to UTF-8 when it is loaded, available options: always, never (default: always)
-* convert_on_save: enable/disable convert file from UTF-8 to a specific encoding when it is saved, available options: always, never (default: always)
-* lazy_reload: enable/disable save file to a temporary location, and reload it in background when switching to other windows or tabs, available options: true, false (default: false)
+* preview_action: convert the file's content to UTF-8 when previewing it (default: false)
+* default_encoding_on_create: specifies the default encoding for newly created files (such as "GBK"); an empty value means using Sublime Text's "default_encoding" setting (default: "")
+* convert_on_load: convert the file's content to UTF-8 when it is loaded (default: true)
+* convert_on_save: convert the file's content from UTF-8 to its original (or specific) encoding when it is saved (default: true)
+* convert_on_find: convert the text in Find Results view to UTF-8 (default: false)
+* lazy_reload: save file to a temporary location, and reload it in background when switching to other windows or tabs (default: false)
 
 Usage
 ------------------
@@ -47,11 +44,10 @@ In most cases, this plug-in will take care of encoding issues automatically.
 You can also use the "File > Set File Encoding to" menu entry to transform between different encodings. For example, you can open a UTF-8 file, and save it to GBK, and vice versa.
 
 Note:
-* if convert_on_save is set to never, the file will *NEVER* be saved to the selected encoding
+* if convert_on_save is set to `false`, the file will *NEVER* be saved to the selected encoding
 * please do not edit the file before the encoding detection process is finished
 * please try either increasing the value of max_detect_lines or set the encoding manually if the detection result is not accurate
-* due to limitation of API, when lazy_reload is set to true, quit Sublime Text immediately after saving a file will cause the file to be saved as UTF-8, the correct content will be reload next time Sublime Text starts
-
+* due to a limitation of the API, when lazy_reload is set to `true`, quitting Sublime Text immediately after saving a file will cause the file to be saved as UTF-8; the correct content will be reloaded the next time Sublime Text starts
 
 Q & A
 ------------------

diff --git a/README.zh_CN.md b/README.zh_CN.md
@@ -12,11 +12,7 @@
 
 注意
 ------------------
-** Linux 用户：Sublime Text 2 和 3 内嵌 Python 版本中缺失几个 ConvertToUTF8 依赖的动态库。您必须手工安装这些文件才能让本插件完全运作。
-
-** OS X 用户：Sublime Text 3 使用的内嵌 Python 存在与 Linux 版本相同的问题。
-
-** 我已将此问题报告给 Jon 但未收到任何回复，因此我创建了额外的插件来解决它。ConvertToUTF8 会在需要时显示相应说明。
+** 如果本插件无法正常工作，则可能需要安装一个额外插件：[Codecs26](https://github.com/seanliang/Codecs26)（针对 Sublime Text 2）或 [Codecs33](https://github.com/seanliang/Codecs33)（针对 Sublime Text 3）。
 
 安装
 ------------------
@@ -35,11 +31,12 @@
 * encoding_list：检测失败时显示的编码列表
 * max_cache_size：最大编码缓存数量，0 表示不缓存（默认为 100）
 * max_detect_lines：最大检测行数，0 表示不限制（默认为 600）
-* preview_action：指定预览模式下的动作，可选项：no_action 不作任何动作，convert_and_open 转换编码并打开（默认为 no_action）
-* default_encoding_on_create：指定新建文件的默认编码（如 GBK），空值表示使用 Sublime Text 的 default_encoding 设置（默认为空值）
-* convert_on_load：启用/禁用文件装载时将窗口内容转换成UTF-8编码，可选项：always 自动转换，never 不转换（默认为 always）
-* convert_on_save：启用/禁用文件保存时将其从UTF-8转换成指定转码，可选项：always 自动转换，never 不转换（默认为 always）
-* lazy_reload：启用/禁用将文件保存到临时位置，并在切换窗口或标签时在后台自动重载，可选项：true，false（默认为 false）
+* preview_action：预览文件时是否将其内容转换为 UTF-8（默认为 false）
+* default_encoding_on_create：指定新建文件的默认编码（如 GBK），空值表示使用 Sublime Text 的 default_encoding 设置（默认为 ""）
+* convert_on_load：文件装载时是否将其内容转换成 UTF-8（默认为 true）
+* convert_on_save：文件保存时是否将其内容转换成原有（或指定）编码（默认为 true）
+* convert_on_find：将 Find Results 窗口里的内容转换成 UTF-8（默认为 false）
+* lazy_reload：将文件保存到临时位置，并在切换窗口或标签时在后台自动重载（默认为 false）
 
 使用说明
 ------------------
@@ -48,11 +45,10 @@
 您也可以通过 File > Set File Encoding to 菜单对文件编码进行手工转换。例如，您可以打开一个 UTF-8 编码的文件，指定保存为 GBK，反之亦然。
 
 注意：
-* 如果 convert_on_save 被设置为 never，文件不会被保存成指定编码
+* 如果 convert_on_save 被设置为 `false`，文件*不会*被保存成指定编码
 * 在文件编码检测过程完成前请勿编辑文件
 * 若检测结果不准确，请尝试增大 max_detect_lines 的值或手工指定编码
-* 由于 API 限制，在 lazy_reload 设置为 true 时，保存文件后立即退出 Sublime Text 将造成文件被保存为 UTF-8，正确的内容将在下次 Sublime Text 打开时重载
-
+* 由于 API 限制，在 lazy_reload 设置为 `true` 时，保存文件后立即退出 Sublime Text 将造成文件被保存为 UTF-8，正确的内容将在下次 Sublime Text 打开时重载
 
 常见问题
 ------------------

diff --git a/chardet/__init__.py b/chardet/__init__.py
@@ -15,10 +15,10 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################
 
-__version__ = "2.1.1"
-
+__version__ = "2.3.0"
 from sys import version_info
 
+
 def detect(aBuf):
     if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
             (version_info >= (3, 0) and not isinstance(aBuf, bytes))):