Skip to content

Commit

Permalink
- Added "convert_on_find" setting for converting the text in Find Res…
Browse files Browse the repository at this point in the history
…ults view (defaults to false). (#39, thanks to @songenhin)

- A better handle on Codecs plugin
- Change all setting options to boolean type (true or false)
- Encode selection for empty file (#44 and #45, thanks to @knight9999 and @fyears)
- Sync chardet to version 2.3.0
  • Loading branch information
seanliang committed Nov 27, 2014
1 parent 4ddabbe commit b75c440
Show file tree
Hide file tree
Showing 15 changed files with 229 additions and 117 deletions.
157 changes: 136 additions & 21 deletions ConvertToUTF8.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ def save(self):
fp.close()
self.dirty = False

def get(self, file_name):
    """Return the encoding cached for ``file_name`` without removing it.

    ``self.cache`` is scanned in order; the value stored under
    ``file_name`` in the first entry that contains that key is returned.
    ``None`` is returned when no entry contains the key.
    """
    match = next((entry for entry in self.cache if file_name in entry), None)
    if match is None:
        return None
    return match.get(file_name)

def pop(self, file_name):
for item in self.cache:
if file_name in item:
Expand All @@ -103,6 +109,15 @@ def set(self, file_name, encoding):

encoding_cache = None

# Normalizes option values to booleans: real booleans map to themselves,
# and the string option names are translated to the equivalent boolean.
# A value that is not listed here yields None from OPT_MAP.get(...).
OPT_MAP = {
    True: True,
    False: False,
    'always': True,
    'never': False,
    'convert_and_open': True,
    'no_action': False,
}

def get_settings():
global ENCODINGS_NAME, ENCODINGS_CODE
settings = sublime.load_settings('ConvertToUTF8.sublime-settings')
Expand All @@ -111,11 +126,12 @@ def get_settings():
ENCODINGS_CODE = [pair[1] for pair in encoding_list]
encoding_cache.set_max_size(settings.get('max_cache_size', 100))
SETTINGS['max_detect_lines'] = settings.get('max_detect_lines', 600)
SETTINGS['preview_action'] = settings.get('preview_action', 'no_action')
SETTINGS['preview_action'] = OPT_MAP.get(settings.get('preview_action', False))
SETTINGS['default_encoding_on_create'] = settings.get('default_encoding_on_create', '')
SETTINGS['convert_on_load'] = settings.get('convert_on_load', 'always')
SETTINGS['convert_on_save'] = settings.get('convert_on_save', 'always')
SETTINGS['convert_on_load'] = OPT_MAP.get(settings.get('convert_on_load', True))
SETTINGS['convert_on_save'] = OPT_MAP.get(settings.get('convert_on_save', True))
SETTINGS['lazy_reload'] = settings.get('lazy_reload', True)
SETTINGS['convert_on_find'] = settings.get('convert_on_find', False)

def get_setting(view, key):
# read project specific settings first
Expand Down Expand Up @@ -165,7 +181,7 @@ def setup_views():
# check existing views
for win in sublime.windows():
for view in win.views():
if get_setting(view, 'convert_on_load') == 'never':
if not get_setting(view, 'convert_on_load'):
break
if view.is_dirty() or view.settings().get('origin_encoding'):
show_encoding_status(view)
Expand All @@ -189,7 +205,7 @@ def wait_for_ready():
wait_for_ready()

def detect(view, file_name, cnt):
if not file_name or not os.path.exists(file_name):
if not file_name or not os.path.exists(file_name) or os.path.getsize(file_name) == 0:
return
encoding = encoding_cache.pop(file_name)
if encoding:
Expand Down Expand Up @@ -338,20 +354,25 @@ def get_branch(self, platform, arch):
'osx-x64': 'osx',
}][ST3].get(platform + '-' + arch)

def run(self, edit, encoding, file_name):
def run(self, edit, encoding, file_name, need_codecs):
self.view.set_name('ConvertToUTF8 Instructions')
self.view.set_scratch(True)
self.view.settings().set("word_wrap", True)
msg = 'Oops! The file {0} is detected as {1} which is not supported by your Sublime Text.\n\nPlease check whether it is in the list of Python\'s Standard Encodings (http://docs.python.org/library/codecs.html#standard-encodings) or not.\n\nIf yes, '.format(file_name, encoding)
branch = self.get_branch(sublime.platform(), sublime.arch())
if branch:
ver = '33' if ST3 else '26'
msg = msg + 'please install Codecs{0} (https://github.com/seanliang/Codecs{0}/tree/{1}) and restart Sublime Text to make ConvertToUTF8 work properly. If it is still not working, '.format(ver, branch)

import platform
msg = msg + 'please kindly send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'.format(
sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
)
msg = 'File: {0}\nEncoding: {1}\nError: '.format(file_name, encoding)
if need_codecs:
msg = msg + 'Codecs missing\n\n'
branch = self.get_branch(sublime.platform(), sublime.arch())
if branch:
ver = '33' if ST3 else '26'
msg = msg + 'Please install Codecs{0} plugin (https://github.com/seanliang/Codecs{0}/tree/{1}).\n'.format(ver, branch)
else:
import platform
msg = msg + 'Please send the following information to sunlxy (at) yahoo.com:\n====== Debug Information ======\nVersion: {0}-{1}\nPlatform: {2}\nPath: {3}\nEncoding: {4}\n'.format(
sublime.version(), sublime.arch(), platform.platform(), sys.path, encoding
)
else:
msg = msg + 'Unsupported encoding, see http://docs.python.org/library/codecs.html#standard-encodings\n\nPlease try other tools such as iconv.\n'

self.view.insert(edit, 0, msg)
self.view.set_read_only(True)
self.view.window().focus_view(self.view)
Expand Down Expand Up @@ -384,8 +405,17 @@ def run(self, edit, encoding=None, stamp=None, detect_on_fail=False):
fp = codecs.open(file_name, 'rb', encoding, errors='strict')
contents = fp.read()
except LookupError as e:
clean_encoding_vars(view)
view.window().new_file().run_command('py_instruction', {'encoding': encoding, 'file_name': file_name})
try:
# reload codecs
import _multibytecodec, imp, encodings
imp.reload(encodings)
imp.reload(codecs)
codecs.getencoder(encoding)
view.run_command('reload_with_encoding', {'encoding': encoding})
except (ImportError, LookupError) as e:
need_codecs = (type(e) == ImportError)
clean_encoding_vars(view)
view.window().new_file().run_command('py_instruction', {'encoding': encoding, 'file_name': file_name, 'need_codecs': need_codecs})
return
except UnicodeDecodeError as e:
if detect_on_fail:
Expand Down Expand Up @@ -483,7 +513,69 @@ def description(self):
return
return 'UTF8 -> {0}'.format(encoding)

class ConvertTextToUtf8Command(sublime_plugin.TextCommand):
    """Re-decode a range of lines of the view in place.

    The lines are first encoded back to CP1252 bytes and then decoded with
    the target encoding. When no encoding is supplied, one is detected
    asynchronously and the command is re-issued with the detected value.
    """

    def get_text(self, region):
        """Return the text of ``region`` as CP1252 bytes.

        Returns ``None`` when the text cannot be encoded as CP1252.
        """
        content = self.view.substr(region)
        try:
            return content.encode('CP1252')
        except UnicodeError:
            return None

    def detect(self, begin_line, end_line):
        """Detect the encoding of the given lines and re-run the command.

        Nothing happens when the text cannot be read as CP1252, when the
        detector yields no encoding, when its confidence is below 0.95, or
        when the detected encoding is listed in ``SKIP_ENCODINGS``.
        """
        begin_line = int(begin_line)
        end_line = int(end_line)
        view = self.view
        # The line at begin_line itself is excluded; the region runs from
        # the start of the following line to just before line end_line.
        begin_point = view.text_point(begin_line + 1, 0)
        end_point = view.text_point(end_line, 0) - 1
        content = self.get_text(sublime.Region(begin_point, end_point))
        if not content:
            return
        detector = UniversalDetector()
        detector.feed(content)
        detector.close()
        encoding = detector.result['encoding']
        confidence = detector.result['confidence']
        # The detector reports None as the encoding when it cannot classify
        # the input; calling .upper() on it would raise AttributeError.
        if not encoding:
            return
        encoding = encoding.upper()
        if confidence < 0.95 or encoding in SKIP_ENCODINGS:
            return
        view.run_command('convert_text_to_utf8', {'begin_line': begin_line, 'end_line': end_line, 'encoding': encoding})

    def run(self, edit, begin_line, end_line, encoding = None):
        """Convert the lines after ``begin_line`` up to ``end_line``.

        At most 50 lines are converted per invocation; the command then
        re-issues itself for the remainder. If decoding fails, the encoding
        is replaced by its entry in ``SUPERSETS`` and decoding is retried
        until it succeeds or no further fallback exists.

        :param edit: edit token supplied by Sublime Text
        :param begin_line: row preceding the first row to convert
        :param end_line: row that bounds the conversion
        :param encoding: source encoding name; detected when falsy
        """
        begin_line = int(begin_line)
        end_line = int(end_line)
        if not encoding:
            # detect the encoding
            sublime.set_timeout(lambda: self.detect(begin_line, end_line), 0)
            return
        view = self.view
        last_line = min(begin_line + 50, end_line)
        begin_point = view.text_point(begin_line + 1, 0)
        end_point = view.text_point(last_line, 0) - 1
        region = sublime.Region(begin_point, end_point)
        raw = self.get_text(region)
        if raw is None:
            # The text is not representable in CP1252, so there are no
            # bytes to re-decode.
            return
        text = None
        while encoding:
            try:
                text = raw.decode(encoding)
                break
            except UnicodeDecodeError:
                # Retry with the superset encoding, if one is defined.
                encoding = SUPERSETS.get(encoding)
            except LookupError:
                # The codec for this encoding is not available.
                return
        if text is None:
            return
        view.replace(edit, region, text)
        # NOTE(review): the next chunk starts at last_line + 1 while this
        # chunk ends before last_line, so the line at last_line looks like
        # it is never converted -- confirm whether that is intended.
        if last_line < end_line:
            view.run_command('convert_text_to_utf8', {'begin_line': last_line, 'end_line': end_line, 'encoding': encoding})

    def is_enabled(self):
        """Report whether the ``convert_on_find`` setting is turned on."""
        return bool(get_setting(self.view, 'convert_on_find'))

class ConvertToUTF8Listener(sublime_plugin.EventListener):
def is_find_results(self, view):
    """Tell whether ``view`` uses the Find Results syntax definition."""
    syntax = view.settings().get('syntax')
    return syntax == 'Packages/Default/Find Results.hidden-tmLanguage'

def check_clones(self, view):
clone_numbers = view.settings().get('clone_numbers', 0)
if clone_numbers:
Expand All @@ -495,6 +587,9 @@ def check_clones(self, view):
return False

def on_new(self, view):
    """Initialise a newly created view.

    A Find Results view only gets its ``last_lines`` setting reset to 0.
    Any other view has its encoding variables initialised when the
    ``default_encoding_on_create`` setting is non-empty.
    """
    if not self.is_find_results(view):
        if get_setting(view, 'default_encoding_on_create'):
            init_encoding_vars(view, get_setting(view, 'default_encoding_on_create'), False)
        return
    view.settings().set('last_lines', 0)

Expand All @@ -511,6 +606,8 @@ def on_close(self, view):
view.settings().set('clone_numbers', clone_numbers - 1)
else:
remove_reverting(view.file_name())
if self.is_find_results(view):
view.settings().erase('last_lines')

def on_load(self, view):
encoding = view.encoding()
Expand All @@ -536,7 +633,7 @@ def on_load(self, view):
return
else:
return
if get_setting(view, 'convert_on_load') == 'never':
if not get_setting(view, 'convert_on_load'):
return
self.perform_action(view, file_name, 5)

Expand All @@ -563,7 +660,7 @@ def clean_reload(self, view, file_name):
threading.Thread(target=lambda: detect(view, file_name, cnt)).start()

def perform_action(self, view, file_name, times):
if get_setting(view, 'preview_action') != 'convert_and_open' and self.is_preview(view):
if not get_setting(view, 'preview_action') and self.is_preview(view):
if times > 0:
# give it another chance before everything is ready
sublime.set_timeout(lambda: self.perform_action(view, file_name, times - 1), 100)
Expand All @@ -580,6 +677,24 @@ def on_modified(self, view):
return
file_name = view.file_name()
if not file_name or view.is_loading():
if get_setting(view, 'convert_on_find') and self.is_find_results(view):
begin_line = view.settings().get('last_lines', 0)
end_line = view.rowcol(view.size())[0]
if end_line > begin_line:
view.settings().set('last_lines', end_line)
begin_point = view.text_point(begin_line, 0)
line = view.line(begin_point)
text = view.substr(line)
if text.endswith(':'):
# find the file name
file_name = text[:-1]
# skip opened file
if view.window().find_open_file(file_name):
return
encoding = encoding_cache.get(file_name)
if encoding in SKIP_ENCODINGS:
return
sublime.set_timeout(lambda: view.run_command('convert_text_to_utf8', {'begin_line': begin_line, 'end_line': end_line, 'encoding': encoding}), 0)
return
if not view.settings().get('in_converting'):
if view.settings().get('is_preview'):
Expand Down Expand Up @@ -657,7 +772,7 @@ def on_post_save(self, view):
file_name = view.file_name()
if file_name in stamps:
del stamps[file_name]
if get_setting(view, 'convert_on_save') == 'never':
if not get_setting(view, 'convert_on_save'):
return
# file was saved with other encoding
if view_encoding != 'UTF-8':
Expand Down
13 changes: 8 additions & 5 deletions ConvertToUTF8.sublime-settings
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,20 @@
// Maximum lines to detect, 0 means unlimited
"max_detect_lines" : 600,

// Action when previewing file: no_action or convert_and_open
"preview_action" : "no_action",
// Convert when previewing file: true or false
"preview_action" : false,

// Encoding for new file, empty means using sublime text's "default_encoding" setting
"default_encoding_on_create" : "",

// Setting this option to true will cause Sublime Text to reload the saved file when losing focus
"lazy_reload": false,

// Convert encoding options when loading/saving: always, never
"convert_on_load" : "always",
"convert_on_save" : "always"
// Convert in Find Results view
"convert_on_find": false,

// Convert when loading/saving a file
"convert_on_load" : true,
"convert_on_save" : true

}
22 changes: 9 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,7 @@ If you want to support this plugin, you can donate via PayPal or Alipay. Thanks!

Note
------------------
** For Linux users: ConvertToUTF8 relies on several dynamic libraries which are missing in embedded version of Python of Sublime Text 2 and 3. This plugin can not work fully until you install them manully.

** For OS X users: Sublime Text 3 uses an embedded Python which is facing the same problem as Linux version.

** I've reported this problem to Jon but did not get any response yet, so I created extra plugins to solve it. ConvertToUTF8 will show you the instructions when necessary.
** If the plugin does not work properly, you might need to install an extra plugin: [Codecs26](https://github.com/seanliang/Codecs26) for Sublime Text 2 or [Codecs33](https://github.com/seanliang/Codecs33) for Sublime Text 3.

Installation
------------------
Expand All @@ -34,11 +30,12 @@ Please check ConvertToUTF8.sublime-settings file for details. You should save yo
* encoding_list: encoding selection list shown when detection fails
* max_cache_size: maximum encoding cache size, 0 means no cache (default: 100)
* max_detect_lines: maximum detection lines, 0 means unlimited (default: 600)
* preview_action: specific the action when previewing a file, available options: no_action, convert_and_open (default: no_action)
* default_encoding_on_create: specific the default encoding for newly created file (such as "GBK"), empty value means using sublime text's "default_encoding" setting (default: empty)
* convert_on_load: enable/disable convert file content to UTF-8 when it is loaded, available options: always, never (default: always)
* convert_on_save: enable/disable convert file from UTF-8 to a specific encoding when it is saved, available options: always, never (default: always)
* lazy_reload: enable/disable save file to a temporary location, and reload it in background when switching to other windows or tabs, available options: true, false (default: false)
* preview_action: convert the file's content to UTF-8 when previewing it (default: false)
* default_encoding_on_create: specifies the default encoding for newly created files (such as "GBK"); an empty value means using Sublime Text's "default_encoding" setting (default: "")
* convert_on_load: convert the file's content to UTF-8 when it is loaded (default: true)
* convert_on_save: convert the file's content from UTF-8 to its original (or specific) encoding when it is saved (default: true)
* convert_on_find: convert the text in Find Results view to UTF-8 (default: false)
* lazy_reload: save file to a temporary location, and reload it in background when switching to other windows or tabs (default: false)

Usage
------------------
Expand All @@ -47,11 +44,10 @@ In most cases, this plug-in will take care of encoding issues automatically.
You can also use the "File > Set File Encoding to" menu entry to transform between different encodings. For example, you can open a UTF-8 file, and save it to GBK, and vice versa.

Note:
* if convert_on_save is set to never, the file will *NEVER* be saved to the selected encoding
* if convert_on_save is set to `false`, the file will *NEVER* be saved to the selected encoding
* please do not edit the file before the encoding detection process is finished
* please try either increasing the value of max_detect_lines or setting the encoding manually if the detection result is not accurate
* due to limitation of API, when lazy_reload is set to true, quit Sublime Text immediately after saving a file will cause the file to be saved as UTF-8, the correct content will be reload next time Sublime Text starts

* due to a limitation of the API, when lazy_reload is set to `true`, quitting Sublime Text immediately after saving a file will cause the file to be saved as UTF-8; the correct content will be reloaded the next time Sublime Text starts

Q & A
------------------
Expand Down
22 changes: 9 additions & 13 deletions README.zh_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@

注意
------------------
** Linux 用户:Sublime Text 2 和 3 内嵌 Python 版本中缺失几个 ConvertToUTF8 依赖的动态库。您必须手工安装这些文件才能让本插件完全运作。

** OS X 用户:Sublime Text 3 使用的内嵌 Python 存在与 Linux 版本相同的问题。

** 我已将此问题报告给 Jon 但未收到任何回复,因此我创建了额外的插件来解决它。ConvertToUTF8 会在需要时显示相应说明。
** 如果本插件无法正常工作,则可能需要安装一个额外插件:[Codecs26](https://github.com/seanliang/Codecs26)(针对 Sublime Text 2)或 [Codecs33](https://github.com/seanliang/Codecs33)(针对 Sublime Text 3)。

安装
------------------
Expand All @@ -35,11 +31,12 @@
* encoding_list:检测失败时显示的编码列表
* max_cache_size:最大编码缓存数量,0 表示不缓存(默认为 100)
* max_detect_lines:最大检测行数,0 表示不限制(默认为 600)
* preview_action:指定预览模式下的动作,可选项:no_action 不作任何动作,convert_and_open 转换编码并打开(默认为 no_action)
* default_encoding_on_create:指定新建文件的默认编码(如 GBK),空值表示使用 Sublime Text 的 default_encoding 设置(默认为空值)
* convert_on_load:启用/禁用文件装载时将窗口内容转换成UTF-8编码,可选项:always 自动转换,never 不转换(默认为 always)
* convert_on_save:启用/禁用文件保存时将其从UTF-8转换成指定转码,可选项:always 自动转换,never 不转换(默认为 always)
* lazy_reload:启用/禁用将文件保存到临时位置,并在切换窗口或标签时在后台自动重载,可选项:true,false(默认为 false)
* preview_action:预览文件时是否将其内容转换为 UTF-8(默认为 false)
* default_encoding_on_create:指定新建文件的默认编码(如 GBK),空值表示使用 Sublime Text 的 default_encoding 设置(默认为 "")
* convert_on_load:文件装载时是否将其内容转换成 UTF-8(默认为 true)
* convert_on_save:文件保存时是否将其内容转换成原有(或指定)编码(默认为 true)
* convert_on_find:将 Find Results 窗口里的内容转换成 UTF-8(默认为 false)
* lazy_reload:将文件保存到临时位置,并在切换窗口或标签时在后台自动重载(默认为 false)

使用说明
------------------
Expand All @@ -48,11 +45,10 @@
您也可以通过 File > Set File Encoding to 菜单对文件编码进行手工转换。例如,您可以打开一个 UTF-8 编码的文件,指定保存为 GBK,反之亦然。

注意:
* 如果 convert_on_save 被设置为 never,文件不会被保存成指定编码
* 如果 convert_on_save 被设置为 `false`,文件*不会*被保存成指定编码
* 在文件编码检测过程完成前请勿编辑文件
* 若检测结果不准确,请尝试增大 max_detect_lines 的值或手工指定编码
* 由于 API 限制,在 lazy_reload 设置为 true 时,保存文件后立即退出 Sublime Text 将造成文件被保存为 UTF-8,正确的内容将在下次 Sublime Text 打开时重载

* 由于 API 限制,在 lazy_reload 设置为 `true` 时,保存文件后立即退出 Sublime Text 将造成文件被保存为 UTF-8,正确的内容将在下次 Sublime Text 打开时重载

常见问题
------------------
Expand Down
4 changes: 2 additions & 2 deletions chardet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

__version__ = "2.1.1"

__version__ = "2.3.0"
from sys import version_info


def detect(aBuf):
if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
(version_info >= (3, 0) and not isinstance(aBuf, bytes))):
Expand Down
Loading

0 comments on commit b75c440

Please sign in to comment.