Skip to content

Commit

Permalink
Merge pull request #64 from watermarkhu/feat/charset
Browse files Browse the repository at this point in the history
use charset_normalizer to read file contents
  • Loading branch information
watermarkhu authored Jun 5, 2024
2 parents 0fc0a4e + 61a02f9 commit 96dd714
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 43 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# -- Project information -----------------------------------------------------

project = "Texmate Grammar Python"
version = "0.5.3"
version = "0.6.0"
copyright = f"{date.today().year}, Mark Shui Hu"
author = "Mark Shui Hu"

Expand Down
1 change: 0 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "4.0" \
--hash=sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4 \
--hash=sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba \
--hash=sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8 \
--hash=sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef \
--hash=sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5 \
--hash=sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd \
--hash=sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3 \
Expand Down
70 changes: 34 additions & 36 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "textmate-grammar-python"
version = "0.5.3"
version = "0.6.0"
description = "A lexer and tokenizer for grammar files as defined by TextMate and used in VSCode, implemented in Python."
authors = ["Mark Shui Hu <[email protected]>"]
license = "MIT"
Expand All @@ -25,6 +25,7 @@ managed = true
python = "^3.9"
onigurumacffi = "^1.3.0"
PyYAML = "^6.0.1"
charset-normalizer = "^3.3.2"

[tool.poetry.group.test.dependencies]
pytest = ">=7.3.1,<9.0.0"
Expand Down
2 changes: 1 addition & 1 deletion src/textmate_grammar/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.3"
__version__ = "0.6.0"
7 changes: 4 additions & 3 deletions src/textmate_grammar/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path
from typing import Callable

import charset_normalizer as charset
from onigurumacffi import _Match as Match
from onigurumacffi import _Pattern as Pattern
from onigurumacffi import compile
Expand Down Expand Up @@ -44,6 +45,7 @@ def __init__(
:ivar line_lengths: A list of lengths of each line in the source code.
:ivar anchor: The current position in the source code.
"""
# Preprocess the content, replace all newline characters with \n
prepared_content = pre_processor(content.replace("\r\n", "\n").replace("\r", "\n"))

self.content = prepared_content
Expand All @@ -58,9 +60,8 @@ def from_path(cls, file_path: Path, **kwargs) -> ContentHandler:
if not file_path.exists():
raise FileNotFound(str(file_path))

# Open file and replace Windows/Mac line endings
with open(file_path) as file:
content = file.read()
# Open file with best guess encoding from charset_normalizer
content = str(charset.from_path(file_path).best())

return cls(content, **kwargs)

Expand Down

0 comments on commit 96dd714

Please sign in to comment.