Skip to content

Commit

Permalink
Add encoding parameter for non-ASCII characters
Browse files Browse the repository at this point in the history
Fixed the `UnicodeDecodeError` raised by `p2j` when processing Python
scripts that contain Chinese comments. Added an `encoding` parameter
(default: "utf-8") to control the character encoding. Resolves Issue remykarem#16.
  • Loading branch information
QuiHow committed Mar 18, 2024
1 parent 065420b commit 421efda
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
7 changes: 4 additions & 3 deletions p2j/j2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@
from p2j.utils import _check_files


def jupyter2python(source_filename: str, target_filename: str, overwrite: bool = False):
def jupyter2python(source_filename: str, target_filename: str, overwrite: bool = False, encoding: str = "utf-8"):
"""Convert Jupyter notebooks to Python scripts
Args:
source_filename (str): Path to Jupyter notebook.
target_filename (str): Path to name of Python script. Optional.
overwrite (bool): Whether to overwrite an existing Python script.
with_markdown (bool, optional): Whether to include markdown. Defaults to False.
encoding (str): Text encoding used to open the source notebook and the target script. Defaults to "utf-8".
"""

target_filename = _check_files(
source_filename, target_filename, overwrite, conversion="j2p")

# Check if source file exists and read
try:
with open(source_filename, "r", encoding="utf-8") as infile:
with open(source_filename, "r", encoding=encoding) as infile:
myfile = json.load(infile)
except FileNotFoundError:
print("Source file not found. Specify a valid source file.")
Expand All @@ -30,6 +31,6 @@ def jupyter2python(source_filename: str, target_filename: str, overwrite: bool =
final = "\n\n".join(final)
final = final.replace("<br>", "")

with open(target_filename, "a", encoding="utf-8") as outfile:
with open(target_filename, "a", encoding=encoding) as outfile:
outfile.write(final)
print("Python script {} written.".format(target_filename))
5 changes: 5 additions & 0 deletions p2j/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ def main():
parser.add_argument("-o", "--overwrite",
action="store_true",
help="Flag whether to overwrite existing target file.")
parser.add_argument("-e",
"--encoding",
default="utf-8",
type=str,
help="Encodes obj using the codec registered for encoding.")
args = parser.parse_args()

if args.reverse:
Expand Down
13 changes: 7 additions & 6 deletions p2j/p2j.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,33 @@
TWELVE_SPACES = "{:<12}".format("")


def python2jupyter(source_filename: str, target_filename: str, overwrite: bool = False):
def python2jupyter(source_filename: str, target_filename: str, overwrite: bool = False, encoding: str = "utf-8"):
"""Convert Python scripts to Jupyter notebooks.
Args:
source_filename (str): Path to Python script.
target_filename (str): Path to name of Jupyter notebook. Optional.
overwrite (bool): Whether to overwrite an existing Jupyter notebook.
encoding (str): Text encoding used to open the source script, the template files and the target notebook. Defaults to "utf-8".
"""

target_filename = _check_files(
source_filename, target_filename, overwrite, conversion="p2j")

# Check if source file exists and read
try:
with open(source_filename, "r", encoding="utf-8") as infile:
with open(source_filename, "r", encoding=encoding) as infile:
data = [l.rstrip("\n") for l in infile]
except FileNotFoundError:
print("Source file not found. Specify a valid source file.")
sys.exit(1)

# Read JSON files for .ipynb template
with open(HERE + "/templates/cell_code.json", encoding="utf-8") as file:
with open(HERE + "/templates/cell_code.json", encoding=encoding) as file:
CODE = json.load(file)
with open(HERE + "/templates/cell_markdown.json", encoding="utf-8") as file:
with open(HERE + "/templates/cell_markdown.json", encoding=encoding) as file:
MARKDOWN = json.load(file)
with open(HERE + "/templates/metadata.json", encoding="utf-8") as file:
with open(HERE + "/templates/metadata.json", encoding=encoding) as file:
MISC = json.load(file)

# Initialise variables
Expand Down Expand Up @@ -160,6 +161,6 @@ def python2jupyter(source_filename: str, target_filename: str, overwrite: bool =
final.update(MISC)

# Write JSON to target file
with open(target_filename, "w", encoding="utf-8") as outfile:
with open(target_filename, "w", encoding=encoding) as outfile:
json.dump(final, outfile, indent=1, ensure_ascii=False)
print("Notebook {} written.".format(target_filename))

0 comments on commit 421efda

Please sign in to comment.