Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Explicit UTF-8 encoding for VASP input files with zopen, and open for other text files #4218

Merged
merged 28 commits into master from kpoints-encoding
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
597ab65
explicit utf-8 encoding for kpoints from file
DanielYang59 Dec 6, 2024
5e41f1a
explicit utf-8 elsewhere
DanielYang59 Dec 6, 2024
1767195
fix root level and dev_scripts
DanielYang59 Dec 6, 2024
f575e74
simplify PMG PKG path
DanielYang59 Dec 6, 2024
052e949
fix analysis, cli, command_line
DanielYang59 Dec 6, 2024
9d09765
fix electronic_structure, entries and ext
DanielYang59 Dec 6, 2024
3f7b180
fix io, phonon and symmetry
DanielYang59 Dec 6, 2024
bd90e90
fix alchemy and analysis tests
DanielYang59 Dec 6, 2024
5b8ced4
fix apps, command_line, core, elec_struct, entries, ext and vis tests
DanielYang59 Dec 6, 2024
b8d3b75
finish io and phonon tests
DanielYang59 Dec 6, 2024
c54d772
remove unnecessary seek
DanielYang59 Dec 6, 2024
bea91bd
revert encoding for json dump
DanielYang59 Dec 6, 2024
e58a4ed
type custom paths
DanielYang59 Dec 6, 2024
8a0490c
revert another json dump
DanielYang59 Dec 6, 2024
0d9de77
ignore userwarning by default
DanielYang59 Dec 7, 2024
5af79f7
relocate test-only env var
DanielYang59 Dec 7, 2024
308597a
remove unneeded default tag for non-userwarning
DanielYang59 Dec 7, 2024
1cd1aac
also explicit utf-8 for json dump though forced ASCII
DanielYang59 Dec 7, 2024
4206b7d
utf8 is an alias for utf-8 in codecs, but maybe prefer the standard name
DanielYang59 Dec 8, 2024
6a90d2d
fix missing encoding in comment
DanielYang59 Dec 10, 2024
436356f
add test for Γ decoding
DanielYang59 Dec 10, 2024
2608e8a
better error message
DanielYang59 Dec 10, 2024
9259f13
Merge branch 'master' into kpoints-encoding
shyuep Dec 11, 2024
ff46384
Merge branch 'master' into kpoints-encoding
DanielYang59 Dec 11, 2024
59148a0
Merge branch 'master' into kpoints-encoding
DanielYang59 Dec 11, 2024
140b8b1
Merge branch 'master' into kpoints-encoding
shyuep Dec 12, 2024
25e5a38
Merge remote-tracking branch 'upstream/master' into kpoints-encoding
DanielYang59 Jan 2, 2025
fa5aaf8
Merge branch 'master' into kpoints-encoding
mkhorton Jan 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,10 @@ jobs:
split: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

runs-on: ${{ matrix.config.os }}

env:
MPLBACKEND: Agg # non-interactive backend for matplotlib
PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
PYTHONWARNDEFAULTENCODING: "true" # PEP 597: Enable optional EncodingWarning

steps:
- name: Check out repo
uses: actions/checkout@v4
Expand Down Expand Up @@ -107,6 +105,8 @@ jobs:

- name: pytest split ${{ matrix.split }}
env:
MPLBACKEND: Agg # non-interactive backend for matplotlib
PMG_MAPI_KEY: ${{ secrets.PMG_MAPI_KEY }}
PMG_TEST_FILES_DIR: "${{ github.workspace }}/tests/files"
run: |
micromamba activate pmg
Expand Down
2 changes: 1 addition & 1 deletion dev_scripts/chemenv/explicit_permutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,5 @@ class Algo:
cg._algorithms = [ExplicitPermutationsAlgorithm(permutations=explicit_permutations)]
new_geom_dir = "new_geometry_files"
os.makedirs(new_geom_dir, exist_ok=True)
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg.as_dict(), file)
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,5 @@
if test == "y":
cg._algorithms = new_algos
cg_dict = cg.as_dict()
with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w") as file:
with open(f"../coordination_geometries_files_new/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg_dict, file)
2 changes: 1 addition & 1 deletion dev_scripts/chemenv/get_plane_permutations_optimized.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,5 +444,5 @@ def random_permutations_iterator(initial_permutation, n_permutations):
if test == "y":
new_geom_dir = "new_geometry_files"
os.makedirs(new_geom_dir, exist_ok=True)
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w") as file:
with open(f"{new_geom_dir}/{cg_symbol}.json", mode="w", encoding="utf-8") as file:
json.dump(cg.as_dict(), file)
14 changes: 7 additions & 7 deletions dev_scripts/regen_libxcfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
from __future__ import annotations

import json
import os
import sys
from copy import deepcopy

from pymatgen.core import PKG_DIR


def parse_libxc_docs(path):
"""Parse libxc_docs.txt file, return dictionary {libxc_id: info_dict}."""
Expand All @@ -27,7 +28,7 @@ def parse_section(section):
return int(dct["Number"]), dct

dct = {}
with open(path) as file:
with open(path, encoding="utf-8") as file:
section = []
for line in file:
if not line.startswith("-"):
Expand Down Expand Up @@ -62,7 +63,7 @@ def write_libxc_docs_json(xc_funcs, json_path):
if desc is not None:
xc_funcs[num][opt] = desc

with open(json_path, "w") as fh:
with open(json_path, "w", encoding="utf-8") as fh:
json.dump(xc_funcs, fh)

return xc_funcs
Expand All @@ -85,8 +86,7 @@ def main():
xc_funcs = parse_libxc_docs(path)

# Generate new JSON file in pycore
pmg_core = os.path.abspath("../pymatgen/core/")
json_path = f"{pmg_core}/libxc_docs.json"
json_path = f"{PKG_DIR}/core/libxc_docs.json"
write_libxc_docs_json(xc_funcs, json_path)

# Build new enum list.
Expand All @@ -99,8 +99,8 @@ def main():

# Re-generate enumerations.
# [0] read py module.
xc_funcpy_path = f"{pmg_core}/libxcfunc.py"
with open(xc_funcpy_path) as file:
xc_funcpy_path = f"{PKG_DIR}/core/libxcfunc.py"
with open(xc_funcpy_path, encoding="utf-8") as file:
lines = file.readlines()

# [1] insert new enum values in list
Expand Down
36 changes: 18 additions & 18 deletions dev_scripts/update_pt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from monty.serialization import dumpfn, loadfn
from ruamel import yaml

from pymatgen.core import Element, get_el_sp
from pymatgen.core import PKG_DIR, Element, get_el_sp

try:
from bs4 import BeautifulSoup
Expand All @@ -25,7 +25,7 @@

def parse_oxi_state():
data = loadfn(PTABLE_YAML_PATH)
with open("oxidation_states.txt") as file:
with open("oxidation_states.txt", encoding="utf-8") as file:
oxi_data = file.read()
oxi_data = re.sub("[\n\r]", "", oxi_data)
patt = re.compile("<tr>(.*?)</tr>", re.MULTILINE)
Expand Down Expand Up @@ -57,13 +57,13 @@ def parse_oxi_state():
data[el]["Common oxidation states"] = common_oxi
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)


def parse_ionic_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("ionic_radii.csv") as file:
with open("ionic_radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")
header = radii_data[0].split(",")
Expand All @@ -87,13 +87,13 @@ def parse_ionic_radii():
data[el]["Ionic_radii"] = ionic_radii
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)


def parse_radii():
data = loadfn(PTABLE_YAML_PATH)
with open("radii.csv") as file:
with open("radii.csv", encoding="utf-8") as file:
radii_data = file.read()
radii_data = radii_data.split("\r")

Expand Down Expand Up @@ -121,9 +121,9 @@ def parse_radii():
data[el]["Van der waals radius"] = vdw_radii
else:
print(el)
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)
DanielYang59 marked this conversation as resolved.
Show resolved Hide resolved


Expand All @@ -140,9 +140,9 @@ def update_ionic_radii():
if "Ionic_radii_ls" in dct:
dct["Ionic radii ls"] = {k: v / 100 for k, v in dct["Ionic_radii_ls"].items()}
del dct["Ionic_radii_ls"]
with open("periodic_table2.yaml", mode="w") as file:
with open("periodic_table2.yaml", mode="w", encoding="utf-8") as file:
yaml.dump(data, file)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


Expand Down Expand Up @@ -180,19 +180,19 @@ def parse_shannon_radii():
data[el]["Shannon radii"] = dict(radii[el])

dumpfn(data, PTABLE_YAML_PATH)
with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


def gen_periodic_table():
data = loadfn(PTABLE_YAML_PATH)

with open("../pymatgen/core/periodic_table.json", mode="w") as file:
with open(f"{PKG_DIR}/core/periodic_table.json", mode="w", encoding="utf-8") as file:
json.dump(data, file)


def gen_iupac_ordering():
periodic_table = loadfn("../pymatgen/core/periodic_table.json")
periodic_table = loadfn(f"{PKG_DIR}/core/periodic_table.json")
order = [
([18], range(6, 0, -1)), # noble gasses
([1], range(7, 1, -1)), # alkali metals
Expand Down Expand Up @@ -274,16 +274,16 @@ def add_electron_affinities():
missing_electron_affinities = set(range(1, 93)) - Z_set
raise ValueError(f"{missing_electron_affinities=}")
print(element_electron_affinities)
pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
val["Electron affinity"] = element_electron_affinities.get(Element(key).long_name)
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


def add_ionization_energies():
"""Update the periodic table data file with ground level and ionization energies from NIST."""

with open("NIST Atomic Ionization Energies Output.html") as file:
with open("NIST Atomic Ionization Energies Output.html", encoding="utf-8") as file:
soup = BeautifulSoup(file.read(), "html.parser")
table = None
for table in soup.find_all("table"):
Expand All @@ -302,11 +302,11 @@ def add_ionization_energies():
if not set(data).issuperset(range(1, 93)):
raise RuntimeError("Failed to get data up to Uranium")

pt = loadfn("../pymatgen/core/periodic_table.json")
pt = loadfn(f"{PKG_DIR}/core/periodic_table.json")
for key, val in pt.items():
del val["Ionization energy"]
val["Ionization energies"] = data.get(Element(key).long_name, [])
dumpfn(pt, "../pymatgen/core/periodic_table.json")
dumpfn(pt, f"{PKG_DIR}/core/periodic_table.json")


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions docs/usage.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -861,18 +861,18 @@ def __init__(self, permutations_safe_override=False, only_symbols=None):
dict.__init__(self)
self.cg_list: list[CoordinationGeometry] = []
if only_symbols is None:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt") as file:
with open(f"{MODULE_DIR}/coordination_geometries_files/allcg.txt", encoding="utf-8") as file:
data = file.readlines()
for line in data:
cg_file = f"{MODULE_DIR}/{line.strip()}"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))
else:
for symbol in only_symbols:
fsymbol = symbol.replace(":", "#")
cg_file = f"{MODULE_DIR}/coordination_geometries_files/{fsymbol}.json"
with open(cg_file) as file:
with open(cg_file, encoding="utf-8") as file:
dd = json.load(file)
self.cg_list.append(CoordinationGeometry.from_dict(dd))

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chemenv/utils/chemenv_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def save(self, root_dir=None):
if test != "Y":
print("Configuration not saved")
return config_file
with open(config_file, mode="w") as file:
with open(config_file, mode="w", encoding="utf-8") as file:
json.dump(config_dict, file)
print("Configuration saved")
return config_file
Expand All @@ -171,7 +171,7 @@ def auto_load(cls, root_dir=None):
root_dir = f"{home}/.chemenv"
config_file = f"{root_dir}/config.json"
try:
with open(config_file) as file:
with open(config_file, encoding="utf-8") as file:
config_dict = json.load(file)
return ChemEnvConfig(package_options=config_dict["package_options"])

Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/chempot_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from __future__ import annotations

import json
import os
import warnings
from functools import lru_cache
from itertools import groupby
Expand All @@ -36,6 +35,7 @@
from scipy.spatial import ConvexHull, HalfspaceIntersection

from pymatgen.analysis.phase_diagram import PDEntry, PhaseDiagram
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition, Element
from pymatgen.util.coord import Simplex
from pymatgen.util.due import Doi, due
Expand All @@ -44,7 +44,7 @@
if TYPE_CHECKING:
from pymatgen.entries.computed_entries import ComputedEntry

with open(f"{os.path.dirname(__file__)}/../util/plotly_chempot_layouts.json") as file:
with open(f"{PKG_DIR}/util/plotly_chempot_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(self, filename):
# read in data from file
self._chemsys_entries = defaultdict(list)
filename = os.path.join(os.path.dirname(__file__), filename)
with open(filename) as file:
with open(filename, encoding="utf-8") as file:
reader = csv.reader(file, quotechar="|")
for row in reader:
comp = Composition(row[0])
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down Expand Up @@ -2644,7 +2644,7 @@ def draw_graph_to_file(

write_dot(g, f"{basename}.dot")

with open(filename, mode="w") as file:
with open(filename, mode="w", encoding="utf-8") as file:
args = [algo, "-T", extension, f"{basename}.dot"]
with subprocess.Popen(args, stdout=file, stdin=subprocess.PIPE, close_fds=True) as rs:
rs.communicate()
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/analysis/hhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def __init__(self):
"""Init for HHIModel."""
self.symbol_hhip_hhir = {} # symbol->(HHI_production, HHI reserve)

with open(HHI_CSV_PATH) as file:
with open(HHI_CSV_PATH, encoding="utf-8") as file:
for line in file:
if line[0] != "#":
symbol, hhi_production, hhi_reserve = line.split(",")
Expand Down
4 changes: 2 additions & 2 deletions src/pymatgen/analysis/interface_reactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from __future__ import annotations

import json
import os
import warnings
from typing import TYPE_CHECKING

Expand All @@ -18,6 +17,7 @@

from pymatgen.analysis.phase_diagram import GrandPotentialPhaseDiagram, PhaseDiagram
from pymatgen.analysis.reaction_calculator import Reaction
from pymatgen.core import PKG_DIR
from pymatgen.core.composition import Composition
from pymatgen.util.due import Doi, due
from pymatgen.util.plotting import pretty_plot
Expand All @@ -31,7 +31,7 @@
__email__ = "[email protected]"
__date__ = "Sep 1, 2021"

with open(os.path.join(os.path.dirname(__file__), "..", "util", "plotly_interface_rxn_layouts.json")) as file:
with open(f"{PKG_DIR}/util/plotly_interface_rxn_layouts.json", encoding="utf-8") as file:
plotly_layouts = json.load(file)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def __init__(self, lambda_table=None, alpha=-5):
else:
module_dir = os.path.dirname(__file__)
json_file = f"{module_dir}/data/lambda.json"
with open(json_file) as file:
with open(json_file, encoding="utf-8") as file:
self._lambda_table = json.load(file)

# build map of specie pairs to lambdas
Expand Down
2 changes: 1 addition & 1 deletion src/pymatgen/cli/pmg_potcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def gen_potcar(dirname, filename):
"""
if filename == "POTCAR.spec":
fullpath = os.path.join(dirname, filename)
with open(fullpath) as file:
with open(fullpath, encoding="utf-8") as file:
elements = file.readlines()
symbols = [el.strip() for el in elements if el.strip() != ""]
potcar = Potcar(symbols)
Expand Down
Loading
Loading