Skip to content

Commit

Permalink
Add ruff and fix initial issues (#159)
Browse files Browse the repository at this point in the history
Migrate to ruff with many linters enabled. Remaining linters to be
sorted in later PRs.

Fixes #158
  • Loading branch information
oerc0122 authored Sep 4, 2024
1 parent d5d2830 commit f42bfbf
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 47 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/pylint.yml → .github/workflows/ruff.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Pylint
name: Ruff

on: [push]

Expand All @@ -17,7 +17,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pylint
- name: Analysing the code with pylint
pip install ruff
- name: Analysing the code with Ruff
run: |
pylint -d fixme $(git ls-files 'castep_outputs/*.py')
ruff check castep_outputs
4 changes: 2 additions & 2 deletions castep_outputs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@

# pylint: disable=unused-import

from .parsers import *
from .cli.castep_outputs_main import parse_single
from .parsers import * # noqa: F403
from .cli.castep_outputs_main import parse_single # noqa: F401
11 changes: 7 additions & 4 deletions castep_outputs/cli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,25 @@
prog="castep_outputs",
description=f"""Attempts to find all files for seedname, filtered by `inc` args (default: all).
Explicit files can be passed using longname arguments.
castep_outputs can parse most human-readable castep outputs including: {', '.join(CASTEP_FILE_FORMATS)}"""
castep_outputs can parse most castep outputs including: {', '.join(CASTEP_FILE_FORMATS)}""",
)

AP.add_argument("seedname", nargs=argparse.REMAINDER, help="Seed name for data")
AP.add_argument("-V", "--version", action="version", version="%(prog)s v0.1")
AP.add_argument("-L", "--log", help="Verbose output",
choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), default="WARNING")
AP.add_argument("-o", "--output", help="File to write output, default: screen", default=None)
AP.add_argument("-f", "--out-format", help="Output format", choices=SUPPORTED_FORMATS, default="json")
AP.add_argument("-f", "--out-format",
help="Output format", choices=SUPPORTED_FORMATS, default="json")

AP.add_argument("-t", "--testing", action="store_true", help="Set testing mode to produce flat outputs")
AP.add_argument("-t", "--testing",
action="store_true", help="Set testing mode to produce flat outputs")

AP.add_argument("-A", "--inc-all", action="store_true", help="Extract all available information")

for output_name in CASTEP_OUTPUT_NAMES:
AP.add_argument(f"--inc-{output_name}", action="store_true", help=f"Extract .{output_name} information")
AP.add_argument(f"--inc-{output_name}",
action="store_true", help=f"Extract .{output_name} information")

for output_name in CASTEP_OUTPUT_NAMES:
AP.add_argument(f"--{output_name}", nargs="*",
Expand Down
5 changes: 3 additions & 2 deletions castep_outputs/cli/castep_outputs_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import io
import logging
import sys
from collections.abc import Sequence
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, TextIO, Union

Expand Down Expand Up @@ -49,7 +50,7 @@ def parse_single(in_file: Union[str, Path, TextIO],
data = normalise(data, {tuple: list})

if testing:
if isinstance(data, list):
if isinstance(data, Sequence):
data = [flatten_dict(run) for run in data]
else:
data = flatten_dict(data)
Expand Down Expand Up @@ -77,7 +78,7 @@ def parse_all(output: Optional[Path] = None, out_format: OutFormats = "json",
elif isinstance(output, io.TextIOBase):
file_dumper(data, output)
else:
with open(output, 'a+', encoding='utf-8') as out_file:
with output.open('a+', encoding='utf-8') as out_file:
file_dumper(data, out_file)


Expand Down
2 changes: 1 addition & 1 deletion castep_outputs/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"magres": parse_magres_file,
"tddft": parse_tddft_file,
"err": parse_err_file,
"phonon": parse_phonon_file
"phonon": parse_phonon_file,
}
CASTEP_OUTPUT_NAMES: Tuple[str, ...] = tuple(PARSERS.keys())
CASTEP_FILE_FORMATS: Tuple[str, ...] = tuple(f".{typ}" for typ in CASTEP_OUTPUT_NAMES)
4 changes: 2 additions & 2 deletions castep_outputs/parsers/bands_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def parse_bands_file(bands_file: TextIO) -> Dict[str, Any]:
'spin_comp': int,
'band': float,
'band_up': float,
'band_dn': float
'band_dn': float,
})
bands_info['bands'].append(qdata)
_, _, *qpt, weight = line.split()
Expand All @@ -55,7 +55,7 @@ def parse_bands_file(bands_file: TextIO) -> Dict[str, Any]:
'spin_comp': int,
'band': float,
'band_up': float,
'band_dn': float
'band_dn': float,
})
bands_info['bands'].append(qdata)

Expand Down
23 changes: 12 additions & 11 deletions castep_outputs/parsers/castep_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ def parse_castep_file(castep_file_in: TextIO,

logger("Found warning")

curr_run["warning"].append(" ".join(map(lambda x: x.strip(), block[1:-1])))
block.remove_bounds(1, 1)
curr_run["warning"].append(" ".join(x.strip() for x in block))

elif match := re.match(r"(?:\s*[^:]+:)?(\s*)warning", line, re.IGNORECASE):

Expand Down Expand Up @@ -1016,7 +1017,7 @@ def parse_castep_file(castep_file_in: TextIO,
continue

if not (match := re.search(REs.MINIMISERS_RE, line)):
raise IOError("Invalid Geom block")
raise OSError("Invalid Geom block")

typ = match.group(0)

Expand Down Expand Up @@ -1159,7 +1160,7 @@ def parse_castep_file(castep_file_in: TextIO,
elif block := Block.from_re(line, castep_file, "Contribution ::", REs.EMPTY):

if not (match := re.match("(?P<type>.* Contribution)", line)):
raise IOError("Invalid elastic block")
raise ValueError("Invalid elastic block")

typ = match.group("type")
next(block)
Expand Down Expand Up @@ -1335,7 +1336,7 @@ def parse_castep_file(castep_file_in: TextIO,

def _process_ps_energy(block: Block) -> Tuple[str, PSPotEnergy]:
if not (match := REs.PS_SHELL_RE.search(next(block))):
raise IOError("Invalid PS Energy")
raise ValueError("Invalid PS Energy")

key = match["spec"]
accum: PSPotEnergy = defaultdict(list)
Expand Down Expand Up @@ -1596,7 +1597,7 @@ def _process_scf(block: Block) -> List[SCFReport]:

def _process_forces(block: Block) -> Tuple[str, AtomPropBlock]:
if not (ft_guess := REs.FORCES_BLOCK_RE.search(next(block))):
raise IOError("Invalid forces block")
raise ValueError("Invalid forces block")
ftype = ft_guess.group(1) if ft_guess.group(1) else "non_descript"
ftype = normalise_key(ftype)

Expand All @@ -1609,7 +1610,7 @@ def _process_forces(block: Block) -> Tuple[str, AtomPropBlock]:

def _process_stresses(block: Block) -> Tuple[str, SixVector]:
if not (ft_guess := REs.STRESSES_BLOCK_RE.search(next(block))):
raise IOError("Invalid stresses block")
raise ValueError("Invalid stresses block")
ftype = ft_guess.group(1) if ft_guess.group(1) else "non_descript"
ftype = normalise_key(ftype)

Expand Down Expand Up @@ -1708,7 +1709,7 @@ def _process_mulliken(block: Block) -> Dict[AtomIndex, MullikenInfo]:
replace=True)
line = next(block)
if not (match := REs.POPN_RE_DN.match(line)):
raise IOError("Invalid mulliken down spin")
raise ValueError("Invalid mulliken down spin")
val = match.groupdict()

add_aliases(val,
Expand Down Expand Up @@ -1771,7 +1772,7 @@ def _process_qdata(qdata: Dict[str, Union[str, List[str]]]) -> QData:
"N": int,
"frequency": float,
"intensity": float,
"raman_intensity": float
"raman_intensity": float,
})
return cast(QData, qdata)

Expand Down Expand Up @@ -1943,7 +1944,7 @@ def _process_dynamical_matrix(block: Block) -> Tuple[Tuple[complex, ...], ...]:

def _process_pspot_string(string: str, debug=False) -> PSPotStrInfo:
if not (match := REs.PSPOT_RE.search(string)):
raise IOError(f"Attempt to parse {string} as PSPot failed")
raise ValueError(f"Attempt to parse {string} as PSPot failed")

pspot = match.groupdict()
projectors = []
Expand All @@ -1952,7 +1953,7 @@ def _process_pspot_string(string: str, debug=False) -> PSPotStrInfo:
if match := REs.PSPOT_PROJ_RE.match(proj):
pdict = dict(zip(REs.PSPOT_PROJ_GROUPS, match.groups()))
else:
raise IOError("Invalid PSPot string")
raise ValueError("Invalid PSPot string")

pdict["shell"] = SHELLS[int(pdict["shell"])]

Expand Down Expand Up @@ -2322,7 +2323,7 @@ def _process_elastic_properties(block: Block) -> ElasticProperties:
accum["speed_of_sound"] = cast(ThreeByThreeMatrix,
tuple(to_type(numbers, float)
for blk_line in blk
if (numbers := get_numbers(blk_line)))
if (numbers := get_numbers(blk_line))),
)

return accum
2 changes: 1 addition & 1 deletion castep_outputs/parsers/parse_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


def parse_regular_header(block: Block,
extra_opts: Sequence[str] = tuple()) -> Dict[str, Union[float, int]]:
extra_opts: Sequence[str] = ()) -> Dict[str, Union[float, int]]:
""" Parse (semi-)standard castep file header block (given as iterable over lines) """

data = {}
Expand Down
26 changes: 12 additions & 14 deletions castep_outputs/utilities/castep_res.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
SPECIES_RE = r"[A-Z][a-z]{0,2}"
ATOM_NAME_RE = rf"\b{SPECIES_RE}(?::\w+)?\b(?:\s*\[[^\]]+\])?"
ATOM_NAME_GRP_RE = re.compile(
rf"(?P<species>{SPECIES_RE})(?::(?P<tag>\w+))?\b(?:\s*\[(?P<label>[^\]]*)\])?"
rf"(?P<species>{SPECIES_RE})(?::(?P<tag>\w+))?\b(?:\s*\[(?P<label>[^\]]*)\])?",
)


Expand Down Expand Up @@ -171,8 +171,6 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
$
""", re.VERBOSE)

#

# Forces block
FORCES_BLOCK_RE = re.compile(gen_table_re("([a-zA-Z ]*)Forces", r"\*+"), re.IGNORECASE)
# Stresses block
Expand All @@ -189,11 +187,11 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
# Pair pot
PAIR_POT_RES = {
'two_body_one_spec': re.compile(
rf"^(?P<tag>\w+)?\s*\*\s*(?P<spec>{ATOM_NAME_RE})\s*\*\s*$"
rf"^(?P<tag>\w+)?\s*\*\s*(?P<spec>{ATOM_NAME_RE})\s*\*\s*$",
),
'two_body_spec': re.compile(
rf"(?P<spec1>{ATOM_NAME_RE})\s*-\s*"
rf"(?P<spec2>{ATOM_NAME_RE})"
rf"(?P<spec2>{ATOM_NAME_RE})",
),
'two_body_val': re.compile(
rf"""
Expand All @@ -202,13 +200,13 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
{labelled_floats(('params',), counts=('1,4',))}\s*
[\w^/*]+\s* \* \s*
<--\s*(?P<type>\w+)
""", re.ASCII | re.VERBOSE
""", re.ASCII | re.VERBOSE,
),
'three_body_spec': re.compile(
rf"""
^(?P<tag>\w+)?\s*\*\s*
(?P<spec>(?:{ATOM_NAME_RE}\s*){{3}})
\s*\*\s*$""", re.VERBOSE
\s*\*\s*$""", re.VERBOSE,
),
'three_body_val': re.compile(
rf"""
Expand All @@ -217,8 +215,8 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
{labelled_floats(('params',))}\s*
[\w^/*]+\s* \* \s*
<--\s*(?P<type>\w+)
""", re.VERBOSE
)
""", re.VERBOSE,
),
}

# Orbital population
Expand Down Expand Up @@ -273,7 +271,7 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
ATREG + r"\s*" +
labelled_floats(("displacement",), counts=(6,)))

MINIMISERS_RE = f"(?:{'|'.join(map(lambda x: x.upper(), MINIMISERS))})"
MINIMISERS_RE = f"(?:{'|'.join(x.upper() for x in MINIMISERS)})"
GEOMOPT_MIN_TABLE_RE = re.compile(
r"\s*\|\s* (?P<step>[^|]+)" +
labelled_floats(("lambda", "fdelta", "enthalpy"), sep=r"\s*\|\s*") +
Expand All @@ -288,11 +286,11 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
# Regexp to identify Mulliken population analysis line
POPN_RE = re.compile(rf"\s*{ATREG}\s*(?P<spin_sep>up:)?" +
labelled_floats((*SHELLS, "total", "charge", "spin")) +
"?" # Spin is optional
"?", # Spin is optional
)

POPN_RE_DN = re.compile(r"\s+\d+\s*dn:" +
labelled_floats((*SHELLS, "total"))
labelled_floats((*SHELLS, "total")),
)

# Regexp for born charges
Expand All @@ -313,7 +311,7 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
# "(?:I|Ani)sotropic J-coupling" 3
re.compile(rf"\s*\|\**\s*{ATREG}{labelled_floats(('fc','sd','para','dia','tot'))}\s*\|\s*"),
# "Hyperfine Tensor" 4
re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso',))}\s*\|\s*")
re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso',))}\s*\|\s*"),
)

# MagRes Tasks
Expand All @@ -322,7 +320,7 @@ def get_atom_parts(spec: str) -> Dict[str, str]:
"Chemical Shielding and Electric Field Gradient",
"Electric Field Gradient",
"(An)Isotropic J-coupling",
"Hyperfine"
"Hyperfine",
)

# Regexp to identify block in .phonon or .phonon_dos file
Expand Down
2 changes: 1 addition & 1 deletion castep_outputs/utilities/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class PSPotTableInfo(TypedDict, total=False):
beta: Union[float, str]
e: float
j: int
l: int
l: int # noqa: E741
norm: int
scheme: Literal["2b", "es", "ev", "fh", "pn", "pv", "qb", "qc", "tm"]

Expand Down
10 changes: 5 additions & 5 deletions castep_outputs/utilities/filewrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def __next__(self):
self._pos = self.file.tell()
nextline = self.file.readline()
if not nextline:
raise StopIteration()
raise StopIteration
return nextline

def rewind(self):
Expand Down Expand Up @@ -141,8 +141,8 @@ def get_lines(
else:
if not eof_possible:
if hasattr(in_file, 'name'):
raise IOError(f"Unexpected end of file in {in_file.name}.")
raise IOError("Unexpected end of file.")
raise OSError(f"Unexpected end of file in {in_file.name}.")
raise OSError("Unexpected end of file.")

block._data = tuple(data)
return block
Expand Down Expand Up @@ -210,8 +210,8 @@ def from_re(
else:
if not eof_possible:
if hasattr(in_file, 'name'):
raise IOError(f"Unexpected end of file in {in_file.name}.")
raise IOError("Unexpected end of file.")
raise OSError(f"Unexpected end of file in {in_file.name}.")
raise OSError("Unexpected end of file.")

block._data = tuple(data)
return block
Expand Down
Loading

0 comments on commit f42bfbf

Please sign in to comment.