From c3598908506b5cfb28b707204c315bf4d1626852 Mon Sep 17 00:00:00 2001 From: Chris Adams Date: Tue, 15 Oct 2024 16:35:23 -0400 Subject: [PATCH] Use Ruff + pre-commit for linting & formatting --- .github/workflows/test.yml | 8 +++++ .pre-commit-config.yaml | 36 +++++++++++++++++++ README.rst | 4 +-- bagit.py | 73 +++++++++++++++++++------------------- bench.py | 2 +- pyproject.toml | 1 - test.py | 2 +- 7 files changed, 84 insertions(+), 42 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9185bec..b37ba86 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,15 @@ on: branches: [master] jobs: + ruff: # https://docs.astral.sh/ruff + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - run: pip install --user ruff + - run: ruff check --output-format=github + test: + needs: ruff runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9f1dc32 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,36 @@ +exclude: ".*test-data.*" + +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-added-large-files + args: ["--maxkb=128"] + - id: check-ast + - id: check-byte-order-marker + - id: check-case-conflict + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + - id: check-symlinks + - id: check-xml + - id: check-yaml + args: ["--unsafe"] + - id: debug-statements + - id: detect-aws-credentials + args: ["--allow-missing-credentials"] + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + args: ["--fix=lf"] + - id: trailing-whitespace + - id: pretty-format-json + args: ["--autofix", "--no-sort-keys", "--indent=4"] diff --git a/README.rst b/README.rst index d134c4b..c109c83 100644 --- a/README.rst +++ b/README.rst @@ -226,11 +226,11 @@ Contributing to bagit-python development Running the tests ~~~~~~~~~~~~~~~~~ -You can quickly run the tests by having setuptools install dependencies: +You can quickly run the tests using the built-in unittest framework: :: - python setup.py test + python -m unittest discover If you have Docker installed, you can run the tests under Linux inside a container: diff --git a/bagit.py b/bagit.py index 458fba8..944bf94 100755 --- a/bagit.py +++ b/bagit.py @@ -140,7 +140,7 @@ def find_locale_dir(): open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: -UNICODE_BYTE_ORDER_MARK = "\uFEFF" +UNICODE_BYTE_ORDER_MARK = "\ufeff" def make_bag( @@ -422,8 +422,8 @@ def compare_manifests_with_fs(self): def compare_fetch_with_fs(self): """Compares the fetch entries with the files actually - in the payload, and returns a list of all the files - that still need to be fetched. + in the payload, and returns a list of all the files + that still need to be fetched. """ files_on_fs = set(self.payload_files()) @@ -449,7 +449,7 @@ def payload_files(self): yield rel_path def payload_entries(self): - """Return a dictionary of items """ + """Return a dictionary of items""" # Don't use dict comprehension (compatibility with Python < 2.7) return dict( (key, value) @@ -618,7 +618,9 @@ def is_valid(self, processes=1, fast=False, completeness_only=False): """ try: - self.validate(processes=processes, fast=fast, completeness_only=completeness_only) + self.validate( + processes=processes, fast=fast, completeness_only=completeness_only + ) except BagError: return False @@ -776,7 +778,10 @@ def validate_fetch(self): # each parsed url must resolve to a scheme and point to a netloc # if the scheme is file, netloc is not necessary - if not (all((parsed_url.scheme, parsed_url.netloc)) or parsed_url.scheme == "file"): + if not ( + all((parsed_url.scheme, parsed_url.netloc)) + or parsed_url.scheme == "file" + ): raise BagError(_("Malformed URL in fetch.txt: %s") % url) def _validate_contents(self, processes=1, fast=False, completeness_only=False): @@ -851,11 +856,11 @@ def _validate_completeness(self): only_in_manifests, only_on_fs = self.compare_manifests_with_fs() for path in only_in_manifests: e = FileMissing(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) for path in only_on_fs: e = UnexpectedFile(path) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -906,7 +911,7 @@ def _validate_entries(self, processes): e = ChecksumMismatch( rel_path, alg, stored_hash.lower(), computed_hash ) - LOGGER.warning(force_unicode(e)) + LOGGER.warning(str(e)) errors.append(e) if errors: @@ -963,7 +968,7 @@ def __init__(self, message, details=None): def __str__(self): if len(self.details) > 0: - details = "; ".join([force_unicode(e) for e in self.details]) + details = "; ".join([str(e) for e in self.details]) return "%s: %s" % (self.message, details) return self.message @@ -988,7 +993,7 @@ def __str__(self): return _( '%(path)s %(algorithm)s validation failed: expected="%(expected)s" found="%(found)s"' ) % { - "path": force_unicode(self.path), + "path": str(self.path), "algorithm": self.algorithm, "expected": self.expected, "found": self.found, @@ -997,9 +1002,9 @@ def __str__(self): class FileMissing(ManifestErrorDetail): def __str__(self): - return _( - "%s exists in manifest but was not found on filesystem" - ) % force_unicode(self.path) + return _("%s exists in manifest but was not found on filesystem") % str( + self.path + ) class UnexpectedFile(ManifestErrorDetail): @@ -1138,7 +1143,7 @@ def _calc_hashes(args): try: f_hashes = _calculate_file_hashes(full_path, f_hashers) except BagValidationError as e: - f_hashes = dict((alg, force_unicode(e)) for alg in f_hashers.keys()) + f_hashes = dict((alg, str(e)) for alg in f_hashers.keys()) return rel_path, f_hashes, hashes @@ -1161,7 +1166,7 @@ def _calculate_file_hashes(full_path, f_hashers): except (OSError, IOError) as e: raise BagValidationError( _("Could not read %(filename)s: %(error)s") - % {"filename": full_path, "error": force_unicode(e)} + % {"filename": full_path, "error": str(e)} ) return dict((alg, h.hexdigest()) for alg, h in f_hashers.items()) @@ -1187,11 +1192,11 @@ def _load_tag_file(tag_file_name, encoding="utf-8-sig"): def _parse_tags(tag_file): """Parses a tag file, according to RFC 2822. This - includes line folding, permitting extra-long - field values. + includes line folding, permitting extra-long + field values. - See http://www.faqs.org/rfcs/rfc2822.html for - more information. + See http://www.faqs.org/rfcs/rfc2822.html for + more information. """ tag_name = None @@ -1237,7 +1242,7 @@ def _make_tag_file(bag_info_path, bag_info): values = [values] for txt in values: # strip CR, LF and CRLF so they don't mess up the tag file - txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) + txt = re.sub(r"\n|\r|(\r\n)", "", str(txt)) f.write("%s: %s\n" % (h, txt)) @@ -1433,19 +1438,6 @@ def _decode_filename(s): return s -def force_unicode_py2(s): - """Reliably return a Unicode string given a possible unicode or byte string""" - if isinstance(s, str): - return s.decode("utf-8") - else: - return unicode(s) - - -if sys.version_info > (3, 0): - force_unicode = str -else: - force_unicode = force_unicode_py2 - # following code is used for command line program @@ -1531,7 +1523,10 @@ def _make_parser(): metadata_args = parser.add_argument_group(_("Optional Bag Metadata")) for header in STANDARD_BAG_INFO_HEADERS: metadata_args.add_argument( - "--%s" % header.lower(), type=str, action=BagHeaderAction, default=argparse.SUPPRESS + "--%s" % header.lower(), + type=str, + action=BagHeaderAction, + default=argparse.SUPPRESS, ) parser.add_argument( @@ -1574,7 +1569,9 @@ def main(): parser.error(_("--fast is only allowed as an option for --validate!")) if args.completeness_only and not args.validate: - parser.error(_("--completeness-only is only allowed as an option for --validate!")) + parser.error( + _("--completeness-only is only allowed as an option for --validate!") + ) _configure_logging(args) @@ -1593,7 +1590,9 @@ def main(): if args.fast: LOGGER.info(_("%s valid according to Payload-Oxum"), bag_dir) elif args.completeness_only: - LOGGER.info(_("%s is complete and valid according to Payload-Oxum"), bag_dir) + LOGGER.info( + _("%s is complete and valid according to Payload-Oxum"), bag_dir + ) else: LOGGER.info(_("%s is valid"), bag_dir) except BagError as e: diff --git a/bench.py b/bench.py index 37d14f5..06b4796 100755 --- a/bench.py +++ b/bench.py @@ -2,7 +2,7 @@ """ This is a little benchmarking script to exercise bagit.make_bag and -bagit.validate using 1-8 parallel processes. It will download some images +bagit.validate using 1-8 parallel processes. It will download some images from NASA for use in bagging the first time it is run. """ diff --git a/pyproject.toml b/pyproject.toml index 231d419..49db106 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,4 +34,3 @@ known_first_party = "bagit" [tool.coverage.run] branch = true include = "bagit.py" - diff --git a/test.py b/test.py index 0f32754..16652fb 100644 --- a/test.py +++ b/test.py @@ -444,7 +444,7 @@ def test_sha1_tagfile(self): bag = bagit.make_bag(self.tmpdir, checksum=["sha1"], bag_info=info) self.assertTrue(os.path.isfile(j(self.tmpdir, "tagmanifest-sha1.txt"))) self.assertEqual( - "f69110479d0d395f7c321b3860c2bc0c96ae9fe8", + "3f7423acbb8395ff11dfeb16b4172e7ccc2c529e", bag.entries["bag-info.txt"]["sha1"], )