From 47cdb781963814ed9fade82ee07b214161f5ad38 Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Wed, 9 Oct 2024 14:32:48 +0200
Subject: [PATCH 1/7] chore: switch to python version 3.11

---
 .python-version | 2 +-
 .travis.yml     | 3 ++-
 setup.py        | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.python-version b/.python-version
index 424e179..2c07333 100644
--- a/.python-version
+++ b/.python-version
@@ -1 +1 @@
-3.6.8
+3.11
diff --git a/.travis.yml b/.travis.yml
index 1ef825f..05a231f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,7 @@
 language: python
+dist: jammy
 python:
-  - "3.6"
+  - "3.11"
 # command to install dependencies
 install:
   - make dev
diff --git a/setup.py b/setup.py
index 72fa18f..d7868d8 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,7 @@ def read(f):
 setup(
     name='content-validator',
     version=version,
+    python_requires='>=3.11',
     description=('Content validator looks at text content and preforms different validation tasks'),
     classifiers=[
         'License :: OSI Approved :: BSD License',
         'Intended Audience :: Developers',
         'Programming Language :: Python'

From 6f95ff570c09ed5673fd29c803eeacbe84d084be Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Wed, 9 Oct 2024 14:50:15 +0200
Subject: [PATCH 2/7] chore: switch test runner to pytest

---
 Makefile | 14 +++++++++-----
 setup.py |  6 +++---
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index b16471c..4e7b15f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,9 @@
 PYTHON=venv/bin/python3
 PIP=venv/bin/pip
-NOSE=venv/bin/nosetests
+COVERAGE=venv/bin/coverage
+TEST_RUNNER=venv/bin/pytest
+TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005
 FLAKE=venv/bin/flake8
 PYPICLOUD_HOST=pypicloud.getkeepsafe.local
 PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
@@ -30,14 +32,16 @@ flake:
 	$(FLAKE) validator tests
 
 test: flake
-	$(NOSE) -s $(FLAGS)
+	$(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS)
 
 vtest:
-	$(NOSE) -s -v $(FLAGS)
+	$(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS)
+
+testloop:
+	while sleep 1; do $(TEST_RUNNER) -s --lf $(TEST_RUNNER_FLAGS); done
 
 cov cover coverage:
-	$(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
-	echo "open file://`pwd`/coverage/index.html"
+	$(COVERAGE) report -m
 
 clean:
 	rm -rf `find . -name __pycache__`
diff --git a/setup.py b/setup.py
index d7868d8..65a6ca6 100644
--- a/setup.py
+++ b/setup.py
@@ -19,9 +19,9 @@ def read(f):
 ]
 
 tests_require = [
-    'nose',
-    'flake8==3.6.0',
-    'coverage',
+    'pytest >= 8',
+    'coverage==7.6.1',
+    'flake8==7.1.1',
 ]
 
 devtools_require = [
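
Editorial aside between patches, not part of the series itself: the runner swap in PATCH 2/7 works because pytest discovers the same plain test functions that nosetests did, so only the Makefile and the test dependencies change while the files under tests/ stay untouched. A minimal sketch of such a test module follows; the file name tests/test_parsers.py is a hypothetical example, and the two parser classes are taken from validator/parsers.py as shown in the next patch:

    # Hypothetical example, not from this repository's test suite: plain test
    # functions like these are collected by nosetests and pytest alike, which
    # is why swapping the runner needs no changes to the tests themselves.
    from validator.parsers import CsvParser, TxtReader


    def test_txt_reader_returns_content_unchanged():
        assert TxtReader().read('some content') == 'some content'


    def test_csv_parser_puts_each_field_on_its_own_line():
        assert CsvParser().parse('a,b,c') == 'a\nb\nc'
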
From 068ea652f69cc70811c6802b209e93b5530f1489 Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Wed, 9 Oct 2024 18:43:52 +0200
Subject: [PATCH 3/7] refactors: using pyupgrade

---
 tests/utils.py               |  1 -
 validator/__init__.py        | 10 +++++-----
 validator/checks/__init__.py |  4 ++--
 validator/checks/java.py     |  2 +-
 validator/checks/md.py       |  4 ++--
 validator/checks/url.py      | 24 ++++++++++++------------
 validator/errors.py          |  6 +++---
 validator/fs.py              |  4 ++--
 validator/parsers.py         | 12 ++++++------
 validator/reports.py         | 26 +++++++++++++-------------
 10 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/tests/utils.py b/tests/utils.py
index 3948220..e3aa96c 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,4 +1,3 @@
-
 def read(path):
     with open(path) as fp:
         return fp.read()
diff --git a/validator/__init__.py b/validator/__init__.py
index c7301e4..42d7083 100644
--- a/validator/__init__.py
+++ b/validator/__init__.py
@@ -3,7 +3,7 @@
 from . import parsers, checks, reports, fs
 
 
-class Validator(object):
+class Validator:
     def __init__(self, contents, parser, reader, check, reporter=None):
         self.contents = contents
         self.parser = parser
@@ -24,7 +24,7 @@ async def async_validate(self):
         return errors
 
 
-class ReportBuilder(object):
+class ReportBuilder:
     def __init__(self, contents, parser, reader, check):
         self.contents = contents
         self.parser = parser
@@ -49,7 +49,7 @@ def validate(self):
         return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate()
 
 
-class CheckBuilder(object):
+class CheckBuilder:
     def __init__(self, contents, content_type, parser, reader):
         self.contents = contents
         self.content_type = content_type
@@ -89,7 +89,7 @@ async def async_validate(self):
         return res
 
 
-class ParserBuilder(object):
+class ParserBuilder:
     def __init__(self, contents, reader=None):
         self.contents = contents
         self.content_type = 'txt'
@@ -120,7 +120,7 @@ def check(self):
         return CheckBuilder(self.contents, self.content_type, parser, self.reader)
 
 
-class ContentBuilder(object):
+class ContentBuilder:
     def files(self, pattern, **kwargs):
         contents = fs.files(pattern, **kwargs)
         return ParserBuilder(contents, parsers.FileReader())
diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py
index 9dc692f..85bd1e8 100644
--- a/validator/checks/__init__.py
+++ b/validator/checks/__init__.py
@@ -21,7 +21,7 @@ def url_occurences(filetype):
     return UrlOccurenciesValidator()
 
 
-def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser):
+def markdown(filetype, md_parser_cls: type[MdParser] = MdParser):
     if filetype not in ['txt', 'html']:
         raise UndefinedCheckTypeError('got filetype %s' % filetype)
     return MarkdownComparator(md_parser_cls)
@@ -33,7 +33,7 @@ def java_args(filetype):
     return JavaComparator()
 
 
-class ChainCheck(object):
+class ChainCheck:
     def __init__(self, checks):
         self.checks = checks
diff --git a/validator/checks/java.py b/validator/checks/java.py
index 319ec2d..70cfd0d 100644
--- a/validator/checks/java.py
+++ b/validator/checks/java.py
@@ -6,7 +6,7 @@
 REF_PATTERN = r'@string/\w+'
 
 
-class JavaComparator(object):
+class JavaComparator:
     def _get_args(self, content):
         return re.findall(ARG_PATTERN, content)
diff --git a/validator/checks/md.py b/validator/checks/md.py
index d8542a8..bb9ff9d 100644
--- a/validator/checks/md.py
+++ b/validator/checks/md.py
@@ -14,8 +14,8 @@ def save_file(content, filename):
         fp.write(content)
 
 
-class MarkdownComparator(object):
-    def __init__(self, md_parser_cls: Type[MdParser] = MdParser):
+class MarkdownComparator:
+    def __init__(self, md_parser_cls: type[MdParser] = MdParser):
         self._md_parser_cls = md_parser_cls
 
     def check(self, data, parser, reader):
diff --git a/validator/checks/url.py b/validator/checks/url.py
index dabd816..375899d 100644
--- a/validator/checks/url.py
+++ b/validator/checks/url.py
@@ -23,7 +23,7 @@ class MissingUrlExtractorError(Exception):
 # the job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator
 # which examines is particular url leads to working webpage (200 status)
 # since we are interested in all urls (including parametrized) we need to sligthly change their API and behaviour
-class TextUrlExtractor(object):
+class TextUrlExtractor:
     def __init__(self, **kwargs):
         pass
@@ -60,12 +60,12 @@ def _validate_email(self, email):
         return False
 
     def _extract_from_anchors(self, soup):
-        return set([a.get('href') or a.text for a in soup.find_all('a')])
+        return {a.get('href') or a.text for a in soup.find_all('a')}
 
     def _extract_from_img(self, soup):
         if self.skip_images:
             return set()
-        return set([img.get('src') for img in soup.find_all('img')])
+        return {img.get('src') for img in soup.find_all('img')}
 
     def _fix_url(self, url):
         result = ''
@@ -82,7 +82,7 @@ def _fix_url(self, url):
         if re.match(self.url_pattern, full_url):
             result = full_url
         else:
-            logging.error('{} not tested'.format(url_parsed.geturl()))
+            logging.error(f'{url_parsed.geturl()} not tested')
         return result
 
     def extract_urls(self, content, keep_placeholders=False):
@@ -96,20 +96,20 @@ def extract_urls(self, content, keep_placeholders=False):
         return result
 
 
-class UrlStatusChecker(object):
+class UrlStatusChecker:
     retry_max_count = 3
 
-    def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None):
+    def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None):
         self._exclude_urls_regex = exclude_urls_regexs or []
         if self._exclude_urls_regex:
-            logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex))
+            logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}')
         self._headers = headers or {}
         if 'User-Agent' not in self._headers:
             self._headers['User-Agent'] = DEFAULT_USER_AGENT
 
     async def _make_request(self, url):
         try:
-            logging.info('checking {}'.format(url))
+            logging.info(f'checking {url}')
             async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res:
                 return res.status
         except Exception:
@@ -143,7 +143,7 @@ async def _check_urls_coro(self, urls, future):
             if not is_exluded:
                 urls_without_excluded.append(url)
             else:
-                logging.warning('url {} excluded from status check'.format(url.url))
+                logging.warning(f'url {url.url} excluded from status check')
         tasks = [self._request_status_code(url.url) for url in urls_without_excluded]
         results = await asyncio.gather(*tasks)
         for index, url in enumerate(urls_without_excluded):
@@ -167,10 +167,10 @@ async def async_check(self, urls):
         return future.result()
 
 
-class UrlValidator(object):
+class UrlValidator:
     _extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor}
 
-    def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs):
+    def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs):
         self.client_headers = headers or {}
         self._excluded_status_check_regexs = exclude_status_check_regexs or []
         extractor_class = self._extractors.get(filetype)
@@ -179,7 +179,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional
         self.extractor = extractor_class(**kwargs)
 
     def _get_urls(self, data, parser, reader):
-        flat_data = set(p for sublist in data for p in sublist)
+        flat_data = {p for sublist in data for p in sublist}
         # TODO yield instead
         urls = {}
         for element in flat_data:
diff --git a/validator/errors.py b/validator/errors.py
index 082885a..9972f88 100644
--- a/validator/errors.py
+++ b/validator/errors.py
@@ -1,7 +1,7 @@
 from collections import namedtuple
 
 
-class UrlDiff(object):
+class UrlDiff:
 
     def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False):
         self.url = url
@@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False)
         self.has_disallowed_chars = has_disallowed_chars
 
     def __str__(self):
-        return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars)
+        return 'Url({}, {}, {}, {})'.format(self.url, self.files, self.status_code, self.has_disallowed_chars)
 
     def __repr__(self):
         return 'Url: %s' % self.url
@@ -37,7 +37,7 @@ def is_valid(self):
 ContentData.__new__.__defaults__ = ('', ) * 2
 
 
-class MdDiff(object):
+class MdDiff:
 
     def __init__(self, base, other, error_msgs):
         self.base = base
diff --git a/validator/fs.py b/validator/fs.py
index b87808d..6e12b16 100644
--- a/validator/fs.py
+++ b/validator/fs.py
@@ -91,10 +91,10 @@ def files(pattern, **kwargs):
     [[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]]
     """
     # extract named parameters from the pattern
-    params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p])
+    params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p}
     if params:
         if len(params - kwargs.keys()) > 0:
-            raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern))
+            raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}')
         return _params_pattern(pattern, params, **kwargs)
     else:
         return _no_params_pattern(pattern)
diff --git a/validator/parsers.py b/validator/parsers.py
index 379902b..3c87b89 100644
--- a/validator/parsers.py
+++ b/validator/parsers.py
@@ -9,22 +9,22 @@ def __init__(self, msg):
         super().__init__(msg)
 
 
-class FileReader(object):
+class FileReader:
     def read(self, path):
         return read_content(path)
 
 
-class TxtReader(object):
+class TxtReader:
     def read(self, content):
         return content
 
 
-class MarkdownParser(object):
+class MarkdownParser:
     def parse(self, content):
         return markdown.markdown(content)
 
 
-class XmlParser(object):
+class XmlParser:
     def __init__(self, query='*'):
         self.query = query
@@ -38,12 +38,12 @@ def parse(self, content):
         return '\n\n'.join(texts)
 
 
-class CsvParser(object):
+class CsvParser:
     def parse(self, content):
         return '\n'.join(content.split(','))
 
 
-class ChainParser(object):
+class ChainParser:
     def __init__(self, parsers):
         self.parsers = parsers
diff --git a/validator/reports.py b/validator/reports.py
index c052e62..a164f5b 100644
--- a/validator/reports.py
+++ b/validator/reports.py
@@ -6,7 +6,7 @@
 from .errors import UrlDiff, MdDiff, UrlOccurencyDiff
 
 
-class HtmlReporter(object):
+class HtmlReporter:
     report_template = """
@@ -82,12 +82,12 @@ def __init__(self, output_directory='errors'):
         self.output_directory = output_directory
 
     def _add_content(self, soup, tag_id, content):
-        tags = soup.select('#{}'.format(tag_id))
+        tags = soup.select(f'#{tag_id}')
         if tags and content:
             tags[0].append(content)
         else:
-            print('missing tag: %s, content %s' % (tag_id, content))
+            print('missing tag: {}, content {}'.format(tag_id, content))
         return soup
 
     # TODO just rewrite !!!
@@ -99,7 +99,7 @@ def report(self, errors):
             # TODO use mustache for templates
             report_soup = BeautifulSoup(self.report_template, 'lxml')
             if isinstance(error, UrlDiff):
-                messages = ['{} returned with code {}'.format(error.url, error.status_code)]
+                messages = [f'{error.url} returned with code {error.status_code}']
                 self._add_content(report_soup, 'urls', '\n'.join(messages))
             if isinstance(error, MdDiff):
                 error_msgs = '<br/>'.join(map(lambda i: str(i), error.error_msgs))
@@ -113,20 +113,20 @@
             save_report(self.output_directory, error.other.original, report_soup.prettify())
 
 
-class ConsoleReporter(object):
+class ConsoleReporter:
     def report(self, errors):
         for error in errors:
             if isinstance(error, UrlDiff):
-                print('{} returned with code {}'.format(error.url, error.status_code))
+                print(f'{error.url} returned with code {error.status_code}')
                 for path in error.files:
-                    print('\t{}'.format(str(path)))
+                    print(f'\t{str(path)}')
                 print()
             if isinstance(error, MdDiff):
-                print('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
+                print(f'Files are different:\n\t{str(error.base)}\n\t{str(error.other)}\n\n')
 
 
-class StoreReporter(object):
+class StoreReporter:
     def __init__(self):
         self.log = []
@@ -134,16 +134,16 @@ def __init__(self):
 
     def report(self, errors):
         for error in errors:
             if isinstance(error, UrlDiff):
-                self.log.append('%s returned with code %s for files' % (error.url, error.status_code))
+                self.log.append('{} returned with code {} for files'.format(error.url, error.status_code))
                 for path in error.files:
                     self.log.append('\t%s' % str(path))
             if isinstance(error, MdDiff):
-                self.log.append('Files are different:\n\t%s\n\t%s\n\n' % (str(error.base), str(error.other)))
+                self.log.append('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
             if isinstance(error, UrlOccurencyDiff):
-                self.log.append('Count of URLS in %s and %s are different' % (error.base_path, error.translation_path))
+                self.log.append('Count of URLS in {} and {} are different'.format(error.base_path, error.translation_path))
 
 
-class ChainReporter(object):
+class ChainReporter:
     def __init__(self, reporters):
         self.reporters = reporters
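
PATCH 3/7 above is a mechanical modernisation pass: pyupgrade rewrites Python 2-era idioms into the forms allowed by the 3.11 floor set in PATCH 1/7. A condensed sketch of the rewrite patterns visible in the diff, using illustrative names rather than code from this repository:

    # Illustrative summary of the pyupgrade rewrites applied in this commit:
    #   class Foo(object):           ->  class Foo:
    #   set([x for x in xs if x])    ->  {x for x in xs if x}
    #   'checking {}'.format(url)    ->  f'checking {url}'
    #   Optional[List[str]]          ->  list[str] | None   (needs Python 3.10+)

    class LinkLister:                                      # no explicit object base on Python 3
        def collect(self, links: list[str] | None = None) -> set[str]:
            links = links or ['https://example.com', '']
            found = {link for link in links if link}       # set comprehension instead of set([...])
            for link in found:
                print(f'checking {link}')                  # f-string instead of str.format()
            return found
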
From c275e325b08ad8c71a0332e3092351072e30ac49 Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Thu, 10 Oct 2024 13:57:16 +0200
Subject: [PATCH 4/7] fixes: dependencies versions to be compatible with aiohttp

---
 setup.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index 65a6ca6..a8647e6 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,6 @@
 import os
 from setuptools import setup, find_packages
 
-
 version = '0.7.1'
@@ -11,11 +10,11 @@ def read(f):
 
 install_requires = [
     'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
-    'aiohttp >=3, <3.4',
+    'aiohttp==3.8.5',
     'Markdown',
     'parse >=1, <2',
     'beautifulsoup4 >=4, <5',
-    'lxml >=3',
+    'lxml<5',
 ]
 
 tests_require = [

From d9eefa9672687b4778c4dd0d98471655afd966ba Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Thu, 10 Oct 2024 13:57:54 +0200
Subject: [PATCH 5/7] fixes: flake errors after running pyupgrade

---
 setup.py                     | 2 +-
 validator/checks/__init__.py | 2 --
 validator/checks/md.py       | 1 -
 validator/checks/url.py      | 1 -
 validator/reports.py         | 2 +-
 5 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/setup.py b/setup.py
index a8647e6..ddf52ce 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@ def read(f):
 
 install_requires = [
-    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
+    'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff',
     'aiohttp==3.8.5',
     'Markdown',
     'parse >=1, <2',
diff --git a/validator/checks/__init__.py b/validator/checks/__init__.py
index 85bd1e8..61861f2 100644
--- a/validator/checks/__init__.py
+++ b/validator/checks/__init__.py
@@ -1,5 +1,3 @@
-from typing import Type
-
 from sdiff import MdParser
 
 from .md import MarkdownComparator
diff --git a/validator/checks/md.py b/validator/checks/md.py
index bb9ff9d..70e6988 100644
--- a/validator/checks/md.py
+++ b/validator/checks/md.py
@@ -1,5 +1,4 @@
 import re
-from typing import Type
 
 from sdiff import diff, renderer, MdParser
 from markdown import markdown
diff --git a/validator/checks/url.py b/validator/checks/url.py
index 375899d..263a532 100644
--- a/validator/checks/url.py
+++ b/validator/checks/url.py
@@ -5,7 +5,6 @@
 import string
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urljoin
-from typing import List, Optional
 
 from ..errors import UrlDiff, UrlOccurencyDiff
diff --git a/validator/reports.py b/validator/reports.py
index a164f5b..8100963 100644
--- a/validator/reports.py
+++ b/validator/reports.py
@@ -140,7 +140,7 @@ def report(self, errors):
             if isinstance(error, MdDiff):
                 self.log.append('Files are different:\n\t{}\n\t{}\n\n'.format(str(error.base), str(error.other)))
             if isinstance(error, UrlOccurencyDiff):
-                self.log.append('Count of URLS in {} and {} are different'.format(error.base_path, error.translation_path))
+                self.log.append(f'Count of URLS in {error.base_path} and {error.translation_path} are different')
 
 
 class ChainReporter:
     def __init__(self, reporters):
         self.reporters = reporters

From 2d2a1e03f07a10a8c3926b1a4e67ec69043bb8e6 Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Thu, 10 Oct 2024 14:44:07 +0200
Subject: [PATCH 6/7] chore: bump version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ddf52ce..8d38f71 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 import os
 from setuptools import setup, find_packages
 
-version = '0.7.1'
+version = '1.0.0'
 
 
 def read(f):

From 39ba8647f07dce417280e77cfaa39687122e6030 Mon Sep 17 00:00:00 2001
From: Lukasz Jachym
Date: Thu, 10 Oct 2024 20:39:15 +0200
Subject: [PATCH 7/7] adds: min coverage config

---
 .travis.yml | 1 +
 setup.cfg   | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 05a231f..da12660 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,3 +8,4 @@ install:
 # command to run tests
 script:
   - make tests
+  - make coverage
diff --git a/setup.cfg b/setup.cfg
index 0945a29..15ae1fa 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,3 +7,9 @@ ignore = F403
 
 [pep8]
 max-line-length = 120
+
+[coverage:run]
+branch = True
+
+[coverage:report]
+fail_under = 96
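
Closing editorial note, not part of the patch series: taken together, PATCH 2/7 and PATCH 7/7 turn coverage into a CI gate. The Makefile's test target runs the suite under coverage with branch measurement from [coverage:run], the coverage target runs coverage report -m, and that report exits with a non-zero status once the total drops below fail_under = 96, which fails the Travis job that now calls make coverage. A rough sketch of the same gate driven through coverage.py's Python API; the measured module and the threshold handling below are illustrative assumptions, since the repository itself only uses the command-line interface via the Makefile:

    # Illustrative sketch only; mirrors what `coverage run -m pytest` plus
    # `coverage report -m` do with the [coverage:run] / [coverage:report]
    # settings added in setup.cfg.
    import coverage

    cov = coverage.Coverage(branch=True)       # branch = True from [coverage:run]
    cov.start()
    import validator                           # stand-in for running the test suite
    cov.stop()
    total = cov.report(show_missing=True)      # like `coverage report -m`; returns the total percentage
    if total < 96:                             # fail_under = 96 from [coverage:report]
        raise SystemExit(2)
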