Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: switch to python version 3.11 #34

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.6.8
3.11
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
language: python
dist: jammy
python:
- "3.6"
- "3.11"
# command to install dependencies
install:
- make dev
# command to run tests
script:
- make tests
- make coverage
14 changes: 9 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

PYTHON=venv/bin/python3
PIP=venv/bin/pip
NOSE=venv/bin/nosetests
COVERAGE=venv/bin/coverage
TEST_RUNNER=venv/bin/pytest
TEST_RUNNER_FLAGS=-s --durations=3 --durations-min=0.005
FLAKE=venv/bin/flake8
PYPICLOUD_HOST=pypicloud.getkeepsafe.local
PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
Expand Down Expand Up @@ -30,14 +32,16 @@ flake:
$(FLAKE) validator tests

test: flake
$(NOSE) -s $(FLAGS)
$(COVERAGE) run -m pytest $(TEST_RUNNER_FLAGS)

vtest:
$(NOSE) -s -v $(FLAGS)
$(COVERAGE) run -m pytest -v $(TEST_RUNNER_FLAGS)

testloop:
while sleep 1; do $(TEST_RUNNER) -s --lf $(TEST_RUNNER_FLAGS); done

cov cover coverage:
$(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
echo "open file://`pwd`/coverage/index.html"
$(COVERAGE) report -m

clean:
rm -rf `find . -name __pycache__`
Expand Down
6 changes: 6 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,9 @@ ignore = F403

[pep8]
max-line-length = 120

[coverage:run]
branch = True

[coverage:report]
fail_under = 96
16 changes: 8 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,26 @@
import os
from setuptools import setup, find_packages


version = '0.7.2'
version = '1.0.0'


def read(f):
    """Return the stripped text content of a file located next to this script.

    :param f: file name (or path) resolved relative to the directory that
        contains this module; an absolute path is used as-is because
        ``os.path.join`` discards earlier components in that case.
    :returns: the file's text with leading/trailing whitespace removed.
    :raises OSError: if the file cannot be opened.
    """
    path = os.path.join(os.path.dirname(__file__), f)
    # Use a context manager so the handle is closed deterministically instead
    # of being left for the garbage collector (avoids ResourceWarning).
    with open(path) as fp:
        return fp.read().strip()


install_requires = [
'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@0.4.1#egg=sdiff',
'aiohttp >=3, <3.4',
'sdiff @ git+https://github.com/KeepSafe/html-structure-diff.git@1.0.0#egg=sdiff',
'aiohttp==3.8.5',
'Markdown',
'parse <= 1.8.2',
'beautifulsoup4 >=4, <5',
'lxml >=3',
'lxml<5',
]

tests_require = [
'nose',
'flake8==3.6.0',
'coverage',
'pytest >= 8',
'coverage==7.6.1',
'flake8==7.1.1',
]

devtools_require = [
Expand All @@ -32,6 +31,7 @@ def read(f):
setup(
name='content-validator',
version=version,
python_requires='>=3.11',
description=('Content validator looks at text content and preforms different validation tasks'),
classifiers=[
'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Programming Language :: Python'
Expand Down
1 change: 0 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

def read(path):
    """Return the entire text content of the file at *path*.

    The file is opened in the default text mode and closed before returning.
    """
    with open(path) as handle:
        contents = handle.read()
    return contents
10 changes: 5 additions & 5 deletions validator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from . import parsers, checks, reports, fs


class Validator(object):
class Validator:
def __init__(self, contents, parser, reader, check, reporter=None):
self.contents = contents
self.parser = parser
Expand All @@ -24,7 +24,7 @@ async def async_validate(self):
return errors


class ReportBuilder(object):
class ReportBuilder:
def __init__(self, contents, parser, reader, check):
self.contents = contents
self.parser = parser
Expand All @@ -49,7 +49,7 @@ def validate(self):
return Validator(self.contents, self.parser, self.reader, self.check, reporter).validate()


class CheckBuilder(object):
class CheckBuilder:
def __init__(self, contents, content_type, parser, reader):
self.contents = contents
self.content_type = content_type
Expand Down Expand Up @@ -89,7 +89,7 @@ async def async_validate(self):
return res


class ParserBuilder(object):
class ParserBuilder:
def __init__(self, contents, reader=None):
self.contents = contents
self.content_type = 'txt'
Expand Down Expand Up @@ -120,7 +120,7 @@ def check(self):
return CheckBuilder(self.contents, self.content_type, parser, self.reader)


class ContentBuilder(object):
class ContentBuilder:
def files(self, pattern, **kwargs):
contents = fs.files(pattern, **kwargs)
return ParserBuilder(contents, parsers.FileReader())
Expand Down
6 changes: 2 additions & 4 deletions validator/checks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Type

from sdiff import MdParser

from .md import MarkdownComparator
Expand All @@ -21,7 +19,7 @@ def url_occurences(filetype):
return UrlOccurenciesValidator()


def markdown(filetype, md_parser_cls: Type[MdParser] = MdParser):
def markdown(filetype, md_parser_cls: type[MdParser] = MdParser):
if filetype not in ['txt', 'html']:
raise UndefinedCheckTypeError('got filetype %s' % filetype)
return MarkdownComparator(md_parser_cls)
Expand All @@ -33,7 +31,7 @@ def java_args(filetype):
return JavaComparator()


class ChainCheck(object):
class ChainCheck:
def __init__(self, checks):
self.checks = checks

Expand Down
2 changes: 1 addition & 1 deletion validator/checks/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
REF_PATTERN = r'@string/\w+'


class JavaComparator(object):
class JavaComparator:
def _get_args(self, content):
return re.findall(ARG_PATTERN, content)

Expand Down
5 changes: 2 additions & 3 deletions validator/checks/md.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import re
from typing import Type

from sdiff import diff, renderer, MdParser
from markdown import markdown
Expand All @@ -14,8 +13,8 @@ def save_file(content, filename):
fp.write(content)


class MarkdownComparator(object):
def __init__(self, md_parser_cls: Type[MdParser] = MdParser):
class MarkdownComparator:
def __init__(self, md_parser_cls: type[MdParser] = MdParser):
self._md_parser_cls = md_parser_cls

def check(self, data, parser, reader):
Expand Down
25 changes: 12 additions & 13 deletions validator/checks/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import string
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
from typing import List, Optional

from ..errors import UrlDiff, UrlOccurencyDiff

Expand All @@ -23,7 +22,7 @@ class MissingUrlExtractorError(Exception):
# The job of extractors is to find all non-parametrized urls in the given text for later checks via UrlValidator,
# which examines whether a particular url leads to a working webpage (200 status).
# Since we are interested in all urls (including parametrized), we need to slightly change their API and behaviour.
class TextUrlExtractor(object):
class TextUrlExtractor:
def __init__(self, **kwargs):
pass

Expand Down Expand Up @@ -60,12 +59,12 @@ def _validate_email(self, email):
return False

def _extract_from_anchors(self, soup):
return set([a.get('href') or a.text for a in soup.find_all('a')])
return {a.get('href') or a.text for a in soup.find_all('a')}

def _extract_from_img(self, soup):
if self.skip_images:
return set()
return set([img.get('src') for img in soup.find_all('img')])
return {img.get('src') for img in soup.find_all('img')}

def _fix_url(self, url):
result = ''
Expand All @@ -82,7 +81,7 @@ def _fix_url(self, url):
if re.match(self.url_pattern, full_url):
result = full_url
else:
logging.error('{} not tested'.format(url_parsed.geturl()))
logging.error(f'{url_parsed.geturl()} not tested')
return result

def extract_urls(self, content, keep_placeholders=False):
Expand All @@ -96,20 +95,20 @@ def extract_urls(self, content, keep_placeholders=False):
return result


class UrlStatusChecker(object):
class UrlStatusChecker:
retry_max_count = 3

def __init__(self, headers=None, exclude_urls_regexs: Optional[List[str]] = None):
def __init__(self, headers=None, exclude_urls_regexs: list[str] | None = None):
self._exclude_urls_regex = exclude_urls_regexs or []
if self._exclude_urls_regex:
logging.warning('Excluded urls regexps: {}'.format(self._exclude_urls_regex))
logging.warning(f'Excluded urls regexps: {self._exclude_urls_regex}')
self._headers = headers or {}
if 'User-Agent' not in self._headers:
self._headers['User-Agent'] = DEFAULT_USER_AGENT

async def _make_request(self, url):
try:
logging.info('checking {}'.format(url))
logging.info(f'checking {url}')
async with aiohttp.request('get', url, headers=self._headers, allow_redirects=True) as res:
return res.status
except Exception:
Expand Down Expand Up @@ -143,7 +142,7 @@ async def _check_urls_coro(self, urls, future):
if not is_exluded:
urls_without_excluded.append(url)
else:
logging.warning('url {} excluded from status check'.format(url.url))
logging.warning(f'url {url.url} excluded from status check')
tasks = [self._request_status_code(url.url) for url in urls_without_excluded]
results = await asyncio.gather(*tasks)
for index, url in enumerate(urls_without_excluded):
Expand All @@ -167,10 +166,10 @@ async def async_check(self, urls):
return future.result()


class UrlValidator(object):
class UrlValidator:
_extractors = {'txt': TextUrlExtractor, 'html': HtmlUrlExtractor}

def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional[List[str]] = None, **kwargs):
def __init__(self, filetype, headers=None, exclude_status_check_regexs: list[str] | None = None, **kwargs):
self.client_headers = headers or {}
self._excluded_status_check_regexs = exclude_status_check_regexs or []
extractor_class = self._extractors.get(filetype)
Expand All @@ -179,7 +178,7 @@ def __init__(self, filetype, headers=None, exclude_status_check_regexs: Optional
self.extractor = extractor_class(**kwargs)

def _get_urls(self, data, parser, reader):
flat_data = set(p for sublist in data for p in sublist)
flat_data = {p for sublist in data for p in sublist}
# TODO yield instead
urls = {}
for element in flat_data:
Expand Down
6 changes: 3 additions & 3 deletions validator/errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from collections import namedtuple


class UrlDiff(object):
class UrlDiff:

def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False):
self.url = url
Expand All @@ -10,7 +10,7 @@ def __init__(self, url, files=None, status_code=200, has_disallowed_chars=False)
self.has_disallowed_chars = has_disallowed_chars

def __str__(self):
return 'Url(%s, %s, %s, %s)' % (self.url, self.files, self.status_code, self.has_disallowed_chars)
return 'Url({}, {}, {}, {})'.format(self.url, self.files, self.status_code, self.has_disallowed_chars)

def __repr__(self):
return 'Url: %s' % self.url
Expand All @@ -37,7 +37,7 @@ def is_valid(self):
ContentData.__new__.__defaults__ = ('', ) * 2


class MdDiff(object):
class MdDiff:

def __init__(self, base, other, error_msgs):
self.base = base
Expand Down
4 changes: 2 additions & 2 deletions validator/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ def files(pattern, **kwargs):
[[Path(path/to1/file1.txt), Path(path/to1/file2.txt)], [Path(path/to2/file1.txt), Path(path/to2/file2.txt)]]
"""
# extract named parameters from the pattern
params = set([p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p])
params = {p for p in map(lambda e: e[1], Formatter().parse(pattern)) if p}
if params:
if len(params - kwargs.keys()) > 0:
raise ValueError('missing parameters {} for pattern {}'.format(params - kwargs.keys(), pattern))
raise ValueError(f'missing parameters {params - kwargs.keys()} for pattern {pattern}')
return _params_pattern(pattern, params, **kwargs)
else:
return _no_params_pattern(pattern)
Expand Down
12 changes: 6 additions & 6 deletions validator/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ def __init__(self, msg):
super().__init__(msg)


class FileReader(object):
class FileReader:
def read(self, path):
return read_content(path)


class TxtReader(object):
class TxtReader:
def read(self, content):
return content


class MarkdownParser(object):
class MarkdownParser:
def parse(self, content):
return markdown.markdown(content)


class XmlParser(object):
class XmlParser:
def __init__(self, query='*'):
self.query = query

Expand All @@ -38,12 +38,12 @@ def parse(self, content):
return '\n\n'.join(texts)


class CsvParser(object):
class CsvParser:
def parse(self, content):
return '\n'.join(content.split(','))


class ChainParser(object):
class ChainParser:
def __init__(self, parsers):
self.parsers = parsers

Expand Down
Loading