diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index 0639689b..bdd53bd6 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -16,6 +16,7 @@
     ['future_dev'],
     ['pres_banned_words_check'],
     ['pres_empty_slide'],
+    ['verify_git_link'],
 ]
 BASE_REPORT_CRITERION = [
     ["simple_check"],
diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py
index 8e643b62..8ddc897e 100644
--- a/app/main/checks/__init__.py
+++ b/app/main/checks/__init__.py
@@ -15,6 +15,7 @@
         PresImageShareCheck.id: PresImageShareCheck,
         FurtherDev.id: FurtherDev,
         PresBannedWordsCheck.id: PresBannedWordsCheck,
+        PresVerifyGitLinkCheck.id: PresVerifyGitLinkCheck,
         PresEmptySlideCheck.id: PresEmptySlideCheck,
     },
     'report': {
diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index d9df81ca..095bf658 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -10,4 +10,5 @@
 from .pres_right_words import PresRightWordsCheck
 from .image_share import PresImageShareCheck
 from .banned_words import PresBannedWordsCheck
+from .verify_git_link import PresVerifyGitLinkCheck
 from .empty_slide_check import PresEmptySlideCheck
diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py
index 46c64b0f..baa6f031 100644
--- a/app/main/checks/presentation_checks/find_def_sld.py
+++ b/app/main/checks/presentation_checks/find_def_sld.py
@@ -8,18 +8,24 @@ class FindDefSld(BasePresCriterion):
     def __init__(self, file_info, key_slide):
         super().__init__(file_info)
         self.type_of_slide = key_slide
+        self.found_idxs = []
 
     def check(self):
-        found_slides, found_idxs = [], []
+        # Reset on every run so repeated check() calls do not accumulate indexes.
+        self.found_idxs = []
+        found_slides = []
         for i, title in enumerate(self.file.get_titles(), 1):
             if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
                 found_slides.append(self.file.get_text_from_slides()[i - 1])
-                found_idxs.append(i)
+                self.found_idxs.append(i)
         if len(found_slides) == 0:
+            self.file.found_index[str(self.type_of_slide)] = None
             return answer(False, 'Слайд не найден')
         else:
-            found_idxs = self.format_page_link(found_idxs)
-            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs))))
+            # Keep slide numbers as a list: joining them ("1" + "2" -> "12") makes them ambiguous.
+            self.file.found_index[str(self.type_of_slide)] = list(self.found_idxs)
+            found_idxs_link = self.format_page_link(self.found_idxs)
+            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))
 
     @property
     def name(self):
diff --git a/app/main/checks/presentation_checks/verify_git_link.py b/app/main/checks/presentation_checks/verify_git_link.py
new file mode 100644
index 00000000..64135954
--- /dev/null
+++ b/app/main/checks/presentation_checks/verify_git_link.py
@@ -0,0 +1,142 @@
+import re
+
+import requests
+from lxml import html
+
+from ..base_check import BasePresCriterion, answer
+
+# SECURITY: a GitLab personal access token used to be hard-coded in this module.
+# It has been removed and must be revoked. If private GitLab repositories ever
+# need to be detected, load the token from the environment or the app config.
+
+REQUEST_TIMEOUT = 10  # seconds; a slow host must not hang the whole check
+APPROBATION_SLIDE = 'Апробация'
+
+
+class PresVerifyGitLinkCheck(BasePresCriterion):
+    """Check that repository links mentioned in the presentation can be opened."""
+
+    description = "Проверка действительности ссылки на github"
+    id = 'verify_git_link'
+
+    def __init__(self, file_info, deep_check=True):
+        """Set up the check.
+
+        :param file_info: passed through to BasePresCriterion.
+        :param deep_check: when True, GitHub pages are also inspected for emptiness.
+        """
+        super().__init__(file_info)
+        self.deep_check = deep_check
+        self.wrong_repo_ref = []
+        self.empty_repo_ref = []
+
+        # <host>/<owner>/<repo> with an optional scheme. Dots are escaped so only
+        # the three supported hosts match.
+        self.pattern_for_repo = (
+            r'(?:https?://)?(?:github\.com|gitlab\.com|bitbucket\.org)'
+            r'/[A-Za-z0-9_-]+(?:\.[A-Za-z0-9_-]+)*'
+            r'/[A-Za-z0-9_-]+(?:\.[A-Za-z0-9_-]+)*'
+        )
+        self.pattern_for_repo_incorrect = r'\(github\.com\)|\(gitlab\.com\)|\(bitbucket\.org\)|\(github\)|\(gitlab\)|\(bitbucket\)'
+        # A word stem covers every case form and, unlike an alternation ending
+        # in "|", never matches the empty string.
+        self.pattern_repo_mention = r'репозитори'
+
+    def check(self):
+        """Run the check and return the result built by ``answer``."""
+        # Start clean so that repeated runs do not accumulate earlier results.
+        self.wrong_repo_ref = []
+        self.empty_repo_ref = []
+
+        slides_text = list(self.file.get_text_from_slides())
+        found_repo = self.find_repo_links(' '.join(slides_text))
+        if not found_repo:
+            return answer(True, 'Нечего проверять!')
+
+        issues = self.check_approbation_slide(slides_text)
+
+        for url in found_repo:
+            try:
+                response = requests.get(url, timeout=REQUEST_TIMEOUT)
+            except requests.exceptions.RequestException:
+                self.wrong_repo_ref.append(url)
+                continue
+            if response.status_code != 200:
+                self.wrong_repo_ref.append(url)
+            elif self.deep_check:
+                self.deep_check_repo(url, response)
+
+        if self.wrong_repo_ref:
+            issues.append(
+                '<br>Найдены несуществующие или закрытые репозитории: '
+                + ', '.join(repr(repo) for repo in self.wrong_repo_ref)
+            )
+        if self.empty_repo_ref:
+            issues.append(
+                '<br>Найдены пустые репозитории: '
+                + ', '.join(repr(repo) for repo in self.empty_repo_ref)
+            )
+
+        # Any collected remark fails the check; previously an unreachable or
+        # badly written link was reported as a pass when no repository was empty.
+        if issues:
+            return answer(False, 'Не пройдена!' + ''.join(issues))
+        return answer(True, 'Пройдена!')
+
+    def find_repo_links(self, text):
+        """Return the unique repository URLs found in ``text``, scheme included."""
+        links = {}
+        for match in re.finditer(self.pattern_for_repo, text, flags=re.IGNORECASE):
+            url = match.group(0)
+            # requests rejects URLs without a scheme, so add one when it is missing.
+            if not url.lower().startswith(('http://', 'https://')):
+                url = f'https://{url}'
+            links.setdefault(url, None)
+        return list(links)
+
+    def check_approbation_slide(self, slides_text):
+        """Return remarks about repository links on the approbation slide(s)."""
+        # found_index is filled by FindDefSld; it is absent if that check did not run.
+        pages = getattr(self.file, 'found_index', {}).get(APPROBATION_SLIDE)
+        if not pages:
+            return []
+        if isinstance(pages, (str, int)):
+            pages = [pages]
+        page_numbers = {str(page) for page in pages}
+
+        # Spaces are dropped before matching, as in the first version of this check.
+        text = ' '.join(
+            slide.replace(' ', '')
+            for page, slide in enumerate(slides_text, 1)
+            if str(page) in page_numbers
+        )
+        repo_links = re.findall(self.pattern_for_repo, text, flags=re.IGNORECASE)
+        incorrect = re.findall(self.pattern_for_repo_incorrect, text)
+
+        issues = []
+        if incorrect:
+            issues.append(
+                f"<br>В слайде 'Апробация' вместо выражений {', '.join(repr(item) for item in incorrect)}"
+                f" следует привести ссылки вида 'https://github.com/...'"
+            )
+        if not repo_links and not incorrect and re.search(self.pattern_repo_mention, text, flags=re.IGNORECASE):
+            issues.append(
+                '<br>В слайде "Апробация" есть упоминания репозиториев, '
+                'однако ссылки на них либо некорректны, либо отсутствуют.'
+            )
+        return issues
+
+    def deep_check_repo(self, repo, link):
+        """Record a GitHub repository as empty when its page lists no files.
+
+        :param repo: repository URL, or a tuple whose first item is the URL.
+        :param link: the ``requests`` response obtained for that URL.
+        """
+        url = repo if isinstance(repo, str) else repo[0]
+        if 'github' not in url.lower():
+            return
+        tree = html.fromstring(link.content)
+        # NOTE(review): depends on the CSS classes of GitHub's file list; if the
+        # markup changes, every repository would be reported as empty.
+        if not tree.xpath("//a[@class ='js-navigation-open Link--primary']"):
+            self.empty_repo_ref.append(url)
diff --git a/app/main/presentations/pptx/presentation_pptx.py b/app/main/presentations/pptx/presentation_pptx.py
index 869846e0..dd909f8c 100644
--- a/app/main/presentations/pptx/presentation_pptx.py
+++ b/app/main/presentations/pptx/presentation_pptx.py
@@ -9,6 +9,7 @@ def __init__(self, presentation_name):
         PresentationBasic.__init__(self, presentation_name)
         self.prs = Presentation(presentation_name)
         self.add_slides()
+        self.found_index = {}
 
     def add_slides(self):
         for index, slide in enumerate(self.prs.slides, 1):
diff --git a/app/server.py b/app/server.py
index 1bfe9127..7fc4bd58 100644
--- a/app/server.py
+++ b/app/server.py
@@ -275,6 +275,7 @@ def get_status(task_id):
     'pres_image_share': 'Проверка доли объема презентации, приходящейся на изображения',
     'pres_banned_words_check': 'Проверка наличия запретных слов в презентации',
     'conclusion_actual': 'Соответствие заключения задачам',
+    'verify_git_link': 'Проверка действительности ссылки на github',
     'conclusion_along': 'Наличие направлений дальнейшего развития',
     'simple_check': 'Простейшая проверка отчёта',
     'banned_words_in_literature': 'Наличие запрещенных слов в списке литературы',
diff --git a/requirements.txt b/requirements.txt
index 0d87cf6c..47560892 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 werkzeug==2.0.0
 Flask==2.0.3
 jinja2==3.0.0
-requests==2.24.0
+requests~=2.31.0
 python-pptx==0.6.18
 odfpy==1.4.1
 pymongo==3.11.1
@@ -25,6 +25,8 @@ docx2python~=2.0.4
 oauthlib~=3.1.0
 pdfplumber==0.6.1
 pytest~=7.1.2
+lxml~=4.9.2
+urllib3~=2.0.3
 filetype==1.2.0
 language-tool-python==2.7.1
 markdown==3.4.4