diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index 0639689b..bdd53bd6 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -16,6 +16,7 @@
['future_dev'],
['pres_banned_words_check'],
['pres_empty_slide'],
+ ['verify_git_link'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
diff --git a/app/main/checks/__init__.py b/app/main/checks/__init__.py
index 8e643b62..8ddc897e 100644
--- a/app/main/checks/__init__.py
+++ b/app/main/checks/__init__.py
@@ -15,6 +15,7 @@
PresImageShareCheck.id: PresImageShareCheck,
FurtherDev.id: FurtherDev,
PresBannedWordsCheck.id: PresBannedWordsCheck,
+ PresVerifyGitLinkCheck.id: PresVerifyGitLinkCheck,
PresEmptySlideCheck.id: PresEmptySlideCheck,
},
'report': {
diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index d9df81ca..095bf658 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -10,4 +10,5 @@
from .pres_right_words import PresRightWordsCheck
from .image_share import PresImageShareCheck
from .banned_words import PresBannedWordsCheck
+from .verify_git_link import PresVerifyGitLinkCheck
from .empty_slide_check import PresEmptySlideCheck
diff --git a/app/main/checks/presentation_checks/find_def_sld.py b/app/main/checks/presentation_checks/find_def_sld.py
index 46c64b0f..baa6f031 100644
--- a/app/main/checks/presentation_checks/find_def_sld.py
+++ b/app/main/checks/presentation_checks/find_def_sld.py
@@ -8,18 +8,27 @@ class FindDefSld(BasePresCriterion):
     def __init__(self, file_info, key_slide):
         super().__init__(file_info)
         self.type_of_slide = key_slide
+        self.found_idxs = []
 
     def check(self):
-        found_slides, found_idxs = [], []
+        # Reset on every run so that repeated calls do not accumulate indices.
+        self.found_idxs = []
+        # NOTE(review): other presentation classes may not define ``found_index``; create it on demand.
+        if not hasattr(self.file, 'found_index'):
+            self.file.found_index = {}
+        found_slides = []
         for i, title in enumerate(self.file.get_titles(), 1):
             if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
                 found_slides.append(self.file.get_text_from_slides()[i - 1])
-                found_idxs.append(i)
+                self.found_idxs.append(i)
         if len(found_slides) == 0:
+            self.file.found_index[str(self.type_of_slide)] = None
             return answer(False, 'Слайд не найден')
         else:
-            found_idxs = self.format_page_link(found_idxs)
-            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs))))
+            # Comma-separated so that several slide numbers stay distinguishable (3 and 5 must not become '35').
+            self.file.found_index[str(self.type_of_slide)] = ','.join(str(item) for item in self.found_idxs)
+            found_idxs_link = self.format_page_link(self.found_idxs)
+            return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))
 
     @property
     def name(self):
diff --git a/app/main/checks/presentation_checks/verify_git_link.py b/app/main/checks/presentation_checks/verify_git_link.py
new file mode 100644
index 00000000..64135954
--- /dev/null
+++ b/app/main/checks/presentation_checks/verify_git_link.py
@@ -0,0 +1,151 @@
+
+import re
+import requests
+from lxml import html
+from urllib.parse import quote
+
+from .find_def_sld import FindDefSld
+from ..base_check import BasePresCriterion, answer
+
+# TODO: to detect private GitLab repositories, query the GitLab REST API
+# (https://gitlab.com/api/v4/projects/{url-encoded path}) and inspect the
+# project's "visibility" field. The access token must be read from the
+# environment at runtime and never committed; a token that has been
+# committed must be considered leaked and revoked.
+
+
+class PresVerifyGitLinkCheck(BasePresCriterion):
+    """Verify repository links (GitHub/GitLab/Bitbucket) found in a presentation.
+
+    Every distinct link is requested over HTTP; links that cannot be fetched or
+    do not answer with status 200 are reported as missing or private. With
+    ``deep_check`` enabled, GitHub pages are additionally inspected for content.
+    If a slide index was recorded for 'Апробация' in ``file.found_index``, that
+    slide is also checked for repository mentions lacking a proper link.
+    """
+
+    description = "Проверка действительности ссылки на github"
+    id = 'verify_git_link'
+
+    # Seconds to wait for a repository page before treating it as unreachable.
+    REQUEST_TIMEOUT = 10
+
+    def __init__(self, file_info, deep_check=True):
+        """Store settings and prepare the regular expressions.
+
+        :param file_info: passed through to ``BasePresCriterion``.
+        :param deep_check: when true, also flag GitHub repositories that look empty.
+        """
+        super().__init__(file_info)
+        self.deep_check = deep_check
+        self.wrong_repo_ref = []
+        self.empty_repo_ref = []
+
+        # [scheme://]host/owner/repo[/]; dots are escaped and the quantifiers are
+        # flat so that matching does not backtrack heavily on long slide texts.
+        self.pattern_for_repo = (
+            r'(?:https?://)?(?:github|gitlab|bitbucket)\.(?:com|org)'
+            r'/[a-zA-Z0-9_-]+/[a-zA-Z0-9_-]+/*'
+        )
+        self.pattern_for_repo_incorrect = r'\(github\.com\)|\(gitlab\.com\)|\(bitbucket\.org\)|\(github\)|\(gitlab\)|\(bitbucket\)'
+        # No trailing '|': an empty alternative would match every text.
+        self.pattern_repo_mention = r'репозиторий|репозитория|репозиторию|репозиториев|репозиториям'
+
+    def check(self):
+        """Run the check and return the ``answer(...)`` result.
+
+        The check passes when the slides contain no repository links or when
+        no problem was detected; otherwise the message lists every problem found.
+        """
+        # Start from a clean state so that repeated calls do not accumulate results.
+        self.wrong_repo_ref = []
+        self.empty_repo_ref = []
+
+        slides_text = list(self.file.get_text_from_slides())
+        found_repo = self._find_repo_links(' '.join(slides_text))
+        if not found_repo:
+            return answer(True, 'Нечего проверять!')
+
+        problems = self._check_approbation_slide(slides_text)
+        for repo_link in found_repo:
+            self._probe_repo(repo_link)
+
+        if self.wrong_repo_ref:
+            problems.append(
+                "Найдены несуществующие или закрытые репозитории: "
+                f"{', '.join(repr(repo) for repo in self.wrong_repo_ref)}"
+            )
+        if self.empty_repo_ref:
+            problems.append(
+                "Найдены пустые репозитории: "
+                f"{', '.join(repr(repo) for repo in self.empty_repo_ref)}"
+            )
+
+        # Any detected problem fails the check; previously an ``else`` bound only to
+        # the empty-repository test overwrote earlier failures with a pass.
+        if problems:
+            return answer(False, '\n'.join(['Не пройдена!'] + problems))
+        return answer(True, 'Пройдена!')
+
+    def _find_repo_links(self, text):
+        """Return the distinct repository links found in ``text``, in order of appearance."""
+        links = (match.group(0) for match in re.finditer(self.pattern_for_repo, text))
+        return list(dict.fromkeys(links))
+
+    def _check_approbation_slide(self, slides_text):
+        """Return problem messages for the 'Апробация' slide (empty list if none)."""
+        # ``found_index`` is filled by another criterion and may be absent.
+        page_aprb = getattr(self.file, 'found_index', {}).get('Апробация')
+        if page_aprb is None:
+            return []
+        # Accept a single slide number or a comma-separated list of numbers.
+        pages = {part.strip() for part in str(page_aprb).split(',')}
+        # Spaces are removed from the slide text before matching.
+        aprb_text = ' '.join(
+            text.replace(' ', '')
+            for page, text in enumerate(slides_text, 1)
+            if str(page) in pages
+        )
+        incorrect = re.findall(self.pattern_for_repo_incorrect, aprb_text)
+        if incorrect:
+            return [
+                "В слайде 'Апробация' вместо выражений "
+                f"{', '.join(repr(repo) for repo in incorrect)}"
+                " следует привести ссылки вида 'https://github.com/...'"
+            ]
+        has_mention = re.search(self.pattern_repo_mention, aprb_text, re.IGNORECASE)
+        if has_mention and not self._find_repo_links(aprb_text):
+            return [
+                'В слайде "Апробация" есть упоминания репозиториев, '
+                'однако ссылки на них либо некорректны, либо отсутствуют.'
+            ]
+        return []
+
+    def _probe_repo(self, repo_link):
+        """Request ``repo_link`` and record it as wrong or empty if appropriate."""
+        # The pattern allows links without a scheme, which ``requests`` rejects.
+        url = repo_link if re.match(r'https?://', repo_link) else f'https://{repo_link}'
+        try:
+            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
+        except requests.exceptions.RequestException:
+            # Covers connection, SSL, timeout and malformed-URL errors alike.
+            self.wrong_repo_ref.append(repo_link)
+            return
+        if response.status_code != 200:
+            self.wrong_repo_ref.append(repo_link)
+        elif self.deep_check:
+            self.deep_check_repo((repo_link,), response)
+
+    def deep_check_repo(self, repo, link):
+        """Record a GitHub repository as empty if its page lists no files.
+
+        :param repo: sequence whose first item is the repository URL.
+        :param link: the ``requests`` response obtained for that URL.
+        """
+        if 'github' not in repo[0]:
+            return
+        tree = html.fromstring(link.content)
+        # NOTE(review): depends on GitHub's page markup (CSS class names) —
+        # confirm the selector still matches the current layout.
+        if not tree.xpath("//a[@class ='js-navigation-open Link--primary']"):
+            self.empty_repo_ref.append(repo[0])
diff --git a/app/main/presentations/pptx/presentation_pptx.py b/app/main/presentations/pptx/presentation_pptx.py
index 869846e0..dd909f8c 100644
--- a/app/main/presentations/pptx/presentation_pptx.py
+++ b/app/main/presentations/pptx/presentation_pptx.py
@@ -9,6 +9,7 @@ def __init__(self, presentation_name):
PresentationBasic.__init__(self, presentation_name)
self.prs = Presentation(presentation_name)
self.add_slides()
+ self.found_index = {}
def add_slides(self):
for index, slide in enumerate(self.prs.slides, 1):
diff --git a/app/server.py b/app/server.py
index 1bfe9127..7fc4bd58 100644
--- a/app/server.py
+++ b/app/server.py
@@ -275,6 +275,7 @@ def get_status(task_id):
'pres_image_share': 'Проверка доли объема презентации, приходящейся на изображения',
'pres_banned_words_check': 'Проверка наличия запретных слов в презентации',
'conclusion_actual': 'Соответствие заключения задачам',
+ 'verify_git_link': 'Проверка действительности ссылки на github',
'conclusion_along': 'Наличие направлений дальнейшего развития',
'simple_check': 'Простейшая проверка отчёта',
'banned_words_in_literature': 'Наличие запрещенных слов в списке литературы',
diff --git a/requirements.txt b/requirements.txt
index 0d87cf6c..47560892 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
werkzeug==2.0.0
Flask==2.0.3
jinja2==3.0.0
-requests==2.24.0
+requests~=2.31.0
python-pptx==0.6.18
odfpy==1.4.1
pymongo==3.11.1
@@ -25,6 +25,8 @@ docx2python~=2.0.4
oauthlib~=3.1.0
pdfplumber==0.6.1
pytest~=7.1.2
+lxml~=4.9.2
+urllib3~=2.0.3
filetype==1.2.0
language-tool-python==2.7.1
markdown==3.4.4