Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

444 verify git link #452

Merged
merged 14 commits into from
Feb 19, 2024
2 changes: 2 additions & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
['future_dev'],
['pres_banned_words_check'],
['pres_empty_slide'],
['pres_banned_words_check'],
['verify_git_link'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PresImageShareCheck.id: PresImageShareCheck,
FurtherDev.id: FurtherDev,
PresBannedWordsCheck.id: PresBannedWordsCheck,
PresVerifyGitLinkCheck.id: PresVerifyGitLinkCheck,
PresEmptySlideCheck.id: PresEmptySlideCheck,
},
'report': {
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
from .pres_right_words import PresRightWordsCheck
from .image_share import PresImageShareCheck
from .banned_words import PresBannedWordsCheck
from .verify_git_link import PresVerifyGitLinkCheck
from .empty_slide_check import PresEmptySlideCheck
11 changes: 7 additions & 4 deletions app/main/checks/presentation_checks/find_def_sld.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ class FindDefSld(BasePresCriterion):
def __init__(self, file_info, key_slide):
super().__init__(file_info)
self.type_of_slide = key_slide
self.found_idxs = []

def check(self):
found_slides, found_idxs = [], []
found_slides = []
for i, title in enumerate(self.file.get_titles(), 1):
if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
found_slides.append(self.file.get_text_from_slides()[i - 1])
found_idxs.append(i)
self.found_idxs.append(i)
if len(found_slides) == 0:
self.file.found_index[str(self.type_of_slide)] = None
return answer(False, 'Слайд не найден')
else:
found_idxs = self.format_page_link(found_idxs)
return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs))))
self.file.found_index[str(self.type_of_slide)] = ''.join(str(item) for item in self.found_idxs)
found_idxs_link = self.format_page_link(self.found_idxs)
return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))

@property
def name(self):
Expand Down
89 changes: 89 additions & 0 deletions app/main/checks/presentation_checks/verify_git_link.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

import re
import requests
from lxml import html
from urllib.parse import quote

from .find_def_sld import FindDefSld
from ..base_check import BasePresCriterion, answer

# To check whether a GitLab repository is private (currently disabled):
# GITLAB_URL = 'https://gitlab.com/api/v4'
# PRIVATE_TOKEN = os.environ['GITLAB_PRIVATE_TOKEN']  # read from the environment; never commit a real access token


class PresVerifyGitLinkCheck(BasePresCriterion):
description = "Проверка действительности ссылки на github"
id = 'verify_git_link'

def __init__(self, file_info, deep_check=True):
    """Prepare the result containers and the regular expressions used by ``check``.

    :param file_info: forwarded unchanged to the base criterion constructor.
    :param deep_check: when true, every reachable repository page is also
        passed to ``deep_check_repo``.
    """
    super().__init__(file_info)
    self.deep_check = deep_check
    # Links that could not be opened / repositories recognised as empty.
    self.wrong_repo_ref = []
    self.empty_repo_ref = []

    # Repository link: optional scheme, a known host, then "/owner/name".
    # Group 1 holds the matched link (callers read element 0 of each findall
    # tuple). The dots are escaped so that they only match a literal ".";
    # unescaped they matched any character (e.g. "githubXcom/a/b").
    self.pattern_for_repo = r'((((((http(s)?://)?(github|gitlab|bitbucket)+)+(\.com|\.org)+)+/[a-zA-Z0-9_-]+)+/[a-zA-Z0-9_-]+)+/*)+'
    # Host names written in parentheses instead of a real link.
    self.pattern_for_repo_incorrect = r'\(github\.com\)|\(gitlab\.com\)|\(bitbucket\.org\)|\(github\)|\(gitlab\)|\(bitbucket\)'
    # Word forms of "репозиторий". There must be no trailing "|": an empty
    # alternative matches at every position, so re.findall would always
    # return a non-empty (truthy) list.
    self.pattern_repo_mention = r'репозиторий|репозитория|репозиторию|репозиториев|репозиториям'

def check(self):
    """Check that every repository link found in the presentation can be opened.

    The text of all slides is searched with ``self.pattern_for_repo``. Each
    distinct link is requested over HTTP; a link that cannot be fetched or
    does not answer with status 200 is stored in ``self.wrong_repo_ref``.
    With ``self.deep_check`` enabled, reachable pages are also passed to
    ``deep_check_repo``, which may add them to ``self.empty_repo_ref``.

    :return: ``answer(True, ...)`` when there are no links or all links are
        fine, otherwise ``answer(False, ...)`` with an HTML message listing
        the problems.
    """
    request_timeout = 10  # seconds; keeps an unresponsive host from blocking the check

    # Start from a clean state so repeated calls do not accumulate results.
    self.wrong_repo_ref = []
    self.empty_repo_ref = []

    slides_text = list(self.file.get_text_from_slides())
    found_repo = re.findall(self.pattern_for_repo, ' '.join(slides_text))
    if not found_repo:
        # NOTE(review): a reviewer asked why this returns True. Author's
        # answer: the absence of repository links is currently not treated
        # as a flaw; only the correctness of links that are present is
        # assessed. Whether links should be mandatory is still open.
        return answer(True, 'Нечего проверять!')

    remarks = []
    # .get() instead of indexing: the key is missing when the slide search
    # has not run, and the attribute may be absent on the file object.
    found_index = getattr(self.file, 'found_index', None) or {}
    page_aprb = found_index.get('Апробация')
    if page_aprb is not None:
        # NOTE(review): the stored value is compared with a single slide
        # number; confirm what is stored when several slides match.
        string_from_text_aprb = ' '.join(
            slide.replace(" ", '')
            for page, slide in enumerate(slides_text, 1)
            if str(page) == str(page_aprb)
        )
        found_repo_aprb = re.findall(self.pattern_for_repo, string_from_text_aprb)
        found_repo_aprb_incorrect = re.findall(self.pattern_for_repo_incorrect, string_from_text_aprb)
        if found_repo_aprb_incorrect:
            remarks.append(
                f"В слайде 'Апробация' вместо выражений {', '.join(map(repr, found_repo_aprb_incorrect))}"
                f" следует привести ссылки вида 'https://github.com/...'"
            )
        # any() ignores empty matches, so a pattern containing an empty
        # alternative cannot trigger this remark on its own.
        if (not found_repo_aprb and not found_repo_aprb_incorrect
                and any(re.findall(self.pattern_repo_mention, string_from_text_aprb))):
            remarks.append(
                'В слайде "Апробация" есть упоминания репозиториев, '
                'однако ссылки на них либо некорректны, либо отсутствуют.'
            )

    checked = set()
    for repo in found_repo:
        url = repo[0]
        if url in checked:
            continue  # request and report each distinct link only once
        checked.add(url)
        # The pattern allows links without a scheme; requests rejects those.
        request_url = url if re.match(r'https?://', url) else f'https://{url}'
        try:
            link = requests.get(request_url, timeout=request_timeout)
        except requests.exceptions.RequestException:
            # Base class of the SSL, connection, timeout and invalid-URL errors.
            self.wrong_repo_ref.append(url)
            continue
        if link.status_code != 200:
            self.wrong_repo_ref.append(url)
        elif self.deep_check:
            self.deep_check_repo(repo, link)

    failures = []
    if self.wrong_repo_ref:
        failures.append(
            f"Найдены несуществующие или закрытые репозитории: {', '.join(map(repr, self.wrong_repo_ref))}"
        )
    if self.empty_repo_ref:
        failures.append(
            f"Найдены пустые репозитории: {', '.join(map(repr, self.empty_repo_ref))}"
        )
    if not failures:
        # NOTE(review): as before, remarks about the 'Апробация' slide alone
        # do not fail the check and are only shown together with a link
        # failure; confirm whether they should fail it.
        return answer(True, 'Пройдена!')
    return answer(False, 'Не пройдена!' + ''.join(f' <br> {part}' for part in remarks + failures))

def deep_check_repo(self, repo, link):
    """Inspect a fetched GitHub page and record the repository if it looks empty.

    Links whose text does not contain "github" are ignored.

    :param repo: regex match tuple; element 0 is the repository link.
    :param link: HTTP response whose ``content`` is parsed as HTML.
    """
    url = repo[0]
    if 'github' not in url:
        return
    page = html.fromstring(link.content)
    # Presumably these anchors are the file-list entries of a repository
    # page - TODO confirm against the current GitHub markup.
    entries = page.xpath("//a[@class ='js-navigation-open Link--primary']")
    if not entries:
        self.empty_repo_ref.append(url)

# if re.findall(r'gitlab', i[0]):
# project_id = quote(i[0].replace('https://gitlab.com/', ''), safe='')
# url = f'{GITLAB_URL}/projects/{project_id}?private_token={PRIVATE_TOKEN}'
# response = requests.get(url)
# project_info = response.json()
# if project_info['visibility'] == 'private':
# wrong_repo_ref.append(i[0])
1 change: 1 addition & 0 deletions app/main/presentations/pptx/presentation_pptx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self, presentation_name):
PresentationBasic.__init__(self, presentation_name)
self.prs = Presentation(presentation_name)
self.add_slides()
self.found_index = {}

def add_slides(self):
for index, slide in enumerate(self.prs.slides, 1):
Expand Down
1 change: 1 addition & 0 deletions app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def get_status(task_id):
'pres_image_share': 'Проверка доли объема презентации, приходящейся на изображения',
'pres_banned_words_check': 'Проверка наличия запретных слов в презентации',
'conclusion_actual': 'Соответствие заключения задачам',
'verify_git_link': 'Проверка действительности ссылки на github',
'conclusion_along': 'Наличие направлений дальнейшего развития',
'simple_check': 'Простейшая проверка отчёта',
'banned_words_in_literature': 'Наличие запрещенных слов в списке литературы',
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
werkzeug==2.0.0
Flask==2.0.3
jinja2==3.0.0
requests==2.24.0
requests~=2.31.0
python-pptx==0.6.18
odfpy==1.4.1
pymongo==3.11.1
Expand All @@ -25,6 +25,8 @@ docx2python~=2.0.4
oauthlib~=3.1.0
pdfplumber==0.6.1
pytest~=7.1.2
lxml~=4.9.2
urllib3~=2.0.3
filetype==1.2.0
language-tool-python==2.7.1
markdown==3.4.4
Loading