Skip to content

Commit

Permalink
Merge pull request #452 from moevm/444_verify_git_link
Browse files Browse the repository at this point in the history
444 verify git link
  • Loading branch information
HadronCollider authored Feb 19, 2024
2 parents 342728e + fd9dab0 commit f9d3640
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 5 deletions.
2 changes: 2 additions & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
['future_dev'],
['pres_banned_words_check'],
['pres_empty_slide'],
['pres_banned_words_check'],
['verify_git_link'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PresImageShareCheck.id: PresImageShareCheck,
FurtherDev.id: FurtherDev,
PresBannedWordsCheck.id: PresBannedWordsCheck,
PresVerifyGitLinkCheck.id: PresVerifyGitLinkCheck,
PresEmptySlideCheck.id: PresEmptySlideCheck,
},
'report': {
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
from .pres_right_words import PresRightWordsCheck
from .image_share import PresImageShareCheck
from .banned_words import PresBannedWordsCheck
from .verify_git_link import PresVerifyGitLinkCheck
from .empty_slide_check import PresEmptySlideCheck
11 changes: 7 additions & 4 deletions app/main/checks/presentation_checks/find_def_sld.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ class FindDefSld(BasePresCriterion):
def __init__(self, file_info, key_slide):
    """Set up a search for slides whose title mentions ``key_slide``.

    Args:
        file_info: passed through unchanged to the base criterion.
        key_slide: text looked for (case-insensitively) in slide titles.
    """
    super().__init__(file_info)
    # 1-based numbers of the matching slides; populated by ``check``.
    self.found_idxs = []
    self.type_of_slide = key_slide

def check(self):
    """Find slides whose title contains ``self.type_of_slide``.

    The match is a case-insensitive substring test against every title
    returned by ``self.file.get_titles()``; slide numbers are 1-based.

    Side effects:
        * ``self.found_idxs`` is rebuilt on every call, so repeated calls
          do not accumulate duplicates.
        * ``self.file.found_index[<slide key>]`` is set to ``None`` when
          nothing matched, otherwise to the number of the first matching
          slide as a string. A single number is stored because consumers
          compare it with ``str(page)``; concatenating several numbers
          (1 and 2 -> '12') would point at a slide that never matched.

    Returns:
        ``answer(False, ...)`` when no slide matched, otherwise
        ``answer(True, ...)`` listing the formatted page links.
    """
    slide_key = str(self.type_of_slide)
    needle = slide_key.lower()

    self.found_idxs = [
        number
        for number, title in enumerate(self.file.get_titles(), 1)
        if needle in str(title).lower()
    ]

    # Not every presentation class is guaranteed to create the shared
    # registry, so create it lazily instead of failing with AttributeError.
    found_index = getattr(self.file, 'found_index', None)
    if found_index is None:
        found_index = {}
        self.file.found_index = found_index

    if not self.found_idxs:
        found_index[slide_key] = None
        return answer(False, 'Слайд не найден')

    found_index[slide_key] = str(self.found_idxs[0])
    found_idxs_link = self.format_page_link(self.found_idxs)
    return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, found_idxs_link))))

@property
def name(self):
Expand Down
89 changes: 89 additions & 0 deletions app/main/checks/presentation_checks/verify_git_link.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@

import re
import requests
from lxml import html
from urllib.parse import quote

from .find_def_sld import FindDefSld
from ..base_check import BasePresCriterion, answer

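# To check whether a GitLab repository is private, an API token is required.
# Never commit a real token: read it from the environment instead, and revoke
# any token that has previously been published in this file.
# GITLAB_URL = 'https://gitlab.com/api/v4'
# PRIVATE_TOKEN = os.environ.get('GITLAB_PRIVATE_TOKEN')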


class PresVerifyGitLinkCheck(BasePresCriterion):
    """Check that repository links found in a presentation are reachable.

    Every GitHub/GitLab/Bitbucket link found in the slide text is requested
    over HTTP. A link counts as wrong when the request fails or does not
    return status 200. With ``deep_check`` enabled, GitHub repositories are
    additionally inspected for emptiness. If an "Апробация" slide was
    registered in ``self.file.found_index``, its text is also checked for
    malformed or missing repository references.

    The check fails when any problem is recorded; all problems are listed
    in the returned message.
    """

    description = "Проверка действительности ссылки на github"
    id = 'verify_git_link'

    # Seconds to wait for a host before treating the link as unreachable;
    # without a timeout a stalled server would block the check indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, file_info, deep_check=True):
        """Create the criterion.

        Args:
            file_info: passed through unchanged to the base criterion.
            deep_check: when true, reachable GitHub repositories are also
                inspected for emptiness.
        """
        super().__init__(file_info)
        self.deep_check = deep_check
        self.wrong_repo_ref = []
        self.empty_repo_ref = []

        # Group 1 of each match is the full link; the dots of the top-level
        # domain are escaped so that e.g. "githubXcom/a/b" is not a link.
        self.pattern_for_repo = r'((((((http(s)?://)?(github|gitlab|bitbucket)+)+(\.com|\.org)+)+/[a-zA-Z0-9_-]+)+/[a-zA-Z0-9_-]+)+/*)+'
        self.pattern_for_repo_incorrect = r'\(github\.com\)|\(gitlab\.com\)|\(bitbucket\.org\)|\(github\)|\(gitlab\)|\(bitbucket\)'
        # No trailing '|': an empty alternative matches every string.
        self.pattern_repo_mention = r'репозиторий|репозитория|репозиторию|репозиториев|репозиториям'

    def check(self):
        """Run the link verification.

        Returns:
            ``answer(True, 'Нечего проверять!')`` when the presentation has
            no repository links, ``answer(True, 'Пройдена!')`` when no
            problem was found, otherwise ``answer(False, ...)`` with every
            problem appended to the message.
        """
        # Reset per-run state so repeated calls do not accumulate results.
        self.wrong_repo_ref = []
        self.empty_repo_ref = []

        slides_text = list(self.file.get_text_from_slides())
        found_repo = re.findall(self.pattern_for_repo, ' '.join(slides_text))
        if not found_repo:
            return answer(True, 'Нечего проверять!')

        problems = self._check_approbation_slide(slides_text)

        # The same link often appears on several slides; request it once.
        for repo in dict.fromkeys(found_repo):
            self._verify_repo(repo)

        if self.wrong_repo_ref:
            wrong = ', '.join(repr(repo) for repo in self.wrong_repo_ref)
            problems.append(f"Найдены несуществующие или закрытые репозитории: {wrong}")
        if self.empty_repo_ref:
            empty = ', '.join(repr(repo) for repo in self.empty_repo_ref)
            problems.append(f"Найдены пустые репозитории: {empty}")

        if not problems:
            return answer(True, 'Пройдена!')
        return answer(False, 'Не пройдена!' + ''.join(f' <br> {problem}' for problem in problems))

    def deep_check_repo(self, repo, link):
        """Record a GitHub repository as empty when no file entries are found.

        Args:
            repo: one match tuple of ``pattern_for_repo``; ``repo[0]`` is
                the link text.
            link: the HTTP response already received for that link.
        """
        if 'github' in repo[0]:
            tree = html.fromstring(link.content)
            # NOTE(review): relies on GitHub's current HTML class names for
            # file-list entries - confirm the selector is still valid.
            if not tree.xpath("//a[@class ='js-navigation-open Link--primary']"):
                self.empty_repo_ref.append(repo[0])

    def _approbation_pages(self):
        """Return the registered "Апробация" slide numbers as a set of strings."""
        found_index = getattr(self.file, 'found_index', None) or {}
        # .get(): the slide lookup may not have run before this check.
        raw = found_index.get('Апробация')
        if raw is None:
            return set()
        if isinstance(raw, (list, tuple, set)):
            return {str(page) for page in raw}
        return {str(raw)}

    def _check_approbation_slide(self, slides_text):
        """Return problem messages about repository references on "Апробация"."""
        pages = self._approbation_pages()
        if not pages:
            return []

        # Spaces are removed so that links broken up by the text extraction
        # are still recognised.
        text = ' '.join(
            slide.replace(' ', '')
            for page, slide in enumerate(slides_text, 1)
            if str(page) in pages
        )
        found_links = re.findall(self.pattern_for_repo, text)
        found_incorrect = re.findall(self.pattern_for_repo_incorrect, text)

        problems = []
        if found_incorrect:
            incorrect = ', '.join(repr(ref) for ref in found_incorrect)
            problems.append(
                f"В слайде 'Апробация' вместо выражений {incorrect}"
                f" следует привести ссылки вида 'https://github.com/...'"
            )
        mentions_repo = re.search(self.pattern_repo_mention, text, flags=re.IGNORECASE)
        if not found_links and not found_incorrect and mentions_repo:
            problems.append(
                'В слайде "Апробация" есть упоминания репозиториев, '
                'однако ссылки на них либо некорректны, либо отсутствуют.'
            )
        return problems

    def _verify_repo(self, repo):
        """Request one repository link and record it if unreachable or empty."""
        url = repo[0]
        # The pattern accepts links without a scheme, but requests rejects
        # them with MissingSchema, so default to https.
        request_url = url if re.match(r'https?://', url) else f'https://{url}'
        try:
            link = requests.get(request_url, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            # Covers connection, SSL, timeout and malformed-URL errors alike.
            self.wrong_repo_ref.append(url)
            return
        if link.status_code != 200:
            self.wrong_repo_ref.append(url)
        elif self.deep_check:
            self.deep_check_repo(repo, link)

# if re.findall(r'gitlab', i[0]):
# project_id = quote(i[0].replace('https://gitlab.com/', ''), safe='')
# url = f'{GITLAB_URL}/projects/{project_id}?private_token={PRIVATE_TOKEN}'
# response = requests.get(url)
# project_info = response.json()
# if project_info['visibility'] == 'private':
# wrong_repo_ref.append(i[0])
1 change: 1 addition & 0 deletions app/main/presentations/pptx/presentation_pptx.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self, presentation_name):
PresentationBasic.__init__(self, presentation_name)
self.prs = Presentation(presentation_name)
self.add_slides()
self.found_index = {}

def add_slides(self):
for index, slide in enumerate(self.prs.slides, 1):
Expand Down
1 change: 1 addition & 0 deletions app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def get_status(task_id):
'pres_image_share': 'Проверка доли объема презентации, приходящейся на изображения',
'pres_banned_words_check': 'Проверка наличия запретных слов в презентации',
'conclusion_actual': 'Соответствие заключения задачам',
'verify_git_link': 'Проверка действительности ссылки на github',
'conclusion_along': 'Наличие направлений дальнейшего развития',
'simple_check': 'Простейшая проверка отчёта',
'banned_words_in_literature': 'Наличие запрещенных слов в списке литературы',
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
werkzeug==2.0.0
Flask==2.0.3
jinja2==3.0.0
requests==2.24.0
requests~=2.31.0
python-pptx==0.6.18
odfpy==1.4.1
pymongo==3.11.1
Expand All @@ -25,6 +25,8 @@ docx2python~=2.0.4
oauthlib~=3.1.0
pdfplumber==0.6.1
pytest~=7.1.2
lxml~=4.9.2
urllib3~=2.0.3
filetype==1.2.0
language-tool-python==2.7.1
markdown==3.4.4

0 comments on commit f9d3640

Please sign in to comment.