Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

444 verify git link #452

Merged
merged 14 commits into from
Feb 19, 2024
3 changes: 2 additions & 1 deletion app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
['pres_right_words'],
['pres_image_share'],
['future_dev'],
['pres_banned_words_check']
['pres_banned_words_check'],
['verify_git_link'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand Down
3 changes: 2 additions & 1 deletion app/main/checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
PresRightWordsCheck.id: PresRightWordsCheck,
PresImageShareCheck.id: PresImageShareCheck,
FurtherDev.id: FurtherDev,
PresBannedWordsCheck.id: PresBannedWordsCheck
PresBannedWordsCheck.id: PresBannedWordsCheck,
PresVerifyGitLinkCheck.id: PresVerifyGitLinkCheck
},
'report': {
ReportSimpleCheck.id: ReportSimpleCheck,
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
from .pres_right_words import PresRightWordsCheck
from .image_share import PresImageShareCheck
from .banned_words import PresBannedWordsCheck
from .verify_git_link import PresVerifyGitLinkCheck
9 changes: 5 additions & 4 deletions app/main/checks/presentation_checks/find_def_sld.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ class FindDefSld(BasePresCriterion):
def __init__(self, file_info, key_slide):
    """Set up a search for slides whose title contains ``key_slide``.

    Args:
        file_info: passed unchanged to the base criterion constructor.
        key_slide: text looked for (case-insensitively) in slide titles
            by ``check``.
    """
    super().__init__(file_info)
    # 1-based numbers of the matching slides; filled in by check() so that
    # other criteria can read them afterwards.
    self.found_idxs = []
    self.type_of_slide = key_slide

def check(self):
    """Look for slides whose title contains ``self.type_of_slide``.

    The comparison is a case-insensitive substring match on the string
    form of every title returned by ``self.file.get_titles()``. The
    1-based numbers of the matching slides are stored in
    ``self.found_idxs``.

    Returns:
        ``answer(False, ...)`` when no slide matches, otherwise
        ``answer(True, ...)`` with the formatted numbers of the slides found.
    """
    # Start from a clean list: without this, calling check() more than once
    # on the same object would append the same slide numbers again.
    self.found_idxs = []
    wanted = str(self.type_of_slide).lower()
    for number, title in enumerate(self.file.get_titles(), 1):
        if wanted in str(title).lower():
            self.found_idxs.append(number)
    if not self.found_idxs:
        return answer(False, 'Слайд не найден')
    page_links = self.format_page_link(self.found_idxs)
    return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, page_links))))

@property
def name(self):
Expand Down
90 changes: 90 additions & 0 deletions app/main/checks/presentation_checks/verify_git_link.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

import os
import re
from urllib.parse import quote

import requests
from lxml import html

from .find_def_sld import FindDefSld
from ..base_check import BasePresCriterion, answer

# Base URL of the GitLab REST API, used to find out whether a GitLab
# repository is private (closed).
GITLAB_URL = 'https://gitlab.com/api/v4'
# SECURITY: an access token must never be committed to the repository, so it
# is read from the GITLAB_PRIVATE_TOKEN environment variable. An empty string
# means "no token configured".
# NOTE(review): the token that used to be hard-coded on this line is exposed
# in the project history and should be revoked.
PRIVATE_TOKEN = os.environ.get('GITLAB_PRIVATE_TOKEN', '')


class PresVerifyGitLinkCheck(BasePresCriterion):
    """Verify the repository links (GitHub / GitLab / Bitbucket) in a presentation.

    Every link found in the slide texts is requested over HTTP. A link is
    reported as "wrong" when the request fails or does not return status 200,
    or when the GitLab API reports the project as missing or private. A GitHub
    link is reported as "empty" when the page contains no file-list entries.
    The text of the "Апробация" slide(s) is additionally inspected for
    placeholders such as "(github)" used instead of a real link.

    NOTE (from code review): a presentation without any repository link
    passes this check on purpose - only the correctness of links that are
    present is evaluated.
    """

    description = "Проверка действительности ссылки на github"
    id = 'verify_git_link'

    # Seconds to wait for a remote server, so one dead host cannot hang the check.
    _REQUEST_TIMEOUT = 10

    # "<host>/<owner>/<repo>" with an optional scheme. The dots are escaped and
    # the groups are flat, so the pattern neither matches arbitrary characters
    # in place of "." nor backtracks heavily on long slide texts.
    _REPO_LINK = re.compile(
        r'(?:https?://)?(?:github|gitlab|bitbucket)\.(?:com|org)'
        r'/[A-Za-z0-9_-]+/[A-Za-z0-9_-][A-Za-z0-9_.-]*')
    # Placeholders written instead of a real link, e.g. "(github)".
    _REPO_PLACEHOLDER = re.compile(
        r'\(github\.com\)|\(gitlab\.com\)|\(bitbucket\.org\)|\(github\)|\(gitlab\)|\(bitbucket\)')
    # Word forms of "repository". There is deliberately no trailing "|": an
    # empty alternative would match every text, including an empty one.
    _REPO_MENTION = re.compile(
        r'репозиторий|репозитория|репозиторию|репозиториев|репозиториям')

    def __init__(self, file_info):
        """Create the criterion together with a helper search for the "Апробация" slide."""
        super().__init__(file_info)
        # TODO (from code review): store "important" slides in the structure of
        # the checked file, so that a slide found by FindDefSld can be reused
        # by later checks instead of being searched for again here.
        self.check_aprb = FindDefSld(file_info=file_info, key_slide="Апробация")

    def check(self):
        """Run the criterion.

        Returns:
            ``answer(True, ...)`` when the presentation contains no repository
            links, or when every link is reachable and no remark was produced
            for the "Апробация" slide; ``answer(False, ...)`` with all remarks
            joined by ``<br>`` otherwise.
        """
        self.check_aprb.check()
        # A set of slide numbers: joining the numbers into one string would turn
        # slides 3 and 5 into "35", and a set also ignores repeated entries.
        aprb_pages = set(self.check_aprb.found_idxs)

        slides_text = list(self.file.get_text_from_slides())
        all_text = ' '.join(slides_text)
        # Spaces are removed from the "Апробация" text before matching, as the
        # original implementation did.
        aprb_text = ' '.join(
            text.replace(" ", '')
            for page, text in enumerate(slides_text, 1)
            if page in aprb_pages)

        # Unique links in order of appearance; a trailing "." belongs to the
        # sentence, not to the repository name.
        repo_links = list(dict.fromkeys(
            found.rstrip('.') for found in self._REPO_LINK.findall(all_text)))
        if not repo_links:
            # NOTE (from code review): the absence of links is not a flaw.
            return answer(True, 'Нечего проверять!')

        remarks = []
        placeholders = self._REPO_PLACEHOLDER.findall(aprb_text)
        if placeholders:
            remarks.append(
                f" <br> В слайде 'Апробация' вместо выражений {', '.join(repr(item) for item in placeholders)}"
                f" следует привести ссылки вида 'https://github.com/...'")
        elif not self._REPO_LINK.search(aprb_text) and self._REPO_MENTION.search(aprb_text):
            remarks.append(
                ' <br> В слайде "Апробация" есть упоминания репозиториев, '
                'однако ссылки на них либо некорректны, либо отсутствуют.')

        wrong_repo_ref = []
        empty_repo_ref = []
        for link in repo_links:
            state = self._inspect_repo(link)
            if state == 'wrong':
                wrong_repo_ref.append(link)
            elif state == 'empty':
                empty_repo_ref.append(link)

        if empty_repo_ref:
            remarks.append(
                f" <br> Найдены пустые репозитории: {', '.join(repr(repo) for repo in empty_repo_ref)}")
        if wrong_repo_ref:
            prefix = 'Также найдены' if empty_repo_ref else 'Найдены'
            remarks.append(
                f" <br> {prefix} несуществующие или закрытые репозитории: "
                f"{', '.join(repr(repo) for repo in wrong_repo_ref)}")

        # Remarks about the "Апробация" slide are reported as well; previously
        # they were built and then silently dropped when all links were valid.
        if remarks:
            return answer(False, 'Не пройдена!' + ''.join(remarks))
        return answer(True, 'Пройдена!')

    def _inspect_repo(self, link):
        """Request ``link`` and classify it as ``'ok'``, ``'wrong'`` or ``'empty'``."""
        # The link pattern allows a missing scheme, but requests needs one.
        url = link if link.startswith(('http://', 'https://')) else f'https://{link}'
        # Decide by the host only, so that "github" or "gitlab" inside an owner
        # or repository name does not select the wrong branch.
        host, _, project_path = url.split('://', 1)[1].partition('/')
        try:
            page = requests.get(url, timeout=self._REQUEST_TIMEOUT)
            if page.status_code != 200:
                return 'wrong'
            if host.startswith('github'):
                tree = html.fromstring(page.content)
                # NOTE(review): presumably these anchors are the entries of the
                # repository file list; this depends on GitHub's HTML markup -
                # confirm that the class names are still current.
                if not tree.xpath("//a[@class ='js-navigation-open Link--primary']"):
                    return 'empty'
            elif host.startswith('gitlab') and self._gitlab_is_closed(project_path):
                return 'wrong'
        except requests.exceptions.RequestException:
            # Covers connection and SSL errors, timeouts and malformed URLs.
            return 'wrong'
        return 'ok'

    def _gitlab_is_closed(self, project_path):
        """Return True when the GitLab API reports the project as missing or private."""
        project_id = quote(project_path.strip('/'), safe='')
        # The token travels in a header rather than in the query string, so it
        # does not end up in URLs that may be logged.
        headers = {'PRIVATE-TOKEN': PRIVATE_TOKEN} if PRIVATE_TOKEN else {}
        response = requests.get(
            f'{GITLAB_URL}/projects/{project_id}',
            headers=headers, timeout=self._REQUEST_TIMEOUT)
        if response.status_code == 404:
            return True
        if response.status_code != 200:
            # Any other API failure says nothing about the repository itself,
            # so the link is not penalised for it.
            return False
        try:
            project_info = response.json()
        except ValueError:
            return False
        return project_info.get('visibility') == 'private'
5 changes: 4 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
werkzeug==2.0.0
Flask==2.0.3
jinja2==3.0.0
requests==2.24.0
requests~=2.31.0
python-pptx==0.6.18
odfpy==1.4.1
pymongo==3.11.1
Expand All @@ -25,3 +25,6 @@ docx2python~=2.0.4
oauthlib~=3.1.0
pdfplumber==0.6.1
pytest~=7.1.2

lxml~=4.9.2
urllib3~=2.0.3