Skip to content

Commit

Permalink
Merge branch 'master' into fix_table_bug_in_results
Browse files Browse the repository at this point in the history
  • Loading branch information
MarinaProsche committed Sep 30, 2024
2 parents fc2e885 + 788907d commit df66c38
Show file tree
Hide file tree
Showing 49 changed files with 1,193 additions and 107 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-20.04

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Build system images (non-pulling)
run: |
Expand All @@ -16,10 +16,10 @@ jobs:
- name: Build docker-compose
run: |
cp .env_example .env
docker-compose build
docker compose build
- name: Run docker-compose
run: |
docker-compose up -d
docker compose up -d
sleep 10
- name: Run tests
run: |
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/selenium_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@ jobs:
runs-on: ubuntu-20.04

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Build docker-compose with docker-compose-selenium (tests)
- name: Build docker compose with docker-compose-selenium (tests)
run: |
cp .env_example .env
cp app/VERSION_example.json app/VERSION.json
docker-compose -f docker-compose.yml -f docker-compose-selenium.yml build
docker compose -f docker-compose.yml -f docker-compose-selenium.yml build
- name: Run docker-compose with docker-compose-selenium (tests)
run: |
docker-compose -f docker-compose.yml -f docker-compose-selenium.yml up -d
docker compose -f docker-compose.yml -f docker-compose-selenium.yml up -d
chmod +x tests/scripts/docker_check_tests.sh
./tests/scripts/docker_check_tests.sh
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ SIGNUP_PAGE_ENABLED=...
CONSUMER_KEY=...
CONSUMER_SECRET=...
ACCESS_TOKEN=...
```

## Run
Expand Down
17 changes: 17 additions & 0 deletions app/db/db_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def get_user(username):
else:
return None

def get_all_users():
    """Return the result of an unfiltered ``find()`` on ``users_collection``."""
    every_user = users_collection.find()
    return every_user

# Returns True if user was found and updated and false if not (username can not be changed!)
def edit_user(user):
Expand Down Expand Up @@ -306,6 +308,21 @@ def get_logs_cursor(filter={}, limit=10, offset=0, sort=None, order=None):

return rows, count

def get_user_cursor(filter=None, limit=10, offset=0, sort=None, order=None):
    """Return one page of user documents together with the total match count.

    Args:
        filter: Query passed to ``count_documents`` and ``find``; ``None``
            (the default) means "match everything".
        limit: Maximum number of rows to return; a falsy value disables it.
        offset: Number of rows to skip; a falsy value disables it.
        sort: Field name to sort by; sorting is applied only together with
            a valid ``order``.
        order: ``"asc"`` or ``"desc"``; any other value leaves the rows
            unsorted.

    Returns:
        A ``(rows, count)`` tuple: the (optionally sorted, skipped and
        limited) cursor and the number of documents matching ``filter``
        before paging.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    query = {} if filter is None else filter

    count = users_collection.count_documents(query)
    rows = users_collection.find(query)

    # Membership must be tested against a tuple of the two allowed values.
    # Testing against the single string "asc, desc" is a substring check:
    # it accepts "" or "a" and raises TypeError when order is None.
    if sort and order in ("asc", "desc"):
        direction = pymongo.ASCENDING if order == "asc" else pymongo.DESCENDING
        rows = rows.sort(sort, direction)

    if offset:
        rows = rows.skip(offset)
    if limit:
        rows = rows.limit(limit)

    return rows, count


# Get stats for one user, return a list in the form
# [check_id, login, time of check_id's creation, result(0/1)]
Expand Down
5 changes: 3 additions & 2 deletions app/main/check_packs/base_criterion_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ def check(self):
try:
criterion_check_result = criterion.check()
except Exception as e:
logger.error(f'{criterion.id}: oшибка во время проверки: {e}')
criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG]}
err_msg = f'{criterion.id}: oшибка во время проверки: {e}'
logger.error(err_msg)
criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG, f"Информация об ошибке для администратора: {err_msg}"]}
if criterion.priority and not criterion_check_result['score']:
failed_priority_check = True
criterion_check_result['verdict'] = [PRIORITY_CHECK_FAILED_MSG] + list(criterion_check_result['verdict'])
Expand Down
3 changes: 3 additions & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
['pres_empty_slide'],
['theme_in_pres_check'],
['verify_git_link'],
['pres_image_capture'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand All @@ -43,6 +44,8 @@
["spelling_check"],
["max_abstract_size_check"],
["theme_in_report_check"],
['key_words_report_check'],
["empty_task_page_check"],
]

DEFAULT_TYPE = 'pres'
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
from .find_theme_in_pres import FindThemeInPres
from .verify_git_link import PresVerifyGitLinkCheck
from .empty_slide_check import PresEmptySlideCheck
from .name_of_image_check import PresImageCaptureCheck
19 changes: 10 additions & 9 deletions app/main/checks/presentation_checks/find_def_sld.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@ def __init__(self, file_info, key_slide):
self.found_idxs = []

def check(self):
for i, title in enumerate(self.file.get_titles(), 1):
if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
#found_slides.append(self.file.get_text_from_slides()[i - 1])
self.found_idxs.append(i)
if self.file is not None:
for i, title in enumerate(self.file.get_titles(), 1):
if str(title).lower().find(str(self.type_of_slide).lower()) != -1:
#found_slides.append(self.file.get_text_from_slides()[i - 1])
self.found_idxs.append(i)

# save for future use
self.file.found_index[str(self.type_of_slide)] = self.found_idxs.copy()
self.file.found_index[str(self.type_of_slide)] = self.found_idxs.copy()

if self.found_idxs:
return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, self.format_page_link(self.found_idxs)))))
else:
return answer(False, 'Слайд не найден')
if self.found_idxs:
return answer(True, 'Найден под номером: {}'.format(', '.join(map(str, self.format_page_link(self.found_idxs)))))
else:
return answer(False, 'Слайд не найден')

@property
def name(self):
Expand Down
40 changes: 40 additions & 0 deletions app/main/checks/presentation_checks/name_of_image_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from ..base_check import BasePresCriterion, answer
from utils import name_of_image_check_results

class PresImageCaptureCheck(BasePresCriterion):
    """Criterion that inspects the captions found on presentation slides.

    On slides that also carry body text or a table, every caption must
    contain the word "Рисунок". On slides without such content a caption
    that differs from the slide title is reported as optional.
    """

    label = "Проверка наличия подписи к рисункам"
    description = 'Подписи к рисункам должны содержать слово "Рисунок". Подпись к рисункам на слайдах без текста необязательна'
    id = 'pres_image_capture'

    def __init__(self, file_info):
        super().__init__(file_info)

    def check(self):
        """Run the caption check over all slides of ``self.file``.

        Returns:
            ``answer(True, ...)`` when no slide was flagged, otherwise
            ``answer(False, ...)`` with the list of flagged slides and the
            collected captions formatted by ``name_of_image_check_results``.
        """
        slides_without_capture = set()
        slide_with_image_only = set()
        all_captions = []

        for num, slide in enumerate(self.file.slides, 1):
            captions = slide.get_captions()
            if not captions:
                continue

            # These values do not depend on the individual caption, so they
            # are computed once per slide rather than once per caption.
            title = slide.get_title()
            body_text = (
                slide.get_text()
                .replace(captions[0][0], '')
                .replace(title, '')
                .replace('<number>', '')
                .replace(' ', '')
            )
            has_content = bool(body_text.strip() or slide.get_table())

            for caption in captions:
                caption_text = caption[0]
                if has_content:
                    all_captions.append(caption_text)
                    if 'Рисунок' not in caption_text:
                        slides_without_capture.add(num)
                elif caption_text != title:
                    slide_with_image_only.add(num)

        # Decide on the collected findings. The message itself starts with a
        # non-empty prefix, so testing its truthiness would always fail the
        # check and make the success branch unreachable.
        if not slides_without_capture and not slide_with_image_only:
            return answer(True, 'Пройдена!')

        result_str = 'Не пройдена! '
        if slides_without_capture:
            result_str += (
                'Подписи к рисункам на следующих слайдах отсутствуют или не содержат слова "Рисунок": {}'.format(
                    ', '.join(self.format_page_link(sorted(slides_without_capture)))) + '<br>')
        if slide_with_image_only:
            result_str += (
                'Подписи к рисункам на следующих слайдах без текста необязательны: {}'.format(
                    ', '.join(self.format_page_link(sorted(slide_with_image_only)))) + '<br>')
        return answer(False, name_of_image_check_results(result_str, all_captions))
6 changes: 2 additions & 4 deletions app/main/checks/presentation_checks/sld_similarity.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from nlp.similarity_of_texts import check_similarity
from utils import get_text_from_slides, tasks_conclusions_feedback

from app.nlp.similarity_of_texts import check_similarity
from app.nlp.stemming import Stemming
from ..base_check import BasePresCriterion, answer


Expand All @@ -18,8 +18,6 @@ def __init__(self, file_info, goals='Цель и задачи', conclusion='За
def check(self):
goals = get_text_from_slides(self.file, self.goals)
conclusions = get_text_from_slides(self.file, self.conclusion)
if goals == "" or conclusions == "":
return answer(False, 'Задач или заключения не существует')

results = check_similarity(goals, conclusions)

Expand Down
4 changes: 3 additions & 1 deletion app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@
from .style_check import ReportStyleCheck
from .spelling_check import SpellingCheck
from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck
from .template_name import ReportTemplateNameCheck
from .template_name import ReportTemplateNameCheck
from .key_words_check import KeyWordsReportCheck
from .empty_task_page_check import EmptyTaskPageCheck
27 changes: 22 additions & 5 deletions app/main/checks/report_checks/banned_words_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re

from .style_check_settings import StyleCheckSettings
from ..base_check import BaseReportCriterion, answer, morph


Expand All @@ -8,15 +8,32 @@ class ReportBannedWordsCheck(BaseReportCriterion):
description = 'Запрещено упоминание слова "мы"'
id = 'banned_words_check'

def __init__(self, file_info, words=["мы"], min_count=3, max_count=6):
def __init__(self, file_info, headers_map=None):
super().__init__(file_info)
self.words = [morph.normal_forms(word)[0] for word in words]
self.min_count = min_count
self.max_count = max_count
self.words = []
self.min_count = 0
self.max_count = 0
if headers_map:
self.config = headers_map
else:
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'

def late_init(self):
    """Load banned words and count thresholds from the style settings.

    Looks up the preset for the document's main headers inside the
    configuration named by ``self.config``; falls back to the
    ``'any_header'`` preset when there is no exact match. When neither
    preset exists, the word list and thresholds are left untouched.
    """
    self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
    presets = StyleCheckSettings.CONFIGS.get(self.config)

    if self.headers_main in presets:
        preset_key = self.headers_main
    elif 'any_header' in presets:
        preset_key = 'any_header'
    else:
        return

    chosen = presets[preset_key]
    # Banned words are stored in their normal (dictionary) form.
    self.words = [morph.normal_forms(word)[0] for word in chosen['banned_words']]
    self.min_count = chosen['min_count_for_banned_words_check']
    self.max_count = chosen['max_count_for_banned_words_check']

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
detected_lines = {}
result_str = f'<b>Запрещенные слова: {"; ".join(self.words)}</b><br>'
count = 0
Expand Down
4 changes: 2 additions & 2 deletions app/main/checks/report_checks/chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def __init__(self, file_info):
self.target_styles = StyleCheckSettings.VKR_CONFIG
self.target_styles = list(map(lambda elem: {
"style": self.construct_style_from_description(elem["style"])
}, self.target_styles))
}, self.target_styles.values()))
self.docx_styles = {}
self.style_regex = {}
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'
self.presets = StyleCheckSettings.CONFIGS.get(self.config)
level = 0
for format_description in self.presets:
for _, format_description in self.presets.items():
self.docx_styles.update({level: format_description["docx_style"]})
pattern = re.compile(format_description["regex"])
self.style_regex.update({level: pattern})
Expand Down
45 changes: 45 additions & 0 deletions app/main/checks/report_checks/empty_task_page_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import re
from ..base_check import BaseReportCriterion, answer

PAGE_NAME = "ЗАДАНИЕ НА ВЫПУСКНУЮ КВАЛИФИКАЦИОННУЮ РАБОТУ"


class EmptyTaskPageCheck(BaseReportCriterion):
    """Criterion that verifies the task page of a report is filled in.

    The page at index 1 of the PDF is expected to be the task page; its
    rows are scanned for the student/group line, the two dates and the
    "student", "supervisor" and "work theme" fields.
    """

    label = "Проверка на пустоту страницы с заданием"
    description = f'Страница "{PAGE_NAME}" должна содержать текст'
    id = 'empty_task_page_check'

    def __init__(self, file_info):
        super().__init__(file_info)
        # Field labels that must be followed by some text on the page.
        self.check_words = {'студент', 'руководитель', 'тема работы'}
        # Patterns are matched against rows with whitespace, dots and
        # underscores removed and lower-cased (see check()).
        self.check_first_pattern = r'^студент+[а-яА-ЯёЁa-zA-Z]+группа\d+$'
        self.check_date_pattern = r'^«\d+»[а-яА-ЯёЁa-zA-Z]+20\d+г«\d+»[а-яА-ЯёЁa-zA-Z]+20\d+г$'
        # Fields still considered missing; entries are discarded as found.
        self.result = {'Cтудент, Группа', 'Дата выдачи задания, Дата представления ВКР к защите', 'Студент', 'Руководитель', 'Тема работы'}

    def check(self):
        """Check that the required fields of the task page are present.

        Returns:
            ``answer(True, ...)`` when every expected field was found,
            otherwise ``answer(False, ...)`` with the reason or the list of
            missing fields.
        """
        if self.file.page_counter() < 4:
            return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
        rows_text = self.file.pdf_file.page_rows_text(1)
        # Guard against a page without any rows before indexing the first
        # one; such a page cannot contain the expected title either.
        # NOTE(review): row[4] is used as the row text and row[5] as a row
        # position throughout - confirm against page_rows_text().
        if not rows_text or 'ЗАДАНИЕ' not in rows_text[0][4]:
            return answer(False, f'Страница "{PAGE_NAME}" не найдена. Убедитесь, что она является второй в документе и не содержит ошибок в заголовке.')
        if len(rows_text) < 4:
            return answer(False, f'Страница "{PAGE_NAME}" не содержит текста.')

        start_string = 0
        for row in rows_text:
            row_string = row[4].replace('\n', '').replace('.', '').replace(' ', '').replace('_', '').lower()
            if re.match(self.check_first_pattern, row_string):
                self.result.discard('Cтудент, Группа')
                # Later field lookups start after the student/group row.
                start_string = row[5]
            if re.match(self.check_date_pattern, row_string):
                self.result.discard('Дата выдачи задания, Дата представления ВКР к защите')

        for k in self.check_words:
            for row in rows_text[start_string+1:]:
                row_string = row[4].replace('\n', '').replace(' ', '').replace('_', '').lower()
                if k.replace(' ', '') in row_string:
                    # The row must hold more than just the field label.
                    if len(row_string) > (len(k)+2):
                        self.result.discard(k.capitalize())

        if not self.result:
            return answer(True, 'Пройдена!')
        # Sorted so the message does not depend on set iteration order.
        return answer(False, f'Некоторые необходимые поля пусты или отсутствуют. Проверьте поля: «{"», «".join(sorted(self.result))}»')
10 changes: 7 additions & 3 deletions app/main/checks/report_checks/find_theme_in_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ def find_theme(self):
if key == 1:
lower_text = text_on_page.lower()
text_without_punct = lower_text.translate(str.maketrans('', '', string.punctuation))
list_full = text_without_punct.split()
start = list_full.index('тема') + 1
end = list_full.index('студент')
list_full = tuple(text_without_punct.split())
start, end = 0, len(list_full)
for index, value in enumerate(list_full):
if value == "тема":
start = index + 1
elif value in {"студент", "студентка"}:
end = index
list_theme = list_full[start:end]
lemma_theme = {MORPH_ANALYZER.parse(word)[0].normal_form for word in list_theme if
word not in stop_words}
Expand Down
10 changes: 8 additions & 2 deletions app/main/checks/report_checks/image_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,18 @@ def check(self):
if not len(self.headers):
return answer(False, "Не найдено ни одного заголовка.<br><br>Проверьте корректность использования стилей.")
number_of_images, all_numbers = self.count_images_vkr()
if not number_of_images:
count_file_image_object = self.file.pdf_file.get_image_num()
if count_file_image_object and not number_of_images:
return answer(False, f'В отчёте найдено {count_file_image_object} рисунков, но не найдено ни одной подписи рисунка.<br><br> Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
f'использован стиль {self.image_style}, и формат: '
f'"Рисунок <Номер рисунка> — <Название рисунка>".')
elif not number_of_images:
return answer(True, f'Не найдено ни одного рисунка.<br><br> Если в вашей работе присутствуют рисунки, убедитесь, что для их подписи был '
f'использован стиль {self.image_style}, и формат: '
f'"Рисунок <Номер рисунка> -- <Название рисунка>".')
f'"Рисунок <Номер рисунка> <Название рисунка>".')
else:
return answer(False, 'Во время обработки произошла критическая ошибка')

references = self.search_references()
if len(references.symmetric_difference(all_numbers)) == 0:
return answer(True, f"Пройдена!")
Expand Down
Loading

0 comments on commit df66c38

Please sign in to comment.