Skip to content

Commit

Permalink
Merge pull request #601 from moevm/476_opnp_criteria
Browse files Browse the repository at this point in the history
476 opnp criteria
  • Loading branch information
HadronCollider authored Sep 20, 2024
2 parents 4744393 + 6c78e3a commit 77a1540
Show file tree
Hide file tree
Showing 14 changed files with 216 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/selenium_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Build docker-compose with docker-compose-selenium (tests)
- name: Build docker compose with docker-compose-selenium (tests)
run: |
cp .env_example .env
cp app/VERSION_example.json app/VERSION.json
Expand Down
1 change: 1 addition & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
["spelling_check"],
["max_abstract_size_check"],
["theme_in_report_check"],
['key_words_report_check'],
["empty_task_page_check"],
]

Expand Down
1 change: 1 addition & 0 deletions app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@
from .spelling_check import SpellingCheck
from .max_abstract_size_check import ReportMaxSizeOfAbstractCheck
from .template_name import ReportTemplateNameCheck
from .key_words_check import KeyWordsReportCheck
from .empty_task_page_check import EmptyTaskPageCheck
27 changes: 22 additions & 5 deletions app/main/checks/report_checks/banned_words_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re

from .style_check_settings import StyleCheckSettings
from ..base_check import BaseReportCriterion, answer, morph


Expand All @@ -8,15 +8,32 @@ class ReportBannedWordsCheck(BaseReportCriterion):
description = 'Запрещено упоминание слова "мы"'
id = 'banned_words_check'

def __init__(self, file_info, words=["мы"], min_count=3, max_count=6):
def __init__(self, file_info, headers_map=None):
super().__init__(file_info)
self.words = [morph.normal_forms(word)[0] for word in words]
self.min_count = min_count
self.max_count = max_count
self.words = []
self.min_count = 0
self.max_count = 0
if headers_map:
self.config = headers_map
else:
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'

def late_init(self):
    """Load the banned words and count limits from the style configuration.

    The preset is chosen by the value returned from
    ``self.file.get_main_headers`` (stored in ``self.headers_main``); when the
    configuration has no entry for it, the ``'any_header'`` entry is used if
    present.  When neither entry exists the values assigned in ``__init__``
    are left untouched.
    """
    self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
    presets = StyleCheckSettings.CONFIGS.get(self.config)
    if self.headers_main in presets:
        preset_key = self.headers_main
    elif 'any_header' in presets:
        preset_key = 'any_header'
    else:
        # No matching preset: nothing to load.
        return
    preset = presets[preset_key]
    # Banned words are stored in normal form, as returned by morph.normal_forms.
    self.words = [morph.normal_forms(banned)[0] for banned in preset['banned_words']]
    self.min_count = preset['min_count_for_banned_words_check']
    self.max_count = preset['max_count_for_banned_words_check']

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
detected_lines = {}
result_str = f'<b>Запрещенные слова: {"; ".join(self.words)}</b><br>'
count = 0
Expand Down
4 changes: 2 additions & 2 deletions app/main/checks/report_checks/chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ def __init__(self, file_info):
self.target_styles = StyleCheckSettings.VKR_CONFIG
self.target_styles = list(map(lambda elem: {
"style": self.construct_style_from_description(elem["style"])
}, self.target_styles))
}, self.target_styles.values()))
self.docx_styles = {}
self.style_regex = {}
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'
self.presets = StyleCheckSettings.CONFIGS.get(self.config)
level = 0
for format_description in self.presets:
for _, format_description in self.presets.items():
self.docx_styles.update({level: format_description["docx_style"]})
pattern = re.compile(format_description["regex"])
self.style_regex.update({level: pattern})
Expand Down
66 changes: 66 additions & 0 deletions app/main/checks/report_checks/key_words_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import re
import string

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from pymorphy2 import MorphAnalyzer
from ..base_check import BaseReportCriterion, answer


# Module-level pymorphy2 analyzer: created once at import time and shared by
# every lemmatization call in this module.
MORPH_ANALYZER = MorphAnalyzer()

class KeyWordsReportCheck(BaseReportCriterion):
    """Check that the report lists key words and that each one occurs in the text.

    The second paragraph of the document (``self.file.paragraphs[1]``) must
    contain the marker "ключевые слова" followed by a comma-separated list of
    key words or key phrases.  Every phrase is lemmatized and searched for in
    the lemmatized paragraphs of all chapters except the abstract and the
    key-words section itself.
    """

    label = 'Проверка наличия раздела "Ключевые слова" и упоминание их в тексте'
    description = 'Раздел идет сразу после названия работы и содержит не менее трех ключевых слов. Слова упоминаются в тексте'
    id = 'key_words_report_check'

    # Lower-case marker that identifies the key-words paragraph.
    _SECTION_MARKER = 'ключевые слова'
    # Chapter titles (lower-case) whose paragraphs are not searched.
    _SKIPPED_CHAPTERS = ('аннотация', 'ключевые слова')
    # Compiled once: strips HTML-like tags and ASCII punctuation.
    _TAG_RE = re.compile(r'<[^>]*>')
    _PUNCT_TABLE = str.maketrans('', '', string.punctuation)

    def __init__(self, file_info, min_key_words=3):
        """Store the minimal number of key words required for the check to pass.

        :param file_info: passed unchanged to ``BaseReportCriterion.__init__``.
        :param min_key_words: minimal number of non-empty key words.
        """
        super().__init__(file_info)
        self.min_key_words = min_key_words
        self.chapters = []
        self.text_par = []
        self.lemme_list = []

    def late_init(self):
        """Build the chapter structure of the report (deferred until ``check``)."""
        self.chapters = self.file.make_chapters(self.file_type['report_type'])

    @classmethod
    def _lemmatize(cls, text, stop_words):
        """Return ``text`` as a space-joined string of lemmas without stop words.

        Tags and ASCII punctuation are removed before tokenizing, so key
        phrases and report paragraphs are normalised in exactly the same way.
        """
        plain = cls._TAG_RE.sub('', text).translate(cls._PUNCT_TABLE)
        return ' '.join(
            MORPH_ANALYZER.parse(token)[0].normal_form
            for token in word_tokenize(plain)
            if token.lower() not in stop_words
        )

    def check(self):
        """Run the check and return the result built by ``answer``.

        Fails when the key-words paragraph is missing, when it lists fewer
        than ``min_key_words`` non-empty entries, when the report has fewer
        than four pages, or when some key word is not found in the text.
        """
        not_found = answer(False, 'Раздел "Ключевые слова" не найден')
        try:
            key_words_chapter = self.file.paragraphs[1].lower()
        except IndexError:
            # A document with fewer than two paragraphs has no key-words section.
            return not_found
        if self._SECTION_MARKER not in key_words_chapter:
            return not_found

        cleaned = self._TAG_RE.sub('', key_words_chapter)
        cleaned = cleaned.replace(self._SECTION_MARKER, '').replace(':', '')
        # Drop empty fragments (e.g. from a trailing comma) so they are neither
        # counted as key words nor trivially "found" in the text.
        key_words = [part.strip() for part in cleaned.split(',') if part.strip()]
        if len(key_words) < self.min_key_words:
            return answer(False, f'Не пройдена! Количество ключевых слов должно быть не менее {self.min_key_words}')
        if self.file.page_counter() < 4:
            return answer(False, "В отчете недостаточно страниц. Нечего проверять.")

        self.late_init()
        stop_words = set(stopwords.words("russian"))
        # Reset per-run state so repeated calls do not accumulate results.
        self.text_par = []
        self.lemme_list = []
        for chapter in self.chapters:
            if chapter["text"].lower() in self._SKIPPED_CHAPTERS:
                continue
            self.intro = chapter
            self.text_par.extend(par['text'].lower() for par in chapter['child'])

        # Lemmatize every paragraph once instead of once per key phrase.
        lemma_texts = [self._lemmatize(par, stop_words) for par in self.text_par]
        for phrase in key_words:
            phrase_lemma = self._lemmatize(phrase, stop_words)
            if not any(phrase_lemma in lemma_text for lemma_text in lemma_texts):
                self.lemme_list.append(phrase)

        if self.lemme_list:
            return answer(False, f"Не пройдена! В тексте не найдены следующие ключевые слова: «{'», «'.join(self.lemme_list)}»")
        return answer(True, 'Пройдена!')
22 changes: 16 additions & 6 deletions app/main/checks/report_checks/literature_references.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re

from .style_check_settings import StyleCheckSettings
from ..base_check import BaseReportCriterion, answer


Expand All @@ -8,17 +8,26 @@ class ReferencesToLiteratureCheck(BaseReportCriterion):
description = ''
id = 'literature_references'

def __init__(self, file_info, min_ref=1, max_ref=1000):
def __init__(self, file_info, min_ref=1, max_ref=1000, headers_map=None):
super().__init__(file_info)
self.headers = []
self.literature_header = []
self.name_pattern = r'список[ \t]*(использованных|использованной|)[ \t]*(источников|литературы)'
self.min_ref = min_ref
self.max_ref = max_ref
if headers_map:
self.config = headers_map
else:
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'

def late_init_vkr(self):
    """Collect chapter data and load the reference-count limits from the config.

    Stores the chapters, the literature header and the value returned by
    ``get_main_headers`` on the instance, then replaces ``min_ref`` and
    ``max_ref`` with the values of the matching preset (or of the
    ``'any_header'`` preset when there is no exact match).  Without a
    matching preset the limits passed to ``__init__`` stay in effect.
    """
    self.headers = self.file.make_chapters(self.file_type['report_type'])
    self.literature_header = self.file.find_literature_vkr(self.file_type['report_type'])
    self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
    presets = StyleCheckSettings.CONFIGS.get(self.config)
    if self.headers_main in presets:
        preset_key = self.headers_main
    elif 'any_header' in presets:
        preset_key = 'any_header'
    else:
        return
    limits = presets[preset_key]
    self.min_ref = limits['min_ref_for_literature_references_check']
    # NOTE(review): confirm this key is spelled with the same characters
    # (Latin vs. Cyrillic look-alikes) as the key in StyleCheckSettings.
    self.max_ref = limits['mах_ref_for_literature_references_check']

def check(self):
if self.file.page_counter() < 4:
Expand Down Expand Up @@ -51,7 +60,7 @@ def check(self):
all_numbers.add(i)
if len(references.symmetric_difference(all_numbers)) == 0:
if not self.min_ref <= number_of_sources <= self.max_ref:
return answer(False, f'Список источников оформлен верно, однако их количество ({number_of_sources}) не удовлетворяет необходимому критерию. <br> Количество источников должно быть от {self.min_ref} до {self.max_ref}.')
return answer(False, f'Список источников оформлен верно, однако их количество ({number_of_sources}) не удовлетворяет необходимому критерию. <br> Количество источников должно быть не менее {self.min_ref}.')
elif ref_sequence:
result_str += f"Источники должны нумероваться в порядке упоминания в тексте. Неправильные последовательности: {'; '.join(num for num in ref_sequence)}"
return answer(False, result_str)
Expand All @@ -74,6 +83,7 @@ def check(self):
<li>Убедитесь, что для ссылки на источник используются квадратные скобки;</li>
<li>Убедитесь, что для оформления списка литературы был использован нумированный список;</li>
<li>Убедитесь, что после и перед нумированным списком отсутствуют непустые абзацы.</li>
<li>Убедитесь, что один источник не разбит на двае строки клавишей "Enter".</li>
</ul>
'''
return answer(False, result_str)
Expand Down
18 changes: 10 additions & 8 deletions app/main/checks/report_checks/needed_headers_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,25 @@ def __init__(self, file_info, main_heading_style="heading 2", headers_map=None):
self.config = headers_map
else:
self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'
self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"]
# self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"]

def late_init(self):
self.headers = self.file.make_chapters(self.file_type['report_type'])
self.headers_page = self.file.find_header_page(self.file_type['report_type'])
self.chapters_str = self.file.show_chapters(self.file_type['report_type'])
# TODO: change
self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
if self.headers_main == "Задание 1":
self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[0]["headers"]
elif self.headers_main == "Задание 2":
self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[1]["headers"]
if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config):
self.patterns = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['headers']
else:
if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config):
self.patterns = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['headers']

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
if not self.patterns:
return answer(False, "Не удалось сформировать требуемые заголовки исходя из названия работы. Проверьте наименование работы.")
result_string = ''
patterns = []
for pattern in self.patterns:
Expand All @@ -55,7 +57,7 @@ def check(self):
if not result_string:
result_str = f'Все необходимые заголовки обнаружены!'
result_str += f'<br><br><b>Ниже представлена иерархия обработанных заголовков, ' \
f'сравните с Содержанием {self.format_page_link([self.headers_page])}:</b>'
f'сравните с Содержанием {self.format_page_link([self.headers_page])}:</b>'
result_str += self.chapters_str
result_str += '<br>Если список не точный, убедитесь, что для каждого заголовка указан верный стиль.'
return answer(True, result_str)
Expand All @@ -70,7 +72,7 @@ def check(self):
</ul>
'''
result_str += f'<br><br><b>Ниже представлена иерархия обработанных заголовков, ' \
f'сравните с Содержанием {self.format_page_link([self.headers_page])}:</b>'
f'сравните с Содержанием {self.format_page_link([self.headers_page])}:</b>'
result_str += self.chapters_str
result_str += '<br>Если список не точный, убедитесь, что для каждого заголовка указан верный стиль.'
return answer(False, result_str)
9 changes: 8 additions & 1 deletion app/main/checks/report_checks/section_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def __init__(self, file_info, chapter='Введение', patterns=('цель',
super().__init__(file_info)
self.intro = {}
if headers_map:
self.chapter = StyleCheckSettings.CONFIGS.get(headers_map)[0]["headers"][0]
self.config = headers_map
self.chapter = ''
patterns = ('цель', 'задач')
else:
self.chapter = chapter
Expand All @@ -21,12 +22,18 @@ def __init__(self, file_info, chapter='Введение', patterns=('цель',
self.patterns.append({"name": pattern.capitalize(), "text": pattern, "marker": 0})

def late_init(self):
    """Resolve the chapter to inspect (if not set yet) and build the chapters.

    When ``self.chapter`` is empty, the value returned by
    ``get_main_headers`` is looked up in the configured presets and the
    chapter name is taken from its ``"header_for_report_section_component"``
    entry; ``self.chapter`` stays empty when there is no such preset.
    """
    if not self.chapter:
        self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
        presets = StyleCheckSettings.CONFIGS.get(self.config)
        if self.headers_main in presets:
            preset = presets[self.headers_main]
            self.chapter = preset["header_for_report_section_component"]
    # Chapters are always rebuilt, whether or not the chapter name was resolved.
    self.chapters = self.file.make_chapters(self.file_type['report_type'])

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
if not self.chapter:
return answer(True, f'Данная проверка не предусмотрена для работы с темой "{self.headers_main}"')
result_str = ''
for intro in self.chapters:
header = intro["text"].lower()
Expand Down
2 changes: 1 addition & 1 deletion app/main/checks/report_checks/sections_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def check(self):
self.file.parse_effective_styles()
result = True
result_str = ""
for preset in self.presets:
for _, preset in self.presets.items():
full_style = self.construct_style_from_description(preset["style"])
precheck_dict = {key: preset["style"].get(key) for key in self.prechecked_props}
precheck_style = self.construct_style_from_description(precheck_dict)
Expand Down
2 changes: 1 addition & 1 deletion app/main/checks/report_checks/short_sections_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, file_info, min_section_count=5, min_section_len=20, main_head
if prechecked_props_lst is None:
prechecked_props_lst = StyleCheckSettings.PRECHECKED_PROPS
self.styles: List[Style] = []
for format_description in self.presets:
for _, format_description in self.presets.items():
prechecked_dict = {key: format_description["style"].get(key) for key in prechecked_props_lst}
style = Style()
style.__dict__.update(prechecked_dict)
Expand Down
2 changes: 1 addition & 1 deletion app/main/checks/report_checks/style_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, file_info, header_styles=None, target_styles=None, key_proper
self.target_styles))
if header_styles is None:
self.header_styles = []
for style_dict in StyleCheckSettings.LR_CONFIG:
for _, style_dict in StyleCheckSettings.LR_CONFIG.items():
header_style = {key: style_dict["style"].get(key) for key in StyleCheckSettings.PRECHECKED_PROPS}
style = Style()
style.__dict__.update(header_style)
Expand Down
Loading

0 comments on commit 77a1540

Please sign in to comment.