Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
MarinaProsche committed Nov 8, 2023
2 parents 91cfcbe + 9f7b7b6 commit dd1a5d9
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 4 deletions.
11 changes: 10 additions & 1 deletion app/main/reports/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Запуск и тестирование

Пререквизиты: `argparse`, `python-docx`, `docx2python`, `re`, `subprocess`. Для парсинга `.doc`-файлов потребуется
Пререквизиты: `argparse`, `python-docx`, `docx2python`, `re`, `subprocess`, `markdown`. Для парсинга `.doc`-файлов потребуется
LibreOffice.

Здесь и далее считается, что корневая директория репозитория добавлена в `PYTHONPATH`.
Expand Down Expand Up @@ -65,3 +65,12 @@ Proof-of-concept парсинг файлов `.docx` с выводом стру
```bash
$ python3 -m app.main.mse22.pdf_document text_from_pages --filename path_to_file
```

## `MD`

Парсинг файлов `.md` с выводом структуры файла в текстовом виде в stdout.

```bash
$ python3 -m app.main.reports.md_uploader md_parser --mdfile path_to_md_file
```

1 change: 1 addition & 0 deletions app/main/reports/md_uploader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .md_uploader import MdUpload
21 changes: 21 additions & 0 deletions app/main/reports/md_uploader/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import argparse

from .md_uploader import main as md_uploader_main


def parse_args():
    """Build the command-line parser and parse the process arguments.

    The CLI exposes a single ``md_parser`` sub-command that takes a mandatory
    ``--mdfile`` path. The sub-command itself is required: when it is omitted
    argparse prints a usage error and exits, instead of returning a namespace
    without ``func`` (which would make the caller fail with
    ``AttributeError`` when it tries to invoke ``args.func``).

    Returns:
        argparse.Namespace: the parsed arguments. ``func`` holds the handler
        registered for the chosen sub-command, ``mdfile`` the given path and
        ``command`` the sub-command name.
    """
    parser = argparse.ArgumentParser(description='File md parser')
    # ``dest`` + ``required`` make argparse reject a call with no sub-command.
    subparsers = parser.add_subparsers(dest='command', required=True)
    md_parser = subparsers.add_parser('md_parser', help='md document')
    md_parser.add_argument('--mdfile', type=str, required=True, help='path to md file')
    md_parser.set_defaults(func=md_uploader_main)
    return parser.parse_args()


def main():
    """Parse the command line and run the handler bound to the sub-command."""
    parsed = parse_args()
    handler = parsed.func
    # The handler receives the whole namespace, not just selected fields.
    handler(parsed)


# Run the CLI only when the package is executed as a script.
if __name__ == '__main__':
    main()
61 changes: 61 additions & 0 deletions app/main/reports/md_uploader/md_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import markdown #installation: pip install markdown
import re

class MdUpload:
    """Extract a coarse structure from a Markdown file via its HTML rendering.

    The file is rendered to HTML with ``markdown.markdown``; ``<h1>`` contents
    are collected as headers, ``<h2>`` contents as chapters, and the HTML is
    cut at every ``<h2>`` heading to pair each chapter title with the markup
    that follows it.
    """

    # Raw strings: "/" needs no escaping in a regex, and "\/" inside a normal
    # string literal is an invalid escape sequence that triggers a warning.
    _HEADER_REGEX = r"<h1>(.*?)</h1>"
    _CHAPTER_REGEX = r"<h2>(.*?)</h2>"

    def __init__(self, path_to_md_file):
        """Remember the path and start with empty parse results.

        Args:
            path_to_md_file: path of the Markdown file to read.
        """
        self.path_to_md_file = path_to_md_file
        self.headers = []            # contents of <h1> elements
        self.chapters = []           # contents of <h2> elements
        self.paragraphs = []         # rendered HTML split on newlines
        self.html_text = ''          # full rendered HTML
        self.tables = []
        self.chapter_with_text = []  # chapter title + markup up to next <h2>
        self.split_chapter = []      # result of the most recent chapter split

    def read_md_file(self) -> str:
        """Return the whole file at ``path_to_md_file`` decoded as UTF-8."""
        with open(self.path_to_md_file, "r", encoding="utf-8") as f:
            return f.read()

    def get_html_from_md(self, md_text) -> None:
        """Render ``md_text`` to HTML and store it whole and split by line."""
        self.html_text = markdown.markdown(md_text)
        self.paragraphs = self.html_text.split('\n')

    def get_headers(self) -> None:
        """Store the contents of every ``<h1>`` element in ``headers``."""
        self.headers = re.findall(self._HEADER_REGEX, self.html_text)

    def get_chapters(self) -> None:
        """Store the contents of every ``<h2>`` element in ``chapters``."""
        self.chapters = re.findall(self._CHAPTER_REGEX, self.html_text)

    def get_chapter_with_text(self) -> None:
        """Fill ``chapter_with_text`` with one entry per section of the HTML.

        The first entry is the markup preceding the first ``<h2>`` (with an
        empty title); every following entry is a chapter title immediately
        followed by the markup up to the next ``<h2>`` heading.
        """
        # Start from scratch so that repeated calls do not duplicate entries.
        self.chapter_with_text = []
        remaining = self.html_text
        chapter_name = ''
        for chapter in self.chapters:
            marker = "<h2>" + chapter + "</h2>"
            # Cut at the FIRST occurrence only: splitting on every occurrence
            # mixes up sections (and later raises IndexError) when two
            # chapters share the same title.
            self.split_chapter = remaining.split(marker, 1)
            if len(self.split_chapter) < 2:
                # Heading not present in the remaining markup; nothing to cut.
                continue
            before, after = self.split_chapter
            self.chapter_with_text.append(chapter_name + before)
            chapter_name = chapter
            remaining = after
        self.chapter_with_text.append(chapter_name + remaining)

    def get_tables_size(self) -> float:
        """Return the share of rendered lines that contain a ``|`` character.

        Returns:
            The ratio rounded to 4 decimal places, or ``0.0`` when there are
            no rendered lines at all (nothing has been rendered yet).
        """
        total = len(self.paragraphs)
        if not total:
            # Avoid ZeroDivisionError before get_html_from_md() has run.
            return 0.0
        table_lines = sum(1 for line in self.paragraphs if "|" in line)
        return round(table_lines / total, 4)

    def parse_md_file(self) -> str:
        """Read and analyse the file, returning a human-readable report.

        Returns:
            A text listing the headers, the chapters, the chapters with their
            text and the table share computed by ``get_tables_size``.
        """
        md_text = self.read_md_file()
        self.get_html_from_md(md_text)
        self.get_headers()
        self.get_chapters()
        self.get_chapter_with_text()
        # Computed once; the value is only needed for the report below.
        tables_share = self.get_tables_size()
        return f"Заголовки:\n{self.headers}\n\nГлавы:\n{self.chapters}\n\nГлавы с текстом:\n{self.chapter_with_text}\n\nДоля таблиц в тексте:\n{tables_share}"

def main(args):
    """Parse the Markdown file named by ``args.mdfile`` and print the report.

    Args:
        args: parsed command-line namespace; only ``mdfile`` is read.
    """
    report = MdUpload(args.mdfile).parse_md_file()
    print(report)

2 changes: 2 additions & 0 deletions app/servants/pre_luncher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from db.db_methods import add_user, get_user, get_client, edit_user, save_criteria_pack
from main.check_packs.pack_config import BASE_PACKS, DEFAULT_REPORT_TYPE_INFO

from pymongo.errors import ConnectionFailure
from server import ALLOWED_EXTENSIONS

Expand Down Expand Up @@ -40,6 +41,7 @@ def init(app, debug):
user.criteria = BASE_PACKS[file_type].name
user.formats = list(ALLOWED_EXTENSIONS.get(file_type))
user.two_files = True

edit_user(user)

logger.info(f"Создан администратор по умолчанию: логин: {user.username}, пароль уточняйте у разработчика")
Expand Down
4 changes: 1 addition & 3 deletions assets/scripts/upload.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ const file_input = $("#upload_file");
const file_label = $("#upload_file_label");
const return_file_label = file_label.text();

const pt = $("#upload_file_pdf");
pdf_uploaded = pt.length > 0 ? !!pt.prop("files")[0] : false; //проверка наличия pdf
const pdf_file_input = $("#upload_file_pdf");
const pdf_file_label = $("#upload_file_label_pdf");
const return_pdf_file_label = pdf_file_label.text();
Expand Down Expand Up @@ -60,7 +58,7 @@ const resetFileUpload = () => {

const changeUploadButton = () => {
if (pdf_uploaded || file_uploaded) {
const pdf_size = pdf_uploaded ? (pt.prop("files")[0]?.size || 0) : 0;
const pdf_size = pdf_file_input.prop("files")[0]?.size || 0;
const file_size = file_input.prop("files")[0]?.size || 0;
if (pdf_size + file_size <= file_upload_limit) {
if (file_uploaded)
Expand Down
Loading

0 comments on commit dd1a5d9

Please sign in to comment.