Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
MarinaProsche committed Nov 8, 2023
1 parent 9510244 commit 3c0ee06
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 69 deletions.
9 changes: 0 additions & 9 deletions app/main/reports/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,10 @@ Proof-of-concept парсинг файлов `.docx` с выводом стру
```bash
$ python3 -m app.main.mse22.pdf_document text_from_pages --filename path_to_file
```
<<<<<<< HEAD
=======

>>>>>>> master
## `MD`

Парсинг файлов `.md` с выводом структуры файла в текстовом виде в stdout.

```bash
$ python3 -m app.main.reports.md_uploader md_parser --mdfile path_to_md_file
<<<<<<< HEAD
```
=======
```
>>>>>>> master
56 changes: 0 additions & 56 deletions app/main/reports/md_uploader/md_uploader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
<<<<<<< HEAD
'''Available checks for md-file:
pack "BaseReportCriterionPackMd"
Expand Down Expand Up @@ -83,27 +82,10 @@ def __init__(self, path_to_md_file):
self.inline_shapes = []

def upload(self):
=======
import markdown #installation: pip install markdown
import re

class MdUpload:
    """Container for a Markdown file path plus initially empty result fields."""

    def __init__(self, path_to_md_file):
        """Remember the source path and start with empty results.

        Args:
            path_to_md_file: Path of the Markdown file to be processed.
        """
        self.path_to_md_file = path_to_md_file
        # Each collection attribute receives its own fresh list object.
        self.headers, self.chapters, self.paragraphs = [], [], []
        self.html_text = ''
        self.tables, self.chapter_with_text = [], []

def read_md_file(self):
>>>>>>> master
with open(self.path_to_md_file, "r", encoding="utf-8") as f:
md_text = f.read()
return md_text

<<<<<<< HEAD
def parse(self, md_text):
self.html_text = markdown.markdown(md_text)
self.paragraphs = self.make_paragraphs(self.html_text)
Expand Down Expand Up @@ -219,29 +201,6 @@ def find_images(self):
total_height += width
self.inline_shapes.append((width, height))
return self.inline_shapes
=======
def get_html_from_md(self, md_text):
    """Render Markdown text to HTML and keep the result on the instance.

    The rendered HTML is stored in ``self.html_text`` and its
    newline-separated pieces in ``self.paragraphs``. Nothing is returned.

    Args:
        md_text: Markdown source text to convert.
    """
    rendered = markdown.markdown(md_text)
    self.html_text = rendered
    self.paragraphs = rendered.split('\n')

def get_headers(self):
    """Collect the contents of every ``<h1>`` element in ``self.html_text``.

    The matches are stored in ``self.headers`` in document order; nothing
    is returned.
    """
    # Raw string without the "\/" escape: in a normal string literal "\/" is
    # an invalid escape sequence (a warning on current Python versions), and
    # "/" needs no escaping in a regular expression anyway.
    header_regex = r"<h1>(.*?)</h1>"
    self.headers = re.findall(header_regex, self.html_text)

def get_chapters(self):
    """Collect the contents of every ``<h2>`` element in ``self.html_text``.

    The matches are stored in ``self.chapters`` in document order; nothing
    is returned.
    """
    # Raw string without the "\/" escape: in a normal string literal "\/" is
    # an invalid escape sequence (a warning on current Python versions), and
    # "/" needs no escaping in a regular expression anyway.
    chapter_regex = r"<h2>(.*?)</h2>"
    self.chapters = re.findall(chapter_regex, self.html_text)

def get_chapter_with_text(self):
    """Cut ``self.html_text`` into sections at the ``<h2>`` chapter headings.

    Appends to ``self.chapter_with_text``, in order: the text that precedes
    the first chapter heading, then one entry per chapter consisting of the
    chapter title immediately followed by the text up to the next heading.
    Nothing is returned.
    """
    remaining = self.html_text
    chapter_name = ''
    for chapter in self.chapters:
        # partition() cuts only at the FIRST occurrence of the heading, so a
        # chapter title that appears more than once no longer drops text or
        # raises IndexError the way an unbounded split() with [-2] did.
        before, marker, after = remaining.partition("<h2>" + chapter + "</h2>")
        if not marker:
            # Heading is not present in the remaining text; nothing to cut.
            continue
        self.chapter_with_text.append(chapter_name + before)
        chapter_name = chapter
        remaining = after
    # Whatever follows the last heading belongs to the last chapter.
    self.chapter_with_text.append(chapter_name + remaining)
>>>>>>> master

def get_tables_size(self):
count_table_line = 0
Expand All @@ -251,7 +210,6 @@ def get_tables_size(self):
count_table_line +=1
return round(count_table_line/count_paragraph, 4)

<<<<<<< HEAD
def find_literature_vkr(self, work_type):
if not self.literature_header:
for header in self.make_chapters(work_type):
Expand All @@ -273,21 +231,7 @@ def parse_md_file(self):
self.find_literature_vkr(work_type="VKR")
return f"Заголовки:\n{self.headers_main}\n\nГлавы\n{self.chapters}\n\nИзображения:\n\n{self.inline_shapes}"

=======
def parse_md_file(self):
    """Run the parsing steps in order and return a text report.

    Reads the file, converts it to HTML, extracts headers, chapters and
    chapters with their text, then computes the table share.

    Returns:
        A string listing the headers, the chapters, the chapters with
        their text and the value returned by ``get_tables_size()``.
    """
    md_text = self.read_md_file()
    self.get_html_from_md(md_text)
    self.get_headers()
    self.get_chapters()
    self.get_chapter_with_text()
    # Compute the table share once; it was previously called twice with the
    # first result thrown away.
    # NOTE(review): assumes get_tables_size() has no side effects - confirm.
    tables_share = self.get_tables_size()
    return f"Заголовки:\n{self.headers}\n\nГлавы:\n{self.chapters}\n\nГлавы с текстом:\n{self.chapter_with_text}\n\nДоля таблиц в тексте:\n{tables_share}"
>>>>>>> master

def main(args):
    """Parse the Markdown file named by ``args.mdfile`` and print the report.

    Args:
        args: Object exposing an ``mdfile`` attribute with the file path.
    """
    report = MdUpload(args.mdfile).parse_md_file()
    print(report)
<<<<<<< HEAD
=======

>>>>>>> master
4 changes: 0 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,5 @@ pdfplumber==0.6.1
pytest~=7.1.2
filetype==1.2.0
language-tool-python==2.7.1
<<<<<<< HEAD
markdown
md2pdf
=======
markdown==3.4.4
>>>>>>> master

0 comments on commit 3c0ee06

Please sign in to comment.