Skip to content

Commit

Permalink
TextStorage class
Browse files — browse the repository at this point in the history
  • Loading branch information
blindsphynx committed Nov 23, 2023
1 parent 9a927bb commit e54b162
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions app/main/reports/text_storage/text_storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from app.main.reports.docx_uploader import DocxUploader
import pickle


class TextStorage:
Expand All @@ -23,11 +24,14 @@ def parse_headers_and_pages(self):

def parse_chapters(self):
    """Collect the document's heading chapters into ``self.chapters``.

    Iterates over ``self.docx.chapters``. For every chapter that has at
    least one child and whose ``"style"`` contains ``"heading"``, one dict
    is appended to ``self.chapters`` with the keys:

    * ``"header"`` -- the chapter's own text; when it contains
      "ПРИЛОЖЕНИЕ", only the part before the first "." is kept.
    * ``"start_page"`` -- always ``0`` here (presumably filled in later by
      ``parse_headers_and_pages`` -- TODO confirm).
    * ``"text"`` -- the children's texts concatenated without a separator.

    Chapters with no children, or with a non-heading style, are skipped.
    """
    for chapter in self.docx.chapters:
        head = chapter["styled_text"]["text"]
        if "ПРИЛОЖЕНИЕ" in head:
            # Appendix headers: keep only the label before the first period.
            head = head.split(".")[0]

        children = chapter["child"]
        if children and "heading" in chapter["style"]:
            text = "".join(child["styled_text"]["text"] for child in children)
            # Append exactly once per chapter, using the trimmed header.
            self.chapters.append({"header": head, "start_page": 0, "text": text})


if __name__ == "__main__":
Expand All @@ -36,5 +40,14 @@ def parse_chapters(self):
# Manual smoke run: build a TextStorage from docx_path / pdf_path (both are
# assigned in lines not visible in this view), populate its chapters, then
# round-trip the chapter list through pickle.
text_storage = TextStorage(docx_path, pdf_path)
text_storage.parse_chapters()
text_storage.parse_headers_and_pages()
# NOTE(review): the print loop below and the commented-out copy right after it
# look like the removed and added sides of the same diff hunk -- confirm which
# one is meant to be live.
for ch in text_storage.chapters:
print(ch)
# for ch in text_storage.chapters:
# print(ch)

# Serialize the chapter list to "pickle.bin" (relative path, so it lands in
# the current working directory).
with open("pickle.bin", "wb") as outfile:
pickle.dump(text_storage.chapters, outfile)

# Read the same file back. pickle.load can execute arbitrary code from its
# input; here the file was written by the lines just above.
with open("pickle.bin", "rb") as infile:
deserialized = pickle.load(infile)

# Only the success case is reported; nothing is printed when the reloaded
# data does not compare equal to the in-memory chapters.
if text_storage.chapters == deserialized:
print("Serialization is successful")

0 comments on commit e54b162

Please sign in to comment.