From 3bf002707c270a3b136320d6d3b1cb8352c93ccb Mon Sep 17 00:00:00 2001
From: krande <kristoffer_andersen@outlook.com>
Date: Thu, 18 Jan 2024 16:11:26 +0100
Subject: [PATCH 1/2] parse existing tables from markdown

---
 files/doc_regular_table/00-main/table.md | 14 +++++++++
 files/doc_regular_table/metadata.yaml    |  5 ++++
 src/paradoc/cli_app.py                   |  2 +-
 src/paradoc/common.py                    | 38 +++++++++++++++++++++++-
 src/paradoc/document.py                  | 26 +++++++++-------
 tests/tables/test_tables.py              |  7 +++++
 6 files changed, 79 insertions(+), 13 deletions(-)
 create mode 100644 files/doc_regular_table/00-main/table.md
 create mode 100644 files/doc_regular_table/metadata.yaml

diff --git a/files/doc_regular_table/00-main/table.md b/files/doc_regular_table/00-main/table.md
new file mode 100644
index 0000000..c595590
--- /dev/null
+++ b/files/doc_regular_table/00-main/table.md
@@ -0,0 +1,14 @@
+# A basic table
+
+Some text before the table
+
+|           | cat A [unit] | cat 2 [unitB] | num ex [-] |
+|:----------|-------------:|--------------:|-----------:|
+| example1  |         4000 |          1.13 |          6 |
+| example4  |         4000 |          2.15 |          6 |
+| example9  |         4000 |          4.04 |          6 |
+| example10 |         4500 |             2 |          6 |
+
+Table: A basic table {#tbl:a-basic-table}
+
+And some text after
\ No newline at end of file
diff --git a/files/doc_regular_table/metadata.yaml b/files/doc_regular_table/metadata.yaml
new file mode 100644
index 0000000..4aa9cf6
--- /dev/null
+++ b/files/doc_regular_table/metadata.yaml
@@ -0,0 +1,5 @@
+lang: en-GB
+linkReferences: true
+nameInLink: true
+figPrefix: "Figure"
+tblPrefix: "Table"
\ No newline at end of file
diff --git a/src/paradoc/cli_app.py b/src/paradoc/cli_app.py
index 3400d34..df224f2 100644
--- a/src/paradoc/cli_app.py
+++ b/src/paradoc/cli_app.py
@@ -11,7 +11,7 @@ def main(
     source_dir: str,
     report_name: str,
     auto_open: bool = False,
-    work_dir: str = None,
+    work_dir: str = "temp",
     export_format: ExportFormats = ExportFormats.DOCX,
 ):
     one = OneDoc(source_dir, work_dir=work_dir)
diff --git a/src/paradoc/common.py b/src/paradoc/common.py
index 23b216b..b1b1af7 100644
--- a/src/paradoc/common.py
+++ b/src/paradoc/common.py
@@ -41,6 +41,7 @@ class Table:
     add_link: bool = True
     md_instances: List[MarkDownFile] = field(default_factory=list)
     docx_instances: List[object] = field(default_factory=list)
+    link_name_override: str = None
 
     def __post_init__(self):
         if self.df is None:
@@ -61,9 +62,40 @@ def to_markdown(self, include_name_in_cell=False, flags=None):
             return tbl_str
         tbl_str += f"\n\nTable: {self.caption}"
         if self.add_link:
-            tbl_str += f" {{#tbl:{self.name}}}"
+            if self.link_name_override is None:
+                link_name = self.name
+            else:
+                link_name = self.link_name_override
+
+            tbl_str += f" {{#tbl:{link_name}}}"
         return tbl_str
 
+    @staticmethod
+    def from_markdown_str(table_str: str) -> Table:
+        """Parse a markdown pipe table that ends with a "Table: <caption>" line and return a Table instance"""
+        lines = table_str.splitlines()
+        header = [x.strip() for x in lines[0].split("|")[1:-1]]
+        data = []
+        table_caption_str = None
+        for line in lines[2:]:
+            if line == "":
+                continue
+            if line.strip().startswith("Table:"):
+                table_caption_str = line.strip()
+                break
+            data.append([x.strip() for x in line.split("|")[1:-1]])
+
+        caption = table_caption_str.split("Table:")[1].strip()
+        caption = caption.split('{')[0].strip()
+        # Create a pandas DataFrame using the extracted header and data rows
+        df = pd.DataFrame(data, columns=header)
+        name = str(df.values[0][0])
+        tbl_ref = re.search(r"{#tbl:(.*?)}", table_str)
+        link_override = None
+        if tbl_ref is not None:
+            link_override = tbl_ref.group(1)
+        return Table(name=name, df=df, caption=caption, link_name_override=link_override)
+
 
 @dataclass
 class Figure:
@@ -110,6 +142,10 @@ def get_figures(self):
         regx = re.compile(r'<img src="(?P<file_path>.*?)" alt="(?P<caption>.*?)"\s*(?:width="(?P<width>.*?)"|)\/>')
         yield from regx.finditer(self.read_original_file())
 
+    def get_tables(self):
+        regx = re.compile(r'(\|.*?\nTable:.*?$)', re.MULTILINE | re.DOTALL)
+        yield from regx.finditer(self.read_original_file())
+
 
 class ExportFormats(str, Enum):
     DOCX = "docx"
diff --git a/src/paradoc/document.py b/src/paradoc/document.py
index 9fea5b7..678363a 100644
--- a/src/paradoc/document.py
+++ b/src/paradoc/document.py
@@ -61,16 +61,16 @@ class OneDoc:
     FORMATS = ExportFormats
 
     def __init__(
-        self,
-        source_dir=None,
-        main_prefix="00-main",
-        app_prefix="01-app",
-        clean_build_dir=True,
-        create_dirs=False,
-        output_dir=None,
-        work_dir="temp",
-        use_default_html_style=True,
-        **kwargs,
+            self,
+            source_dir=None,
+            main_prefix="00-main",
+            app_prefix="01-app",
+            clean_build_dir=True,
+            create_dirs=False,
+            output_dir=None,
+            work_dir="temp",
+            use_default_html_style=True,
+            **kwargs,
     ):
         self.source_dir = pathlib.Path().resolve().absolute() if source_dir is None else pathlib.Path(source_dir)
         self.work_dir = pathlib.Path(work_dir).resolve().absolute()
@@ -138,7 +138,7 @@ def _setup(self, create_dirs, clean_build_dir):
                 # Check if the figure is commented out
                 # Get first newline right before regex search found start and till the end (capture entire line)
                 start = fig.string[: fig.start()].rfind("\n") + 1
-                end = fig.string[fig.start() :].find("\n") + fig.start()
+                end = fig.string[fig.start():].find("\n") + fig.start()
                 line = fig.string[start:end]
                 if line.startswith("[//]: #"):
                     continue
@@ -155,6 +155,10 @@ def _setup(self, create_dirs, clean_build_dir):
                     )
                 self.figures[caption] = Figure(name, caption, ref, file_path, md_instance=md_file)
 
+            for re_table in md_file.get_tables():
+                table = Table.from_markdown_str(re_table.group(1))
+                self.tables[table.name] = table
+
         if clean_build_dir is True:
             shutil.rmtree(self.build_dir, ignore_errors=True)
 
diff --git a/tests/tables/test_tables.py b/tests/tables/test_tables.py
index 1938118..e33d8d2 100644
--- a/tests/tables/test_tables.py
+++ b/tests/tables/test_tables.py
@@ -16,3 +16,10 @@ def test_table(files_dir, test_dir):
     one.add_table("my_table_5", df, "No Space 3")
 
     one.compile("TableDoc")
+
+
+def test_regular_table(files_dir, test_dir):
+    report_dir = files_dir / "doc_regular_table"
+    one = OneDoc(report_dir, work_dir=test_dir / "doc_regular_table")
+
+    one.compile("TableDoc", export_format="docx")

From e9b986ef1a32b8d611b87272e3556c7b1e8ce9e9 Mon Sep 17 00:00:00 2001
From: krande <kristoffer_andersen@outlook.com>
Date: Thu, 18 Jan 2024 16:31:25 +0100
Subject: [PATCH 2/2] fix black formatting

---
 src/paradoc/common.py   |  4 ++--
 src/paradoc/document.py | 22 +++++++++++-----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/paradoc/common.py b/src/paradoc/common.py
index b1b1af7..6cdf142 100644
--- a/src/paradoc/common.py
+++ b/src/paradoc/common.py
@@ -86,7 +86,7 @@ def from_markdown_str(table_str: str) -> Table:
             data.append([x.strip() for x in line.split("|")[1:-1]])
 
         caption = table_caption_str.split("Table:")[1].strip()
-        caption = caption.split('{')[0].strip()
+        caption = caption.split("{")[0].strip()
         # Create a pandas DataFrame using the extracted header and data rows
         df = pd.DataFrame(data, columns=header)
         name = str(df.values[0][0])
@@ -143,7 +143,7 @@ def get_figures(self):
         yield from regx.finditer(self.read_original_file())
 
     def get_tables(self):
-        regx = re.compile(r'(\|.*?\nTable:.*?$)', re.MULTILINE | re.DOTALL)
+        regx = re.compile(r"(\|.*?\nTable:.*?$)", re.MULTILINE | re.DOTALL)
         yield from regx.finditer(self.read_original_file())
 
 
diff --git a/src/paradoc/document.py b/src/paradoc/document.py
index 678363a..aa43fc5 100644
--- a/src/paradoc/document.py
+++ b/src/paradoc/document.py
@@ -61,16 +61,16 @@ class OneDoc:
     FORMATS = ExportFormats
 
     def __init__(
-            self,
-            source_dir=None,
-            main_prefix="00-main",
-            app_prefix="01-app",
-            clean_build_dir=True,
-            create_dirs=False,
-            output_dir=None,
-            work_dir="temp",
-            use_default_html_style=True,
-            **kwargs,
+        self,
+        source_dir=None,
+        main_prefix="00-main",
+        app_prefix="01-app",
+        clean_build_dir=True,
+        create_dirs=False,
+        output_dir=None,
+        work_dir="temp",
+        use_default_html_style=True,
+        **kwargs,
     ):
         self.source_dir = pathlib.Path().resolve().absolute() if source_dir is None else pathlib.Path(source_dir)
         self.work_dir = pathlib.Path(work_dir).resolve().absolute()
@@ -138,7 +138,7 @@ def _setup(self, create_dirs, clean_build_dir):
                 # Check if the figure is commented out
                 # Get first newline right before regex search found start and till the end (capture entire line)
                 start = fig.string[: fig.start()].rfind("\n") + 1
-                end = fig.string[fig.start():].find("\n") + fig.start()
+                end = fig.string[fig.start() :].find("\n") + fig.start()
                 line = fig.string[start:end]
                 if line.startswith("[//]: #"):
                     continue