Skip to content

Commit

Permalink
Create function to convert uni format workbook into nested
Browse files Browse the repository at this point in the history
  • Loading branch information
istride committed Sep 23, 2024
1 parent c63c215 commit 682f224
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 8 deletions.
7 changes: 3 additions & 4 deletions src/rpft/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil
from pathlib import Path

from rpft.parsers.universal import create_workbook, parse_legacy_sheets
from rpft.parsers.universal import create_workbook, parse_legacy_sheets, parse_tables
from rpft.parsers.creation.contentindexparser import ContentIndexParser
from rpft.parsers.creation.tagmatcher import TagMatcher
from rpft.parsers.sheets import (
Expand Down Expand Up @@ -65,9 +65,8 @@ def uni_to_sheets(infile) -> bytes:
return book.export("xlsx")


def sheets_to_uni(infile) -> dict:
# TODO: convert uni sheets to dictionary
...
def sheets_to_uni(infile, fmt) -> list:
    """
    Build a sheet reader for `infile` using format `fmt`, then return the
    result of parsing the reader's tables
    """
    reader = create_sheet_reader(fmt, infile)

    return parse_tables(reader)


def get_content_index_parser(input_files, sheet_format, data_models, tags):
Expand Down
19 changes: 15 additions & 4 deletions src/rpft/parsers/universal.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,16 @@ def _(value: bool) -> str:
return str(value).lower()


# TODO: create a function to parse a list of tables i.e. a workbook
def parse_tables(reader: AbstractSheetReader) -> list:
    """
    Parse a workbook into a list of nested structures, one per sheet

    Every entry in `reader.sheets` is handed to `parse_table` together with
    its title, its table headers and a full slice of its table. The results
    are returned in the iteration order of `reader.sheets`.
    """
    return [
        parse_table(title, sheet.table.headers, sheet.table[:])
        for title, sheet in reader.sheets.items()
    ]


def parse_table(
title: str = None,
headers: Sequence[str] = tuple(),
Expand All @@ -253,10 +262,12 @@ def parse_table(
"""
Parse data in tabular form into a nested structure
"""
title = title or "table"

if not headers or not rows:
return {title or "table": []}
return {title: []}

return create_obj(stream(title or "table", headers, rows))
return create_obj(stream(title, headers, rows))


def stream(
Expand Down Expand Up @@ -293,7 +304,7 @@ def create_obj(pairs):
for kp, v in pairs:
obj[kp] = v

return obj
return dict(obj)


def convert_cell(s: str, recursive=True) -> Any:
Expand Down
18 changes: 18 additions & 0 deletions tests/test_universal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
create_workbook,
parse_legacy_sheets,
parse_table,
parse_tables,
tabulate,
)
from tablib import Dataset
Expand Down Expand Up @@ -343,6 +344,23 @@ def test_save_as_dict(self):
self.assertEqual(output, exp)


class TestConvertWorkbookToUniversal(TestCase):
    """Checks conversion of a whole workbook via parse_tables"""

    def test_workbook_converts_to_list_of_objects(self):
        # Two single-row sheets, each with its own headers and title
        table1 = Dataset(("t1a1", "t1b1"), headers=("T1A", "T1B"), title="table1")
        table2 = Dataset(("t2a1", "t2b1"), headers=("T2A", "T2B"), title="table2")
        reader = DatasetSheetReader([table1, table2])

        result = parse_tables(reader)

        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 2)
        # Exact type check on purpose: dict subclasses must not pass
        for item in result:
            self.assertIs(type(item), dict)


class TestConvertTableToNested(TestCase):

def test_default_type_is_string(self):
Expand Down

0 comments on commit 682f224

Please sign in to comment.