diff --git a/HISTORY.md b/HISTORY.md index 2c1e59e8..17551004 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,8 @@ ### Improvements - The html format now supports importing from HTML content (#243) +- The ODS format now supports importing from .ods files (#567). The support is + still a bit experimental. ### Changes diff --git a/docs/formats.rst b/docs/formats.rst index 636f7309..178edbda 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -145,12 +145,19 @@ If a title has been set, it will be exported as the table caption. ods === -Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently -export-only. +Import/export data in OpenDocument Spreadsheet format. + +.. versionadded:: 3.6.0 + + Import functionality was added. This format is optional, install Tablib with ``pip install "tablib[ods]"`` to make the format available. +The ``import_set()`` method also supports a ``skip_lines`` parameter that you +can set to a number of lines that should be skipped before starting to read +data. + .. 
@classmethod
def import_sheet(cls, dset, sheet, headers=True, skip_lines=0):
    """Populate dataset `dset` with the rows of one ODS table element.

    The dataset title is set from the sheet's ``name`` attribute. Only
    children whose tag is ``table:table-row`` are considered, and they are
    numbered among themselves, so other children of the sheet do not shift
    the header position or the ``skip_lines`` count.

    :param dset: dataset to fill (title, optional headers, appended rows).
    :param sheet: table element exposing ``getAttribute`` and ``childNodes``.
    :param headers: if true, the row at index ``skip_lines`` becomes the
        dataset headers instead of a data row.
    :param skip_lines: number of leading rows to ignore.
    """
    dset.title = sheet.getAttribute('name')

    def is_real_cell(cell):
        # Childless cells that carry a column-repeat count are left out.
        # NOTE(review): presumably these are trailing filler cells - confirm.
        return cell.hasChildNodes() or not cell.getAttribute('numbercolumnsrepeated')

    # Number the rows only: counting every child would let a leading
    # non-row element push the header row past index `skip_lines`.
    rows = (node for node in sheet.childNodes if node.tagName == 'table:table-row')

    for i, row in enumerate(rows):
        if i < skip_lines:
            continue
        row_vals = [cls.read_cell(cell) for cell in row.childNodes if is_real_cell(cell)]
        if not row_vals:
            continue
        if i == skip_lines and headers:
            dset.headers = row_vals
        else:
            # Pad short (ragged) rows with empty strings up to the dataset width.
            if i > skip_lines and len(row_vals) < dset.width:
                row_vals += [''] * (dset.width - len(row_vals))
            dset.append(row_vals)


@classmethod
def read_cell(cls, cell, value_type=None):
    """Return the Python value held by `cell`.

    :param cell: a cell element or one of its descendants.
    :param value_type: value type inherited from the enclosing cell; when
        None it is read from the node's ``valuetype`` attribute.
    :returns: ``date``/``datetime`` for date cells, ``time`` for time
        cells, ``bool`` for boolean cells, ``float`` for float cells, the
        raw text for anything else, ``''`` for an empty leaf node and
        ``None`` when no child node yields a value.
    """
    def convert_date(val):
        # A 'T' separator means the value carries a time part as well.
        if 'T' in val:
            return datetime.strptime(val, "%Y-%m-%dT%H:%M:%S")
        return datetime.strptime(val, "%Y-%m-%d").date()

    def convert_time(val):
        # Accept the plain HH:MM:SS form first, then fall back to the
        # duration form (PT14H30M00S).
        try:
            return datetime.strptime(val, "%H:%M:%S").time()
        except ValueError:
            return datetime.strptime(val, "PT%HH%MM%SS").time()

    if value_type is None:
        value_type = cell.getAttribute('valuetype')
    if value_type == 'date':
        date_value = cell.getAttribute('datevalue')
        if date_value:
            return convert_date(date_value)
    if value_type == 'time':
        return convert_time(cell.getAttribute('timevalue'))
    if value_type == 'boolean':
        return cell.getAttribute('booleanvalue') == 'true'

    if not cell.childNodes:
        value = getattr(cell, 'data', None)
        if value is None:
            try:
                value = cell.getAttribute('value')
            except ValueError:
                # NOTE(review): odfpy appears to raise ValueError when the
                # attribute is not allowed on this element - confirm.
                value = None
        if value is None:
            return ''
        if value_type == 'float':
            return float(value)
        if value_type == 'date':
            return convert_date(value)
        return value  # Any other type default to 'string'

    for subnode in cell.childNodes:
        value = cls.read_cell(subnode, value_type)
        # Compare explicitly so that a legitimate zero is not discarded.
        if value is not None and value != '':
            return value
    return None


@classmethod
def import_set(cls, dset, in_stream, headers=True, skip_lines=0):
    """Populate dataset `dset` from ODS stream.

    The dataset is wiped first. Every child of the spreadsheet whose
    qualified name ends in ``table`` is then imported into `dset`.

    :param dset: dataset to fill.
    :param in_stream: source handed to ``opendocument.load``.
    :param headers: forwarded to :meth:`import_sheet`.
    :param skip_lines: forwarded to :meth:`import_sheet`.
    """
    dset.wipe()

    ods_book = opendocument.load(in_stream)
    # NOTE(review): all tables end up in the same dataset; confirm this is
    # intended for documents holding more than one sheet.
    for sheet in ods_book.spreadsheet.childNodes:
        if sheet.qname[1] == 'table':
            cls.import_sheet(dset, sheet, headers, skip_lines)


@classmethod
def import_book(cls, dbook, in_stream, headers=True):
    """Populate databook `dbook` from ODS stream.

    The databook is wiped first, then one new dataset is added per table
    found in the spreadsheet.

    :param dbook: databook to fill.
    :param in_stream: source handed to ``opendocument.load``.
    :param headers: forwarded to :meth:`import_sheet` for every sheet.
    """
    dbook.wipe()

    ods_book = opendocument.load(in_stream)

    for sheet in ods_book.spreadsheet.childNodes:
        if sheet.qname[1] != 'table':
            continue
        dset = tablib.Dataset()
        cls.import_sheet(dset, sheet, headers)
        dbook.add_sheet(dset)
class ODSTests(BaseTestCase):
    """Export/import checks for the ODS format."""

    def test_ods_export_import_set(self):
        """A row of mixed types survives an export followed by an import."""
        day = datetime.date(2019, 10, 4)
        moment = datetime.datetime(2019, 10, 4, 12, 30, 8)
        clock = datetime.time(14, 30)
        data.append(('string', '004', 42, 21.55, Decimal('34.5'), day, clock, moment))
        data.headers = (
            'string', 'start0', 'integer', 'float', 'decimal', 'date', 'time', 'date/time'
        )
        exported = data.ods
        data.ods = exported
        first_row = data.dict[0]
        self.assertEqual(first_row['string'], 'string')
        self.assertEqual(first_row['start0'], '004')
        self.assertEqual(first_row['integer'], 42)
        self.assertEqual(first_row['float'], 21.55)
        self.assertEqual(first_row['decimal'], 34.5)
        self.assertEqual(first_row['date'], day)
        self.assertEqual(first_row['time'], clock)
        self.assertEqual(first_row['date/time'], moment)

    def test_ods_import_book(self):
        """The book fixture loads as a databook with two sheets."""
        fixture = Path(__file__).parent.joinpath('files', 'book.ods')
        with fixture.open('rb') as stream:
            book = tablib.Databook().load(stream, 'ods')
        self.assertEqual(len(book.sheets()), 2)

    def test_ods_import_set_skip_lines(self):
        """With ``skip_lines=2`` the third exported row becomes the headers."""
        for line in (
            ('garbage', 'line', ''),
            ('', '', ''),
            ('id', 'name', 'description'),
        ):
            data.append(line)
        exported = data.ods
        reloaded = tablib.Dataset().load(exported, skip_lines=2)
        self.assertEqual(reloaded.headers, ['id', 'name', 'description'])

    def test_ods_import_set_ragged(self):
        """The last row of the ragged fixture is padded with empty strings."""
        fixture = Path(__file__).parent.joinpath('files', 'ragged.ods')
        with fixture.open('rb') as stream:
            loaded = tablib.Dataset().load(stream, 'ods')
        self.assertEqual(loaded.pop(), (1, '', True, ''))

    def test_ods_unknown_value_type(self):
        """The hand-edited fixture still yields its cell text."""
        # The fixture was edited by hand; going by its file name, presumably
        # to give a cell a value type the importer does not recognise.
        fixture = Path(__file__).parent.joinpath('files', 'unknown_value_type.ods')
        with fixture.open('rb') as stream:
            loaded = tablib.Dataset().load(stream, 'ods')
        self.assertEqual(loaded.pop(), ('abcd',))