Skip to content

Commit

Permalink
Add laziness metadata to plugins
Browse files Browse the repository at this point in the history
  • Loading branch information
turicas committed Sep 11, 2017
1 parent c22f6f7 commit 5c6da04
Show file tree
Hide file tree
Showing 23 changed files with 68 additions and 5 deletions.
3 changes: 3 additions & 0 deletions rows/plugins/dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def import_from_dicts(data, samples=1000, *args, **kwargs):
*args, **kwargs)


import_from_dicts.is_lazy = True


def export_to_dicts(table, *args, **kwargs):
    """Return the rows of `table` as a list of dictionaries.

    Every row yielded by iterating `table` becomes one dict whose keys are
    the names in `table.field_names` and whose values are read from the row
    with `getattr`. Extra positional and keyword arguments are accepted but
    not used here.
    """
    records = []
    for row in table:
        record = {}
        # `field_names` is looked up per row, exactly as the rows are visited.
        for name in table.field_names:
            record[name] = getattr(row, name)
        records.append(record)
    return records
5 changes: 5 additions & 0 deletions rows/plugins/ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,10 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):

max_length = max(len(row) for row in table_rows)
full_rows = complete_with_None(table_rows, max_length)

meta = {'imported_from': 'ods', 'filename': filename,}

return create_table(full_rows, meta=meta, *args, **kwargs)


import_from_ods.is_lazy = False
3 changes: 3 additions & 0 deletions rows/plugins/plugin_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
return create_table(reader, meta=meta, *args, **kwargs)


import_from_csv.is_lazy = True


def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
dialect=unicodecsv.excel, batch_size=100, *args, **kwargs):
"""Export a `rows.Table` to a CSV file
Expand Down
4 changes: 4 additions & 0 deletions rows/plugins/plugin_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
return create_table(table_rows, meta=meta, *args, **kwargs)


import_from_html.is_lazy = False


def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
**kwargs):
serialized_table = serialize(table, *args, **kwargs)
Expand All @@ -106,6 +109,7 @@ def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
header = [' <th> {} </th>\n'.format(field) for field in fields]
result.extend(header)
result.extend([' </tr>\n', ' </thead>\n', '\n', ' <tbody>\n', '\n'])
# TODO: could be lazy so we don't need to store the whole table into memory
for index, row in enumerate(serialized_table, start=1):
css_class = 'odd' if index % 2 == 1 else 'even'
result.append(' <tr class="{}">\n'.format(css_class))
Expand Down
6 changes: 6 additions & 0 deletions rows/plugins/plugin_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
filename, fobj = get_filename_and_fobj(filename_or_fobj)

json_obj = json.load(fobj, encoding=encoding)
# TODO: may use import_from_dicts here
field_names = list(json_obj[0].keys())
table_rows = [[item[key] for key in field_names] for item in json_obj]

Expand All @@ -47,6 +48,9 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
return create_table([field_names] + table_rows, meta=meta, *args, **kwargs)


import_from_json.is_lazy = False


def _convert(value, field_type, *args, **kwargs):
if value is None or field_type in (
fields.BinaryField,
Expand Down Expand Up @@ -77,6 +81,8 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
fields = table.fields
prepared_table = prepare_to_export(table, *args, **kwargs)
field_names = next(prepared_table)

# TODO: could be lazy so we don't need to store the whole table into memory
data = [{field_name: _convert(value, fields[field_name], *args, **kwargs)
for field_name, value in zip(field_names, row)}
for row in prepared_table]
Expand Down
6 changes: 5 additions & 1 deletion rows/plugins/plugin_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ def import_from_parquet(filename_or_fobj, *args, **kwargs):
for schema in parquet._read_footer(fobj).schema
if schema.type is not None])
header = list(types.keys())
table_rows = list(parquet.reader(fobj)) # TODO: be lazy
# TODO: make it lazy
table_rows = list(parquet.reader(fobj))

meta = {'imported_from': 'parquet', 'filename': filename,}
return create_table([header] + table_rows, meta=meta, force_types=types,
*args, **kwargs)


import_from_parquet.is_lazy = False
3 changes: 3 additions & 0 deletions rows/plugins/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
return create_table(data, meta=meta, *args, **kwargs)


import_from_sqlite.is_lazy = True


def export_to_sqlite(table, filename_or_connection, table_name=None,
table_name_format='table{index}', batch_size=100,
*args, **kwargs):
Expand Down
7 changes: 7 additions & 0 deletions rows/plugins/txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,24 @@ def import_from_txt(filename_or_fobj, encoding='utf-8', *args, **kwargs):
filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
contents = fobj.read().decode(encoding).strip().splitlines()

# TODO: make it lazy

# remove '+----+----+' lines
contents = contents[1:-1]
del contents[1]

table_rows = [[value.strip() for value in row.split(PIPE)[1:-1]]
for row in contents]

meta = {'imported_from': 'txt',
'filename': filename,
'encoding': encoding,}
return create_table(table_rows, meta=meta, *args, **kwargs)


import_from_txt.is_lazy = False


def export_to_txt(table, filename_or_fobj=None, encoding=None,
*args, **kwargs):
'''Export a `rows.Table` to text
Expand All @@ -70,6 +76,7 @@ def export_to_txt(table, filename_or_fobj=None, encoding=None,
split_line = PLUS + PLUS.join(dashes) + PLUS

result = [split_line, header, split_line]
# TODO: make it lazy
for row in table_rows:
values = [value.rjust(max_sizes[field_name])
for field_name, value in zip(field_names, row)]
Expand Down
7 changes: 7 additions & 0 deletions rows/plugins/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ def func(rows_data):
def create_table(data, meta=None, fields=None, skip_header=True,
import_fields=None, samples=None, force_types=None,
lazy=False, *args, **kwargs):
# TODO: change samples to be a fixed number
# TODO: may change samples logic (`float('inf')` or `all`)
# TODO: add auto_detect_types=True parameter

table_rows = iter(data)
sample_rows = []

Expand All @@ -163,6 +166,9 @@ def create_table(data, meta=None, fields=None, skip_header=True,
if not isinstance(fields, OrderedDict):
raise ValueError('`fields` must be an `OrderedDict`')

# TODO: if `fields` is set, we're going to have the wrong order,
# compared to the first row (header).

if skip_header:
_ = next(table_rows)

Expand All @@ -187,6 +193,7 @@ def create_table(data, meta=None, fields=None, skip_header=True,

if not lazy:
table = Table(fields=fields, meta=meta)

# TODO: put this inside Table.__init__
for row in chain(sample_rows, table_rows):
table.append({field_name: value
Expand Down
3 changes: 3 additions & 0 deletions rows/plugins/xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
return create_table(table_rows, meta=meta, *args, **kwargs)


import_from_xls.is_lazy = False


def export_to_xls(table, filename_or_fobj=None, sheet_name='Sheet1', *args,
**kwargs):

Expand Down
4 changes: 4 additions & 0 deletions rows/plugins/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,16 @@ def import_from_xlsx(filename_or_fobj, sheet_name=None, sheet_index=0,
for row_index in range(start_row, end_row + 1)]

filename, _ = get_filename_and_fobj(filename_or_fobj, dont_open=True)

metadata = {'imported_from': 'xlsx',
'filename': filename,
'sheet_name': sheet_name, }
return create_table(table_rows, meta=metadata, *args, **kwargs)


import_from_xlsx.is_lazy = False


FORMATTING_STYLES = {
fields.DateField: 'YYYY-MM-DD',
fields.DatetimeField: 'YYYY-MM-DD HH:MM:SS',
Expand Down
4 changes: 4 additions & 0 deletions rows/plugins/xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,

filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
xml = fobj.read().decode(encoding)
# TODO: make it lazy (is it possible with lxml?)
tree = tree_from_string(xml)
row_elements = tree.xpath(rows_xpath)

Expand All @@ -80,3 +81,6 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
'filename': filename,
'encoding': encoding,}
return create_table([header] + result_rows, meta=meta, *args, **kwargs)


import_from_xpath.is_lazy = False
1 change: 1 addition & 0 deletions tests/tests_plugin_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class PluginCsvTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
    """Check the CSV plugin's package-level names and its laziness flag."""
    csv_plugin = rows.plugins.plugin_csv
    # The shortcuts on `rows` must be the plugin's own function objects.
    self.assertIs(rows.import_from_csv, csv_plugin.import_from_csv)
    self.assertIs(rows.export_to_csv, csv_plugin.export_to_csv)
    # The CSV importer is expected to be flagged as lazy.
    self.assertTrue(rows.import_from_csv.is_lazy)

@mock.patch('rows.plugins.plugin_csv.create_table')
def test_import_from_csv_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def test_imports(self):
self.assertIs(rows.import_from_dicts,
rows.plugins.dicts.import_from_dicts)
self.assertIs(rows.export_to_dicts, rows.plugins.dicts.export_to_dicts)
self.assertTrue(rows.import_from_dicts.is_lazy)

@mock.patch('rows.plugins.dicts.create_table')
def test_import_from_dicts_uses_create_table(self, mocked_create_table):
Expand Down
5 changes: 3 additions & 2 deletions tests/tests_plugin_html.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding: utf-8

# Copyright 2014-2016 Álvaro Justen <https://github.com/turicas/rows/>
# Copyright 2014-2017 Álvaro Justen <https://github.com/turicas/rows/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -54,6 +54,7 @@ def test_imports(self):
self.assertIs(rows.import_from_html,
rows.plugins.plugin_html.import_from_html)
self.assertIs(rows.export_to_html, rows.plugins.plugin_html.export_to_html)
self.assertFalse(rows.import_from_html.is_lazy)

def test_import_from_html_filename(self):
table = rows.import_from_html(self.filename, encoding=self.encoding)
Expand Down Expand Up @@ -89,7 +90,7 @@ def test_import_from_html_uses_create_table(self, mocked_create_table):
call = mocked_create_table.call_args
kwargs['meta'] = {'imported_from': 'html',
'filename': self.filename,
'encoding': 'iso-8859-1',}
'encoding': 'iso-8859-1', }
self.assertEqual(call[1], kwargs)

def test_export_to_html_filename(self):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def test_imports(self):
rows.plugins.plugin_json.import_from_json)
self.assertIs(rows.export_to_json,
rows.plugins.plugin_json.export_to_json)
self.assertFalse(rows.import_from_json.is_lazy)

@mock.patch('rows.plugins.plugin_json.create_table')
def test_import_from_json_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_ods.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class PluginOdsTestCase(utils.RowsTestMixIn, unittest.TestCase):

def test_imports(self):
    """Check the ODS plugin's package-level name and its laziness flag."""
    ods_plugin = rows.plugins.ods
    # The shortcut on `rows` must be the plugin's own function object.
    self.assertIs(rows.import_from_ods, ods_plugin.import_from_ods)
    # The ODS importer is expected to be flagged as not lazy.
    self.assertFalse(rows.import_from_ods.is_lazy)

@mock.patch('rows.plugins.ods.create_table')
def test_import_from_ods_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class PluginParquetTestCase(unittest.TestCase):
def test_imports(self):
    """Check the Parquet plugin's package-level name and its laziness flag."""
    parquet_plugin = rows.plugins.plugin_parquet
    # The shortcut on `rows` must be the plugin's own function object.
    self.assertIs(rows.import_from_parquet, parquet_plugin.import_from_parquet)
    # The Parquet importer is expected to be flagged as not lazy.
    self.assertFalse(rows.import_from_parquet.is_lazy)

@mock.patch('rows.plugins.plugin_parquet.create_table')
def test_import_from_parquet_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_imports(self):
rows.plugins.sqlite.import_from_sqlite)
self.assertIs(rows.export_to_sqlite,
rows.plugins.sqlite.export_to_sqlite)
self.assertTrue(rows.import_from_sqlite.is_lazy)

@mock.patch('rows.plugins.sqlite.create_table')
def test_import_from_sqlite_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class PluginTxtTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
    """Check the TXT plugin's package-level names and its laziness flag."""
    txt_plugin = rows.plugins.txt
    # The shortcuts on `rows` must be the plugin's own function objects.
    self.assertIs(rows.import_from_txt, txt_plugin.import_from_txt)
    self.assertIs(rows.export_to_txt, txt_plugin.export_to_txt)
    # The TXT importer is expected to be flagged as not lazy.
    self.assertFalse(rows.import_from_txt.is_lazy)

@mock.patch('rows.plugins.txt.create_table')
def test_import_from_txt_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_xls.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class PluginXlsTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
    """Check the XLS plugin's package-level names and its laziness flag."""
    xls_plugin = rows.plugins.xls
    # The shortcuts on `rows` must be the plugin's own function objects.
    self.assertIs(rows.import_from_xls, xls_plugin.import_from_xls)
    self.assertIs(rows.export_to_xls, xls_plugin.export_to_xls)
    # The XLS importer is expected to be flagged as not lazy.
    self.assertFalse(rows.import_from_xls.is_lazy)

@mock.patch('rows.plugins.xls.create_table')
def test_import_from_xls_uses_create_table(self, mocked_create_table):
Expand Down
1 change: 1 addition & 0 deletions tests/tests_plugin_xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_imports(self):
rows.plugins.xlsx.import_from_xlsx)
self.assertIs(rows.export_to_xlsx,
rows.plugins.xlsx.export_to_xlsx)
self.assertFalse(rows.import_from_xlsx.is_lazy)

@mock.patch('rows.plugins.xlsx.create_table')
def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
Expand Down
4 changes: 2 additions & 2 deletions tests/tests_plugin_xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ def test_import_from_xpath_unescape_and_extract_text(self):
fields_xpath = OrderedDict([('name', './/text()'),
('link', './/a/@href')])
table = rows.import_from_xpath(BytesIO(html),
encoding='utf-8',
rows_xpath=rows_xpath,
fields_xpath=fields_xpath,
encoding='utf-8')
fields_xpath=fields_xpath)
self.assertEqual(table[0].name, 'Abadia de Goiás (GO)')
self.assertEqual(table[1].name, 'Abadiânia (GO)')

Expand Down

0 comments on commit 5c6da04

Please sign in to comment.