diff --git a/rows/plugins/dicts.py b/rows/plugins/dicts.py
index 9dc676d9..36e5fd7e 100644
--- a/rows/plugins/dicts.py
+++ b/rows/plugins/dicts.py
@@ -51,6 +51,9 @@ def import_from_dicts(data, samples=1000, *args, **kwargs):
*args, **kwargs)
+import_from_dicts.is_lazy = True
+
+
def export_to_dicts(table, *args, **kwargs):
return [{key: getattr(row, key) for key in table.field_names}
for row in table]
diff --git a/rows/plugins/ods.py b/rows/plugins/ods.py
index 76e37c32..cc291ae5 100644
--- a/rows/plugins/ods.py
+++ b/rows/plugins/ods.py
@@ -103,5 +103,10 @@ def import_from_ods(filename_or_fobj, index=0, *args, **kwargs):
max_length = max(len(row) for row in table_rows)
full_rows = complete_with_None(table_rows, max_length)
+
meta = {'imported_from': 'ods', 'filename': filename,}
+
return create_table(full_rows, meta=meta, *args, **kwargs)
+
+
+import_from_ods.is_lazy = False
diff --git a/rows/plugins/plugin_csv.py b/rows/plugins/plugin_csv.py
index 9bd18f72..0edf9176 100644
--- a/rows/plugins/plugin_csv.py
+++ b/rows/plugins/plugin_csv.py
@@ -119,6 +119,9 @@ def import_from_csv(filename_or_fobj, encoding='utf-8', dialect=None,
return create_table(reader, meta=meta, *args, **kwargs)
+import_from_csv.is_lazy = True
+
+
def export_to_csv(table, filename_or_fobj=None, encoding='utf-8',
dialect=unicodecsv.excel, batch_size=100, *args, **kwargs):
"""Export a `rows.Table` to a CSV file
diff --git a/rows/plugins/plugin_html.py b/rows/plugins/plugin_html.py
index c57b407f..ae874cdd 100644
--- a/rows/plugins/plugin_html.py
+++ b/rows/plugins/plugin_html.py
@@ -98,6 +98,9 @@ def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
return create_table(table_rows, meta=meta, *args, **kwargs)
+import_from_html.is_lazy = False
+
+
def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
**kwargs):
serialized_table = serialize(table, *args, **kwargs)
@@ -106,6 +109,7 @@ def export_to_html(table, filename_or_fobj=None, encoding='utf-8', *args,
header = ['
{} | \n'.format(field) for field in fields]
result.extend(header)
result.extend([' \n', ' \n', '\n', ' \n', '\n'])
+ # TODO: could be lazy so we don't need to store the whole table in memory
for index, row in enumerate(serialized_table, start=1):
css_class = 'odd' if index % 2 == 1 else 'even'
result.append(' \n'.format(css_class))
diff --git a/rows/plugins/plugin_json.py b/rows/plugins/plugin_json.py
index 795b990e..e1947144 100644
--- a/rows/plugins/plugin_json.py
+++ b/rows/plugins/plugin_json.py
@@ -38,6 +38,7 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
filename, fobj = get_filename_and_fobj(filename_or_fobj)
json_obj = json.load(fobj, encoding=encoding)
+ # TODO: may use import_from_dicts here
field_names = list(json_obj[0].keys())
table_rows = [[item[key] for key in field_names] for item in json_obj]
@@ -47,6 +48,9 @@ def import_from_json(filename_or_fobj, encoding='utf-8', *args, **kwargs):
return create_table([field_names] + table_rows, meta=meta, *args, **kwargs)
+import_from_json.is_lazy = False
+
+
def _convert(value, field_type, *args, **kwargs):
if value is None or field_type in (
fields.BinaryField,
@@ -77,6 +81,8 @@ def export_to_json(table, filename_or_fobj=None, encoding='utf-8', indent=None,
fields = table.fields
prepared_table = prepare_to_export(table, *args, **kwargs)
field_names = next(prepared_table)
+
+ # TODO: could be lazy so we don't need to store the whole table in memory
data = [{field_name: _convert(value, fields[field_name], *args, **kwargs)
for field_name, value in zip(field_names, row)}
for row in prepared_table]
diff --git a/rows/plugins/plugin_parquet.py b/rows/plugins/plugin_parquet.py
index 2ceedcf3..6850bc45 100644
--- a/rows/plugins/plugin_parquet.py
+++ b/rows/plugins/plugin_parquet.py
@@ -56,8 +56,12 @@ def import_from_parquet(filename_or_fobj, *args, **kwargs):
for schema in parquet._read_footer(fobj).schema
if schema.type is not None])
header = list(types.keys())
- table_rows = list(parquet.reader(fobj)) # TODO: be lazy
+ # TODO: make it lazy
+ table_rows = list(parquet.reader(fobj))
meta = {'imported_from': 'parquet', 'filename': filename,}
return create_table([header] + table_rows, meta=meta, force_types=types,
*args, **kwargs)
+
+
+import_from_parquet.is_lazy = False
diff --git a/rows/plugins/sqlite.py b/rows/plugins/sqlite.py
index a5617efa..3a36ce9f 100644
--- a/rows/plugins/sqlite.py
+++ b/rows/plugins/sqlite.py
@@ -128,6 +128,9 @@ def import_from_sqlite(filename_or_connection, table_name='table1', query=None,
return create_table(data, meta=meta, *args, **kwargs)
+import_from_sqlite.is_lazy = True
+
+
def export_to_sqlite(table, filename_or_connection, table_name=None,
table_name_format='table{index}', batch_size=100,
*args, **kwargs):
diff --git a/rows/plugins/txt.py b/rows/plugins/txt.py
index 3f2fb658..b3f09db3 100644
--- a/rows/plugins/txt.py
+++ b/rows/plugins/txt.py
@@ -34,18 +34,24 @@ def import_from_txt(filename_or_fobj, encoding='utf-8', *args, **kwargs):
filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
contents = fobj.read().decode(encoding).strip().splitlines()
+ # TODO: make it lazy
+
# remove '+----+----+' lines
contents = contents[1:-1]
del contents[1]
table_rows = [[value.strip() for value in row.split(PIPE)[1:-1]]
for row in contents]
+
meta = {'imported_from': 'txt',
'filename': filename,
'encoding': encoding,}
return create_table(table_rows, meta=meta, *args, **kwargs)
+import_from_txt.is_lazy = False
+
+
def export_to_txt(table, filename_or_fobj=None, encoding=None,
*args, **kwargs):
'''Export a `rows.Table` to text
@@ -70,6 +76,7 @@ def export_to_txt(table, filename_or_fobj=None, encoding=None,
split_line = PLUS + PLUS.join(dashes) + PLUS
result = [split_line, header, split_line]
+ # TODO: make it lazy
for row in table_rows:
values = [value.rjust(max_sizes[field_name])
for field_name, value in zip(field_names, row)]
diff --git a/rows/plugins/utils.py b/rows/plugins/utils.py
index 10a59981..725468ed 100644
--- a/rows/plugins/utils.py
+++ b/rows/plugins/utils.py
@@ -141,7 +141,10 @@ def func(rows_data):
def create_table(data, meta=None, fields=None, skip_header=True,
import_fields=None, samples=None, force_types=None,
lazy=False, *args, **kwargs):
+ # TODO: change samples to be a fixed number
+ # TODO: may change samples logic (`float('inf')` or `all`)
# TODO: add auto_detect_types=True parameter
+
table_rows = iter(data)
sample_rows = []
@@ -163,6 +166,9 @@ def create_table(data, meta=None, fields=None, skip_header=True,
if not isinstance(fields, OrderedDict):
raise ValueError('`fields` must be an `OrderedDict`')
+ # TODO: if `fields` is set, its order may be wrong when compared to the
+ # order of the first row (header).
+
if skip_header:
_ = next(table_rows)
@@ -187,6 +193,7 @@ def create_table(data, meta=None, fields=None, skip_header=True,
if not lazy:
table = Table(fields=fields, meta=meta)
+
# TODO: put this inside Table.__init__
for row in chain(sample_rows, table_rows):
table.append({field_name: value
diff --git a/rows/plugins/xls.py b/rows/plugins/xls.py
index 7de2a1da..4398cea8 100644
--- a/rows/plugins/xls.py
+++ b/rows/plugins/xls.py
@@ -156,6 +156,9 @@ def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
return create_table(table_rows, meta=meta, *args, **kwargs)
+import_from_xls.is_lazy = False
+
+
def export_to_xls(table, filename_or_fobj=None, sheet_name='Sheet1', *args,
**kwargs):
diff --git a/rows/plugins/xlsx.py b/rows/plugins/xlsx.py
index 483ab64f..83f3bf56 100644
--- a/rows/plugins/xlsx.py
+++ b/rows/plugins/xlsx.py
@@ -77,12 +77,16 @@ def import_from_xlsx(filename_or_fobj, sheet_name=None, sheet_index=0,
for row_index in range(start_row, end_row + 1)]
filename, _ = get_filename_and_fobj(filename_or_fobj, dont_open=True)
+
metadata = {'imported_from': 'xlsx',
'filename': filename,
'sheet_name': sheet_name, }
return create_table(table_rows, meta=metadata, *args, **kwargs)
+import_from_xlsx.is_lazy = False
+
+
FORMATTING_STYLES = {
fields.DateField: 'YYYY-MM-DD',
fields.DatetimeField: 'YYYY-MM-DD HH:MM:SS',
diff --git a/rows/plugins/xpath.py b/rows/plugins/xpath.py
index 2fba3b3b..5b04f258 100644
--- a/rows/plugins/xpath.py
+++ b/rows/plugins/xpath.py
@@ -69,6 +69,7 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='rb')
xml = fobj.read().decode(encoding)
+ # TODO: make it lazy (is it possible with lxml?)
tree = tree_from_string(xml)
row_elements = tree.xpath(rows_xpath)
@@ -80,3 +81,6 @@ def import_from_xpath(filename_or_fobj, rows_xpath, fields_xpath,
'filename': filename,
'encoding': encoding,}
return create_table([header] + result_rows, meta=meta, *args, **kwargs)
+
+
+import_from_xpath.is_lazy = False
diff --git a/tests/tests_plugin_csv.py b/tests/tests_plugin_csv.py
index 100ce52a..f4d73008 100644
--- a/tests/tests_plugin_csv.py
+++ b/tests/tests_plugin_csv.py
@@ -54,6 +54,7 @@ class PluginCsvTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
self.assertIs(rows.import_from_csv, rows.plugins.plugin_csv.import_from_csv)
self.assertIs(rows.export_to_csv, rows.plugins.plugin_csv.export_to_csv)
+ self.assertTrue(rows.import_from_csv.is_lazy)
@mock.patch('rows.plugins.plugin_csv.create_table')
def test_import_from_csv_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_dicts.py b/tests/tests_plugin_dicts.py
index 3302dc46..49e39dfa 100644
--- a/tests/tests_plugin_dicts.py
+++ b/tests/tests_plugin_dicts.py
@@ -46,6 +46,7 @@ def test_imports(self):
self.assertIs(rows.import_from_dicts,
rows.plugins.dicts.import_from_dicts)
self.assertIs(rows.export_to_dicts, rows.plugins.dicts.export_to_dicts)
+ self.assertTrue(rows.import_from_dicts.is_lazy)
@mock.patch('rows.plugins.dicts.create_table')
def test_import_from_dicts_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_html.py b/tests/tests_plugin_html.py
index a55da85a..d41a0754 100644
--- a/tests/tests_plugin_html.py
+++ b/tests/tests_plugin_html.py
@@ -1,6 +1,6 @@
# coding: utf-8
-# Copyright 2014-2016 Álvaro Justen
+# Copyright 2014-2017 Álvaro Justen
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -54,6 +54,7 @@ def test_imports(self):
self.assertIs(rows.import_from_html,
rows.plugins.plugin_html.import_from_html)
self.assertIs(rows.export_to_html, rows.plugins.plugin_html.export_to_html)
+ self.assertFalse(rows.import_from_html.is_lazy)
def test_import_from_html_filename(self):
table = rows.import_from_html(self.filename, encoding=self.encoding)
@@ -89,7 +90,7 @@ def test_import_from_html_uses_create_table(self, mocked_create_table):
call = mocked_create_table.call_args
kwargs['meta'] = {'imported_from': 'html',
'filename': self.filename,
- 'encoding': 'iso-8859-1',}
+ 'encoding': 'iso-8859-1', }
self.assertEqual(call[1], kwargs)
def test_export_to_html_filename(self):
diff --git a/tests/tests_plugin_json.py b/tests/tests_plugin_json.py
index 57adc8a7..7795b064 100644
--- a/tests/tests_plugin_json.py
+++ b/tests/tests_plugin_json.py
@@ -47,6 +47,7 @@ def test_imports(self):
rows.plugins.plugin_json.import_from_json)
self.assertIs(rows.export_to_json,
rows.plugins.plugin_json.export_to_json)
+ self.assertFalse(rows.import_from_json.is_lazy)
@mock.patch('rows.plugins.plugin_json.create_table')
def test_import_from_json_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_ods.py b/tests/tests_plugin_ods.py
index c639184f..43c824c2 100644
--- a/tests/tests_plugin_ods.py
+++ b/tests/tests_plugin_ods.py
@@ -35,6 +35,7 @@ class PluginOdsTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
self.assertIs(rows.import_from_ods, rows.plugins.ods.import_from_ods)
+ self.assertFalse(rows.import_from_ods.is_lazy)
@mock.patch('rows.plugins.ods.create_table')
def test_import_from_ods_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_parquet.py b/tests/tests_plugin_parquet.py
index 0fbc7769..b73775d3 100644
--- a/tests/tests_plugin_parquet.py
+++ b/tests/tests_plugin_parquet.py
@@ -63,6 +63,7 @@ class PluginParquetTestCase(unittest.TestCase):
def test_imports(self):
self.assertIs(rows.import_from_parquet,
rows.plugins.plugin_parquet.import_from_parquet)
+ self.assertFalse(rows.import_from_parquet.is_lazy)
@mock.patch('rows.plugins.plugin_parquet.create_table')
def test_import_from_parquet_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_sqlite.py b/tests/tests_plugin_sqlite.py
index 40a78425..771f11c9 100644
--- a/tests/tests_plugin_sqlite.py
+++ b/tests/tests_plugin_sqlite.py
@@ -50,6 +50,7 @@ def test_imports(self):
rows.plugins.sqlite.import_from_sqlite)
self.assertIs(rows.export_to_sqlite,
rows.plugins.sqlite.export_to_sqlite)
+ self.assertTrue(rows.import_from_sqlite.is_lazy)
@mock.patch('rows.plugins.sqlite.create_table')
def test_import_from_sqlite_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_txt.py b/tests/tests_plugin_txt.py
index f04bc2f1..d530cfdb 100644
--- a/tests/tests_plugin_txt.py
+++ b/tests/tests_plugin_txt.py
@@ -41,6 +41,7 @@ class PluginTxtTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
self.assertIs(rows.import_from_txt, rows.plugins.txt.import_from_txt)
self.assertIs(rows.export_to_txt, rows.plugins.txt.export_to_txt)
+ self.assertFalse(rows.import_from_txt.is_lazy)
@mock.patch('rows.plugins.txt.create_table')
def test_import_from_txt_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xls.py b/tests/tests_plugin_xls.py
index 9ca55a16..9a3001f0 100644
--- a/tests/tests_plugin_xls.py
+++ b/tests/tests_plugin_xls.py
@@ -46,6 +46,7 @@ class PluginXlsTestCase(utils.RowsTestMixIn, unittest.TestCase):
def test_imports(self):
self.assertIs(rows.import_from_xls, rows.plugins.xls.import_from_xls)
self.assertIs(rows.export_to_xls, rows.plugins.xls.export_to_xls)
+ self.assertFalse(rows.import_from_xls.is_lazy)
@mock.patch('rows.plugins.xls.create_table')
def test_import_from_xls_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xlsx.py b/tests/tests_plugin_xlsx.py
index 820694f8..bbd869f8 100644
--- a/tests/tests_plugin_xlsx.py
+++ b/tests/tests_plugin_xlsx.py
@@ -42,6 +42,7 @@ def test_imports(self):
rows.plugins.xlsx.import_from_xlsx)
self.assertIs(rows.export_to_xlsx,
rows.plugins.xlsx.export_to_xlsx)
+ self.assertFalse(rows.import_from_xlsx.is_lazy)
@mock.patch('rows.plugins.xlsx.create_table')
def test_import_from_xlsx_uses_create_table(self, mocked_create_table):
diff --git a/tests/tests_plugin_xpath.py b/tests/tests_plugin_xpath.py
index aacd97d5..e9b08af9 100644
--- a/tests/tests_plugin_xpath.py
+++ b/tests/tests_plugin_xpath.py
@@ -107,9 +107,9 @@ def test_import_from_xpath_unescape_and_extract_text(self):
fields_xpath = OrderedDict([('name', './/text()'),
('link', './/a/@href')])
table = rows.import_from_xpath(BytesIO(html),
+ encoding='utf-8',
rows_xpath=rows_xpath,
- fields_xpath=fields_xpath,
- encoding='utf-8')
+ fields_xpath=fields_xpath)
self.assertEqual(table[0].name, 'Abadia de Goiás (GO)')
self.assertEqual(table[1].name, 'Abadiânia (GO)')