Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refs #256 - Implement class-based formats #395

Merged
merged 1 commit into from
Nov 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@

- Dropped Python 2 support

### Improvements

- Formats can now be dynamically registered through the
`tablib.formats.registry.register` API (#256).

### Bugfixes

- Fixed a crash when exporting an empty string with the ReST format (#368)
Expand Down
37 changes: 22 additions & 15 deletions docs/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,32 +90,36 @@ Tablib features a micro-framework for adding format support.
The easiest way to understand it is to use it.
So, let's define our own format, named *xxx*.

1. Write a new format interface.
From version 1.0, Tablib formats are class-based and can be dynamically
registered.

:class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib.
Function names are crucial.

Example **tablib/formats/_xxx.py**: ::
1. Write your custom format class::

class MyXXXFormatClass:
title = 'xxx'

def export_set(dset):
@classmethod
def export_set(cls, dset):
....
# returns string representation of given dataset

def export_book(dbook):
@classmethod
def export_book(cls, dbook):
....
# returns string representation of given databook

def import_set(dset, in_stream):
@classmethod
def import_set(cls, dset, in_stream):
...
# populates given Dataset with given datastream

def import_book(dbook, in_stream):
@classmethod
def import_book(cls, dbook, in_stream):
...
# returns Databook instance

def detect(stream):
@classmethod
def detect(cls, stream):
...
# returns True if given stream is parsable as xxx

Expand All @@ -124,15 +128,18 @@ So, let's define our own format, named *xxx*.
If the format excludes support for an import/export mechanism (*e.g.*
:class:`csv <tablib.Dataset.csv>` excludes
:class:`Databook <tablib.Databook>` support),
simply don't define the respective functions.
simply don't define the respective class methods.
Appropriate errors will be raised.

2. Add your new format module to the :class:`tablib.formats.available` tuple.
2. Register your class::

from tablib.formats import registry

3. Add a mock property to the :class:`Dataset <tablib.Dataset>` class with verbose `reStructured Text`_ docstring.
This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation.
registry.register('xxx', MyXXXFormatClass())

4. Write respective :ref:`tests <testing>`.
3. From then on, you should be able to use your new custom format as if it were
a built-in Tablib format, e.g. using ``dataset.export('xxx')`` will use the
``MyXXXFormatClass.export_set`` method.

.. _testing:

Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ All we have to do is add them to a :class:`Databook` object... ::
... and export to Excel just like :class:`Datasets <Dataset>`. ::

with open('students.xls', 'wb') as f:
f.write(book.xls)
f.write(book.export('xls'))

The resulting ``students.xls`` file will contain a separate spreadsheet for each :class:`Dataset` object in the :class:`Databook`.

Expand Down
82 changes: 25 additions & 57 deletions src/tablib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from operator import itemgetter

from tablib import formats
from tablib.formats import registry

__title__ = 'tablib'
__author__ = 'Kenneth Reitz'
Expand Down Expand Up @@ -145,8 +146,6 @@ class Dataset:

"""

_formats = {}

def __init__(self, *args, **kwargs):
self._data = list(Row(arg) for arg in args)
self.__headers = None
Expand All @@ -161,8 +160,6 @@ def __init__(self, *args, **kwargs):

self.title = kwargs.get('title')

self._register_formats()

def __len__(self):
return self.height

Expand Down Expand Up @@ -232,23 +229,11 @@ def __str__(self):
# Internals
# ---------

@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
setattr(cls, 'set_%s' % fmt.title, fmt.import_set)
cls._formats[fmt.title] = (fmt.export_set, fmt.import_set)
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_set))
setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
cls._formats[fmt.title] = (fmt.export_set, None)

except AttributeError:
cls._formats[fmt.title] = (None, None)
def _get_in_format(self, fmt, **kwargs):
return fmt.export_set(self, **kwargs)

def _set_in_format(self, fmt, *args, **kwargs):
return fmt.import_set(self, *args, **kwargs)

def _validate(self, row=None, col=None, safety=False):
"""Assures size of every row in dataset is of proper proportions."""
Expand Down Expand Up @@ -417,11 +402,14 @@ def load(self, in_stream, format=None, **kwargs):
if not format:
format = detect_format(in_stream)

export_set, import_set = self._formats.get(format, (None, None))
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_set'):
raise UnsupportedFormat('Format {0} cannot be imported.'.format(format))

if not import_set:
raise UnsupportedFormat('Format {} cannot be imported.'.format(format))

import_set(self, in_stream, **kwargs)
fmt.import_set(self, in_stream, **kwargs)
return self

def export(self, format, **kwargs):
Expand All @@ -430,11 +418,11 @@ def export(self, format, **kwargs):

:param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
"""
export_set, import_set = self._formats.get(format, (None, None))
if not export_set:
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_set'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))

return export_set(self, **kwargs)
return fmt.export_set(self, **kwargs)

# -------
# Formats
Expand Down Expand Up @@ -1012,16 +1000,8 @@ class Databook:
"""A book of :class:`Dataset` objects.
"""

_formats = {}

def __init__(self, sets=None):

if sets is None:
self._datasets = list()
else:
self._datasets = sets

self._register_formats()
self._datasets = sets or []

def __repr__(self):
try:
Expand All @@ -1033,21 +1013,6 @@ def wipe(self):
"""Removes all :class:`Dataset` objects from the :class:`Databook`."""
self._datasets = []

@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
cls._formats[fmt.title] = (fmt.export_book, fmt.import_book)
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_book))
cls._formats[fmt.title] = (fmt.export_book, None)

except AttributeError:
cls._formats[fmt.title] = (None, None)

def sheets(self):
return self._datasets

Expand Down Expand Up @@ -1089,11 +1054,11 @@ def load(self, in_stream, format, **kwargs):
if not format:
format = detect_format(in_stream)

export_book, import_book = self._formats.get(format, (None, None))
if not import_book:
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_book'):
raise UnsupportedFormat('Format {} cannot be loaded.'.format(format))

import_book(self, in_stream, **kwargs)
fmt.import_book(self, in_stream, **kwargs)
return self

def export(self, format, **kwargs):
Expand All @@ -1102,16 +1067,16 @@ def export(self, format, **kwargs):

:param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
"""
export_book, import_book = self._formats.get(format, (None, None))
if not export_book:
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_book'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))

return export_book(self, **kwargs)
return fmt.export_book(self, **kwargs)


def detect_format(stream):
"""Return format name of given stream."""
for fmt in formats.available:
for fmt in registry.formats():
try:
if fmt.detect(stream):
return fmt.title
Expand Down Expand Up @@ -1149,3 +1114,6 @@ class HeadersNeeded(Exception):

class UnsupportedFormat(NotImplementedError):
"Format is not supported"


registry.register_builtins()
90 changes: 74 additions & 16 deletions src/tablib/formats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,77 @@
""" Tablib - formats
"""
from collections import OrderedDict
from functools import partialmethod

from . import _csv as csv
from . import _dbf as dbf
from . import _df as df
from . import _html as html
from . import _jira as jira
from . import _json as json
from . import _latex as latex
from . import _ods as ods
from . import _rst as rst
from . import _tsv as tsv
from . import _xls as xls
from . import _xlsx as xlsx
from . import _yaml as yaml

# xlsx before as xls (xlrd) can also read xlsx
available = (json, xlsx, xls, yaml, csv, dbf, tsv, html, jira, latex, ods, df, rst)
from ._csv import CSVFormat
from ._dbf import DBFFormat
from ._df import DataFrameFormat
from ._html import HTMLFormat
from ._jira import JIRAFormat
from ._json import JSONFormat
from ._latex import LATEXFormat
from ._ods import ODSFormat
from ._rst import ReSTFormat
from ._tsv import TSVFormat
from ._xls import XLSFormat
from ._xlsx import XLSXFormat
from ._yaml import YAMLFormat


class Registry:
    """Registry of Tablib format objects, keyed by format name.

    Registering a format also attaches convenience accessors for it to the
    ``Dataset`` and ``Databook`` classes (see :meth:`register`).
    """

    # Class-level on purpose: the mapping is shared by every Registry
    # instance. Insertion order is preserved because format autodetection
    # walks the formats in registration order.
    _formats = OrderedDict()

    def register(self, key, format_):
        """Register ``format_`` under ``key`` and wire up its accessors.

        The accessors are named after ``format_.title`` (not ``key``):

        * ``Dataset.<title>`` property (read/write when ``import_set`` is
          available, read-only otherwise),
        * ``Dataset.get_<title>()`` and, when ``import_set`` is available,
          ``Dataset.set_<title>()``,
        * ``Databook.<title>`` property when ``export_book`` is available
          (read/write when ``import_book`` is available as well).

        An attribute that is missing or ``None`` is treated as unsupported.

        :param key: name under which the format is stored in the registry.
        :param format_: format object exposing ``title`` and ``export_set``
            and, optionally, ``import_set``/``export_book``/``import_book``.
        :raises Exception: if ``format_`` lacks ``title`` or ``export_set``.
        """
        title = getattr(format_, 'title', None)
        export_set = getattr(format_, 'export_set', None)
        # Validate before touching Dataset/Databook so that a rejected format
        # leaves no half-installed properties behind.
        if title is None or export_set is None:
            raise Exception("Your format class should minimally implement the export_set interface.")

        # Imported lazily: tablib.core itself imports this module.
        from tablib.core import Databook, Dataset

        # Databook.<title>: only when the format can export a whole book.
        export_book = getattr(format_, 'export_book', None)
        if export_book is not None:
            import_book = getattr(format_, 'import_book', None)
            # property(getter, None) is a read-only property.
            setattr(Databook, title, property(export_book, import_book))

        # Dataset.<title> plus the get_<title>/set_<title> helper methods.
        import_set = getattr(format_, 'import_set', None)
        setattr(Dataset, title, property(export_set, import_set))
        setattr(Dataset, 'get_%s' % title, partialmethod(Dataset._get_in_format, format_))
        if import_set is not None:
            setattr(Dataset, 'set_%s' % title, partialmethod(Dataset._set_in_format, format_))

        self._formats[key] = format_

    def register_builtins(self):
        """Register every format shipped with Tablib."""
        # Registration ordering matters for autodetection.
        self.register('json', JSONFormat())
        # xlsx must come before xls, because xls (xlrd) can also read xlsx.
        self.register('xlsx', XLSXFormat())
        self.register('xls', XLSFormat())
        self.register('yaml', YAMLFormat())
        self.register('csv', CSVFormat())
        self.register('tsv', TSVFormat())
        self.register('ods', ODSFormat())
        self.register('dbf', DBFFormat())
        self.register('html', HTMLFormat())
        self.register('jira', JIRAFormat())
        self.register('latex', LATEXFormat())
        self.register('df', DataFrameFormat())
        self.register('rst', ReSTFormat())

    def formats(self):
        """Yield the registered format objects in registration order."""
        yield from self._formats.values()

    def get_format(self, key):
        """Return the format object registered under ``key``.

        :raises UnsupportedFormat: if no format is registered under ``key``.
            Callers in ``tablib.core`` report unusable formats with this
            exception, so an unknown name is reported the same way instead
            of leaking a bare ``KeyError``.
        """
        try:
            return self._formats[key]
        except KeyError:
            # Imported lazily: tablib.core itself imports this module.
            from tablib.core import UnsupportedFormat
            raise UnsupportedFormat(
                "Tablib has no format '%s' or it is not registered." % key
            ) from None


registry = Registry()
Loading