Skip to content

Commit

Permalink
Refs #256 - Implement class-based formats
Browse files Browse the repository at this point in the history
This makes it far easier to extend Tablib with new formats.
  • Loading branch information
claudep committed Oct 20, 2019
1 parent 1f000f2 commit 287cae1
Show file tree
Hide file tree
Showing 18 changed files with 1,066 additions and 1,013 deletions.
37 changes: 22 additions & 15 deletions docs/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,32 +90,36 @@ Tablib features a micro-framework for adding format support.
The easiest way to understand it is to use it.
So, let's define our own format, named *xxx*.

1. Write a new format interface.
From version 0.14.0, Tablib formats are class-based and can be dynamically
registered.

:class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib.
Function names are crucial.

Example **tablib/formats/_xxx.py**: ::
1. Write your custom format class::

class MyXXXFormatClass:
title = 'xxx'

def export_set(dset):
@classmethod
def export_set(cls, dset):
....
# returns string representation of given dataset

def export_book(dbook):
@classmethod
def export_book(cls, dbook):
....
# returns string representation of given databook

def import_set(dset, in_stream):
@classmethod
def import_set(cls, dset, in_stream):
...
# populates given Dataset with given datastream

def import_book(dbook, in_stream):
@classmethod
def import_book(cls, dbook, in_stream):
...
# returns Databook instance

def detect(stream):
@classmethod
def detect(cls, stream):
...
# returns True if given stream is parsable as xxx

Expand All @@ -124,15 +128,18 @@ So, let's define our own format, named *xxx*.
If the format excludes support for an import/export mechanism (*e.g.*
:class:`csv <tablib.Dataset.csv>` excludes
:class:`Databook <tablib.Databook>` support),
simply don't define the respective functions.
simply don't define the respective class methods.
Appropriate errors will be raised.

2. Add your new format module to the :class:`tablib.formats.available` tuple.
2. Register your class::

from tablib.formats import registry

3. Add a mock property to the :class:`Dataset <tablib.Dataset>` class with verbose `reStructured Text`_ docstring.
This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation.
registry.register('xxx', MyXXXFormatClass())

4. Write respective :ref:`tests <testing>`.
3. From then on, you should be able to use your new custom format as if it were
a built-in Tablib format; for example, ``dataset.export('xxx')`` will call the
``MyXXXFormatClass.export_set`` method.

.. _testing:

Expand Down
2 changes: 1 addition & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ All we have to do is add them to a :class:`Databook` object... ::
... and export to Excel just like :class:`Datasets <Dataset>`. ::

with open('students.xls', 'wb') as f:
f.write(book.xls)
f.write(book.export('xls'))

The resulting ``students.xls`` file will contain a separate spreadsheet for each :class:`Dataset` object in the :class:`Databook`.

Expand Down
82 changes: 25 additions & 57 deletions src/tablib/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from operator import itemgetter

from tablib import formats
from tablib.formats import registry


__title__ = 'tablib'
Expand Down Expand Up @@ -146,8 +147,6 @@ class Dataset:
"""

_formats = {}

def __init__(self, *args, **kwargs):
self._data = list(Row(arg) for arg in args)
self.__headers = None
Expand All @@ -162,8 +161,6 @@ def __init__(self, *args, **kwargs):

self.title = kwargs.get('title')

self._register_formats()

def __len__(self):
return self.height

Expand Down Expand Up @@ -233,23 +230,11 @@ def __str__(self):
# Internals
# ---------

@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
setattr(cls, 'set_%s' % fmt.title, fmt.import_set)
cls._formats[fmt.title] = (fmt.export_set, fmt.import_set)
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_set))
setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
cls._formats[fmt.title] = (fmt.export_set, None)

except AttributeError:
cls._formats[fmt.title] = (None, None)
def _get_in_format(self, fmt, **kwargs):
return fmt.export_set(self, **kwargs)

def _set_in_format(self, fmt, *args, **kwargs):
return fmt.import_set(self, *args, **kwargs)

def _validate(self, row=None, col=None, safety=False):
"""Assures size of every row in dataset is of proper proportions."""
Expand Down Expand Up @@ -418,11 +403,14 @@ def load(self, in_stream, format=None, **kwargs):
if not format:
format = detect_format(in_stream)

export_set, import_set = self._formats.get(format, (None, None))
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_set'):
raise UnsupportedFormat('Format {0} cannot be imported.'.format(format))

if not import_set:
raise UnsupportedFormat('Format {} cannot be imported.'.format(format))

import_set(self, in_stream, **kwargs)
fmt.import_set(self, in_stream, **kwargs)
return self

def export(self, format, **kwargs):
Expand All @@ -431,11 +419,11 @@ def export(self, format, **kwargs):
:param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
"""
export_set, import_set = self._formats.get(format, (None, None))
if not export_set:
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_set'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))

return export_set(self, **kwargs)
return fmt.export_set(self, **kwargs)

# -------
# Formats
Expand Down Expand Up @@ -1013,16 +1001,8 @@ class Databook:
"""A book of :class:`Dataset` objects.
"""

_formats = {}

def __init__(self, sets=None):

if sets is None:
self._datasets = list()
else:
self._datasets = sets

self._register_formats()
self._datasets = sets or []

def __repr__(self):
try:
Expand All @@ -1034,21 +1014,6 @@ def wipe(self):
"""Removes all :class:`Dataset` objects from the :class:`Databook`."""
self._datasets = []

@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
cls._formats[fmt.title] = (fmt.export_book, fmt.import_book)
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_book))
cls._formats[fmt.title] = (fmt.export_book, None)

except AttributeError:
cls._formats[fmt.title] = (None, None)

def sheets(self):
return self._datasets

Expand Down Expand Up @@ -1090,11 +1055,11 @@ def load(self, in_stream, format, **kwargs):
if not format:
format = detect_format(in_stream)

export_book, import_book = self._formats.get(format, (None, None))
if not import_book:
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_book'):
raise UnsupportedFormat('Format {} cannot be loaded.'.format(format))

import_book(self, in_stream, **kwargs)
fmt.import_book(self, in_stream, **kwargs)
return self

def export(self, format, **kwargs):
Expand All @@ -1103,16 +1068,16 @@ def export(self, format, **kwargs):
:param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
"""
export_book, import_book = self._formats.get(format, (None, None))
if not export_book:
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_book'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))

return export_book(self, **kwargs)
return fmt.export_book(self, **kwargs)


def detect_format(stream):
"""Return format name of given stream."""
for fmt in formats.available:
for fmt in registry.formats():
try:
if fmt.detect(stream):
return fmt.title
Expand Down Expand Up @@ -1150,3 +1115,6 @@ class HeadersNeeded(Exception):

class UnsupportedFormat(NotImplementedError):
"Format is not supported"


registry.register_builtins()
90 changes: 74 additions & 16 deletions src/tablib/formats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,77 @@
""" Tablib - formats
"""
from collections import OrderedDict
from functools import partialmethod

from . import _csv as csv
from . import _json as json
from . import _xls as xls
from . import _yaml as yaml
from . import _tsv as tsv
from . import _html as html
from . import _xlsx as xlsx
from . import _ods as ods
from . import _dbf as dbf
from . import _latex as latex
from . import _df as df
from . import _rst as rst
from . import _jira as jira

# xlsx before as xls (xlrd) can also read xlsx
available = (json, xlsx, xls, yaml, csv, dbf, tsv, html, jira, latex, ods, df, rst)
from ._csv import CSVFormat
from ._tsv import TSVFormat
from ._json import JSONFormat
from ._xls import XLSFormat
from ._xlsx import XLSXFormat
from ._ods import ODSFormat
from ._yaml import YAMLFormat
from ._dbf import DBFFormat
from ._html import HTMLFormat
from ._jira import JIRAFormat
from ._latex import LATEXFormat
from ._df import DataFrameFormat
from ._rst import ReSTFormat


class Registry:
    """Ordered registry of format objects, keyed by format name.

    Registering a format also attaches the matching convenience attributes
    to ``Dataset`` and ``Databook`` (see :meth:`register`).
    """

    # Class-level mapping, shared by every Registry instance. Insertion
    # order is preserved and is the order in which formats() yields them.
    _formats = OrderedDict()

    def register(self, key, format_):
        """Register ``format_`` under ``key`` and expose it on the core classes.

        :param key: name later passed to :meth:`get_format`.
        :param format_: format object. It must provide ``title`` and
            ``export_set``; ``import_set``, ``export_book`` and
            ``import_book`` are optional.
        :raises Exception: if ``format_`` lacks ``title`` or ``export_set``.

        Note that the attributes created on ``Dataset``/``Databook`` are named
        after ``format_.title``, while the registry entry is stored under
        ``key``.
        """
        # Imported here rather than at module level because tablib.core
        # itself imports this module.
        from tablib.core import Databook, Dataset

        # Create Databook.<format> read or read/write properties.
        # Book support is optional, so a format without export_book is
        # silently left without a Databook property.
        try:
            setattr(Databook, format_.title, property(format_.export_book, format_.import_book))
        except AttributeError:
            try:
                setattr(Databook, format_.title, property(format_.export_book))
            except AttributeError:
                pass

        # Create Dataset.<format> read or read/write properties,
        # and Dataset.get_<format>/set_<format> methods.
        try:
            try:
                setattr(Dataset, format_.title, property(format_.export_set, format_.import_set))
                setattr(Dataset, 'get_%s' % format_.title, partialmethod(Dataset._get_in_format, format_))
                setattr(Dataset, 'set_%s' % format_.title, partialmethod(Dataset._set_in_format, format_))
            except AttributeError:
                # No import_set: fall back to a read-only property and getter.
                setattr(Dataset, format_.title, property(format_.export_set))
                setattr(Dataset, 'get_%s' % format_.title, partialmethod(Dataset._get_in_format, format_))

        except AttributeError:
            raise Exception("Your format class should minimally implement the export_set interface.")

        self._formats[key] = format_

    def register_builtins(self):
        """Register every format bundled with the package."""
        # Registration ordering matters for autodetection.
        self.register('json', JSONFormat())
        # xlsx must be registered before xls, because xls (xlrd) can also read xlsx.
        self.register('xlsx', XLSXFormat())
        self.register('xls', XLSFormat())
        self.register('yaml', YAMLFormat())
        self.register('csv', CSVFormat())
        self.register('tsv', TSVFormat())
        self.register('ods', ODSFormat())
        self.register('dbf', DBFFormat())
        self.register('html', HTMLFormat())
        self.register('jira', JIRAFormat())
        self.register('latex', LATEXFormat())
        self.register('df', DataFrameFormat())
        self.register('rst', ReSTFormat())

    def formats(self):
        """Yield the registered format objects in registration order."""
        for frm in self._formats.values():
            yield frm

    def get_format(self, key):
        """Return the format object registered under ``key``.

        :raises UnsupportedFormat: if nothing is registered under ``key``.
            Without this, an unknown format name would surface as a bare
            ``KeyError`` instead of the ``UnsupportedFormat`` error that
            ``load``/``export`` raise for unsupported formats.
        """
        try:
            return self._formats[key]
        except KeyError:
            # Imported lazily, for the same reason as in register():
            # tablib.core imports this module.
            from tablib.core import UnsupportedFormat
            raise UnsupportedFormat(
                "No format registered for key '%s'." % key
            ) from None


registry = Registry()
Loading

0 comments on commit 287cae1

Please sign in to comment.