Skip to content

Commit

Permalink
Fixing broken unit tests (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
laconc authored Jul 5, 2019
1 parent 2dad657 commit 5eb3e7c
Show file tree
Hide file tree
Showing 14 changed files with 93 additions and 54 deletions.
8 changes: 5 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ version: 2
jobs:
build:
docker:
- image: dataworld/pyenv-tox
- image: 621799806001.dkr.ecr.us-east-1.amazonaws.com/pyenv-tox:latest
aws_auth:
aws_access_key_id: $DEV_AWS_ACCESS_KEY_ID
aws_secret_access_key: $DEV_AWS_SECRET_ACCESS_KEY

working_directory: /root/data.world-py

Expand All @@ -19,8 +22,7 @@ jobs:

- run:
name: pyenv setup
command: |
pyenv local 2.7.13 3.4.5 3.5.2 3.6.0
command: pyenv local 2.7.16 3.5.7 3.6.8 3.7.3

- run:
name: tox
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ __pycache__
build
dist
docs/_build
testing

.python-version

# PyCharm files
*.iml
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Contributing Guidelines

### Issues

Issue reports are a great way to contribute to this project.
To the extent possible, make sure that your issue is detailed and not a duplicate.

Expand Down Expand Up @@ -39,6 +40,7 @@ $ git checkout -b my-feature-branch
```

### Write Tests

Try to write a test that reproduces the problem you're trying to fix or describes a feature that you want to build. Add tests to spec.

We definitely appreciate pull requests that highlight or reproduce a problem, even without a fix.
Expand All @@ -48,9 +50,11 @@ We definitely appreciate pull requests that highlight or reproduce a problem, ev
Implement your feature or bug fix. Make sure that all tests pass without errors.

Also, to make sure that your code follows our coding style guide and best practices, run the command:

```sh
$ flake8
```

Make sure to fix any errors that appear if any.

### Write Documentation
Expand All @@ -67,6 +71,7 @@ git config --global user.email "[email protected]"
```

Writing good commit logs is important. A commit log should describe what changed and why.

```sh
git add ...
git commit
Expand All @@ -79,6 +84,7 @@ git push origin my-feature-branch
```

### Make a Pull Request

Go to https://github.com/[YOUR_GITHUB_NAME]/data.world-py.git and select your feature branch. Click the 'Pull Request' button and fill out the form. Pull requests are usually reviewed within a few days.

# Release (for maintainers)
Expand All @@ -95,4 +101,5 @@ Release process:
2. Push respective tag to `release` branch (i.e. `git push origin [tag]^{}:release`)

# Thank you!

Thank you in advance, for contributing to this project!
2 changes: 1 addition & 1 deletion datadotworld/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from datadotworld.datadotworld import DataDotWorld, UriParam # noqa: F401

__version__ = '1.6.1'
__version__ = '1.7.0'

# Convenience top-level functions

Expand Down
6 changes: 3 additions & 3 deletions datadotworld/datadotworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

from __future__ import absolute_import

import numbers
import shutil
from datetime import datetime
from os import path
from warnings import warn, filterwarnings
import numbers

import requests

Expand Down Expand Up @@ -158,7 +158,7 @@ def load_dataset(self, dataset_key, force_update=False, auto_update=False):
else:
try:
dataset_info = self.api_client.get_dataset(dataset_key)
except RestApiError as e:
except RestApiError:
return LocalDataset(descriptor_file)

last_modified = datetime.strptime(dataset_info['updated'],
Expand Down Expand Up @@ -280,7 +280,7 @@ def open_remote_file(self, dataset_key, file_name,
raise RestApiError(cause=e)


class UriParam():
class UriParam:
"""Represents a URI value as a parameter to a SPARQL query"""
def __init__(self, uri):
"""
Expand Down
45 changes: 30 additions & 15 deletions datadotworld/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
import os
import warnings
import io
from collections import OrderedDict
try:
from collections.abc import OrderedDict
except ImportError:
from collections import OrderedDict

import datapackage
from datapackage.resource import TabularResource
from jsontableschema.exceptions import SchemaValidationError
from os import path
from tableschema.exceptions import SchemaValidationError
from tabulator import Stream

from datadotworld.models.table_schema import (sanitize_resource_schema,
Expand Down Expand Up @@ -63,7 +64,7 @@ class LocalDataset(object):

def __init__(self, descriptor_file):

self._datapackage = datapackage.DataPackage(descriptor_file)
self._datapackage = datapackage.Package(descriptor_file)

self.__descriptor_file = descriptor_file
self.__base_path = os.path.dirname(
Expand All @@ -72,10 +73,11 @@ def __init__(self, descriptor_file):
# Index resources by name
self.__resources = {r.descriptor['name']: r
for r in self._datapackage.resources}
self.__tabular_resources = {k: sanitize_resource_schema(r)
self.__tabular_resources = {k: self._sanitize_resource(r)
for (k, r) in self.__resources.items()
if type(r) is TabularResource and
if r.tabular and
r.descriptor['path'].startswith('data')}

self.__invalid_schemas = [] # Resource names with invalid schemas

# All formats
Expand Down Expand Up @@ -115,6 +117,18 @@ def describe(self, resource=None):
else:
return self.__resources[resource].descriptor

@staticmethod
def _sanitize_resource(r):
"""Explicitly sets the encoding if it's missing & sanitizes the schema
:param r: resource
"""
if 'encoding' not in r.descriptor:
r.descriptor['encoding'] = 'utf-8'
r.commit()

return sanitize_resource_schema(r)

@memoized(key_mapper=lambda self, resource_name: resource_name)
def _load_raw_data(self, resource_name):
"""Extract raw data from resource
Expand All @@ -125,8 +139,8 @@ def _load_raw_data(self, resource_name):
# ``data`` will be returned as bytes.
upcast_resource = datapackage.Resource(
self.__resources[resource_name].descriptor,
default_base_path=self.__base_path)
return upcast_resource.data
base_path=self.__base_path)
return upcast_resource.raw_read()

@memoized(key_mapper=lambda self, resource_name: resource_name)
def _load_table(self, resource_name):
Expand All @@ -143,12 +157,13 @@ def _load_table(self, resource_name):
if 'schema' in tabular_resource.descriptor:
fields = [f['name'] for f in
tabular_resource.descriptor['schema']['fields']]
elif len(tabular_resource.data) > 0:
fields = tabular_resource.data[0].keys()
elif len(tabular_resource.read(keyed=True)) > 0:
fields = tabular_resource.read(keyed=True)[0].keys()

return [order_columns_in_row(fields, row) for row in
tabular_resource.data]
except (SchemaValidationError, ValueError, TypeError) as e:
tabular_resource.read(keyed=True)]
except (AttributeError, SchemaValidationError, ValueError, TypeError) \
as e:
warnings.warn(
'Unable to set column types automatically using {} schema. '
'Data types may need to be adjusted manually. '
Expand Down Expand Up @@ -181,7 +196,7 @@ def _load_dataframe(self, resource_name):

try:
return pandas.read_csv(
path.join(
os.path.join(
self.__base_path,
tabular_resource.descriptor['path']),
dtype=field_dtypes['other'],
Expand All @@ -193,7 +208,7 @@ def _load_dataframe(self, resource_name):
'schema. Data types may need to be adjusted manually. '
'Error: {}'.format(resource_name, e))
return pandas.read_csv(
path.join(
os.path.join(
self.__base_path,
tabular_resource.descriptor['path']))

Expand Down
7 changes: 5 additions & 2 deletions datadotworld/models/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

from __future__ import absolute_import

from collections import OrderedDict
try:
from collections.abc import OrderedDict
except ImportError:
from collections import OrderedDict

from jsontableschema import Schema
from tableschema import Schema

from datadotworld.models import table_schema

Expand Down
7 changes: 5 additions & 2 deletions datadotworld/models/table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
# This product includes software developed at
# data.world, Inc.(http://data.world/).

from collections import OrderedDict, Counter
try:
from collections.abc import OrderedDict, Counter
except ImportError:
from collections import OrderedDict, Counter

#: Mapping of Table Schema field types to all suitable dtypes (pandas)
from warnings import warn
Expand Down Expand Up @@ -230,7 +233,7 @@ def _sanitize_schema(schema_descriptor):
"""
missing_type_support = False
try:
from jsontableschema import YearType, YearMonthType, DurationType # noqa
from tableschema import YearType, YearMonthType, DurationType # noqa
except ImportError:
missing_type_support = True

Expand Down
9 changes: 6 additions & 3 deletions datadotworld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@

import functools
import re
from collections import Mapping

import collections
try:
import collections.abc as collections
from collections.abc import Mapping
except ImportError:
import collections
from collections import Mapping

DATASET_KEY_PATTERN = re.compile(
'^(?:https?://[^/]+/)?([a-z0-9-]+)/([a-z0-9-]+)$') # URLs and paths
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ universal=1
test=pytest

[flake8]
exclude=datadotworld/client/_swagger/*, build, .eggs, .tox/*
exclude=datadotworld/client/_swagger/*, build, .eggs, .tox/*, testing

[coverage:run]
source=.
data_file=.coverage
omit=datadotworld/client/_swagger/*, build, .eggs/*
omit=datadotworld/client/_swagger/*, build, .eggs/*, testing

[coverage:report]
omit=datadotworld/client/_swagger/*, build, .eggs/*
omit=datadotworld/client/_swagger/*, build, .eggs/*, testing
20 changes: 11 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def find_version(*paths):
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Database :: Database Engines/Servers',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Software Development :: Libraries :: Python Modules',
Expand All @@ -68,29 +68,31 @@ def find_version(*paths):
'certifi>=2017.04.17',
'click>=6.0,<7.0a',
'configparser>=3.5.0,<4.0a',
'datapackage>=0.8.8,<1.0a',
'jsontableschema>=0.10.0,<1.0a',
'datapackage>=1.6.2,<2.0a',
'tableschema>=1.5.2,<2.0a',
'python-dateutil>=2.6.0,<3.0a',
'requests>=2.0.0,<3.0a',
'six>=1.5.0,<2.0a',
'tabulator<=1.19.3',
'tabulator>=1.22.0',
'urllib3>=1.15,<2.0a',
'flake8>=2.6.0,<3.4.1a',
],
setup_requires=[
'pytest-runner>=2.11,<3.0a',
],
tests_require=[
'coverage>=4.4.2,<=4.5.1',
'coverage>=4.4.2,<=4.5.3',
'doublex>=1.8.4,<2.0a',
'flake8>=2.6.0,<=3.7.7',
'numpy<=1.16.4',
'pandas<0.25',
'pyhamcrest>=1.9.0,<2.0a',
'pytest>=4.6.3,<5.0a',
'responses>=0.5.1,<1.0a',
'pytest>=3.2.0,<4.0a',
'pandas<1.0a',
],
extras_require={
'pandas': [
'pandas<1.0a',
'numpy<=1.16.4',
'pandas<0.25',
],
},
entry_points={
Expand Down
Loading

0 comments on commit 5eb3e7c

Please sign in to comment.