Skip to content

Commit

Permalink
Fixing broken unit tests (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
laconc authored Jul 5, 2019
1 parent 2dad657 commit 5eb3e7c
Show file tree
Hide file tree
Showing 14 changed files with 93 additions and 54 deletions.
8 changes: 5 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ version: 2
jobs:
build:
docker:
- image: dataworld/pyenv-tox
- image: 621799806001.dkr.ecr.us-east-1.amazonaws.com/pyenv-tox:latest
aws_auth:
aws_access_key_id: $DEV_AWS_ACCESS_KEY_ID
aws_secret_access_key: $DEV_AWS_SECRET_ACCESS_KEY

working_directory: /root/data.world-py

Expand All @@ -19,8 +22,7 @@ jobs:

- run:
name: pyenv setup
command: |
pyenv local 2.7.13 3.4.5 3.5.2 3.6.0
command: pyenv local 2.7.16 3.5.7 3.6.8 3.7.3

- run:
name: tox
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ __pycache__
build
dist
docs/_build
testing

.python-version

# PyCharm files
*.iml
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Contributing Guidelines

### Issues

Issue reports are a great way to contribute to this project.
To the extent possible, make sure that your issue is detailed and not a duplicate.

Expand Down Expand Up @@ -39,6 +40,7 @@ $ git checkout -b my-feature-branch
```

### Write Tests

Try to write a test that reproduces the problem you're trying to fix or describes a feature that you want to build. Add tests to spec.

We definitely appreciate pull requests that highlight or reproduce a problem, even without a fix.
Expand All @@ -48,9 +50,11 @@ We definitely appreciate pull requests that highlight or reproduce a problem, ev
Implement your feature or bug fix. Make sure that all tests pass without errors.

Also, to make sure that your code follows our coding style guide and best practices, run the command:

```sh
$ flake8
```

Make sure to fix any errors that appear if any.

### Write Documentation
Expand All @@ -67,6 +71,7 @@ git config --global user.email "[email protected]"
```

Writing good commit logs is important. A commit log should describe what changed and why.

```sh
git add ...
git commit
Expand All @@ -79,6 +84,7 @@ git push origin my-feature-branch
```

### Make a Pull Request

Go to https://github.com/[YOUR_GITHUB_NAME]/data.world-py.git and select your feature branch. Click the 'Pull Request' button and fill out the form. Pull requests are usually reviewed within a few days.

# Release (for maintainers)
Expand All @@ -95,4 +101,5 @@ Release process:
2. Push respective tag to `release` branch (i.e. `git push origin [tag]^{}:release`)

# Thank you!

Thank you in advance, for contributing to this project!
2 changes: 1 addition & 1 deletion datadotworld/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from datadotworld.datadotworld import DataDotWorld, UriParam # noqa: F401

__version__ = '1.6.1'
__version__ = '1.7.0'

# Convenience top-level functions

Expand Down
6 changes: 3 additions & 3 deletions datadotworld/datadotworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

from __future__ import absolute_import

import numbers
import shutil
from datetime import datetime
from os import path
from warnings import warn, filterwarnings
import numbers

import requests

Expand Down Expand Up @@ -158,7 +158,7 @@ def load_dataset(self, dataset_key, force_update=False, auto_update=False):
else:
try:
dataset_info = self.api_client.get_dataset(dataset_key)
except RestApiError as e:
except RestApiError:
return LocalDataset(descriptor_file)

last_modified = datetime.strptime(dataset_info['updated'],
Expand Down Expand Up @@ -280,7 +280,7 @@ def open_remote_file(self, dataset_key, file_name,
raise RestApiError(cause=e)


class UriParam():
class UriParam:
"""Represents a URI value as a parameter to a SPARQL query"""
def __init__(self, uri):
"""
Expand Down
45 changes: 30 additions & 15 deletions datadotworld/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@
import os
import warnings
import io
from collections import OrderedDict
try:
from collections.abc import OrderedDict
except ImportError:
from collections import OrderedDict

import datapackage
from datapackage.resource import TabularResource
from jsontableschema.exceptions import SchemaValidationError
from os import path
from tableschema.exceptions import SchemaValidationError
from tabulator import Stream

from datadotworld.models.table_schema import (sanitize_resource_schema,
Expand Down Expand Up @@ -63,7 +64,7 @@ class LocalDataset(object):

def __init__(self, descriptor_file):

self._datapackage = datapackage.DataPackage(descriptor_file)
self._datapackage = datapackage.Package(descriptor_file)

self.__descriptor_file = descriptor_file
self.__base_path = os.path.dirname(
Expand All @@ -72,10 +73,11 @@ def __init__(self, descriptor_file):
# Index resources by name
self.__resources = {r.descriptor['name']: r
for r in self._datapackage.resources}
self.__tabular_resources = {k: sanitize_resource_schema(r)
self.__tabular_resources = {k: self._sanitize_resource(r)
for (k, r) in self.__resources.items()
if type(r) is TabularResource and
if r.tabular and
r.descriptor['path'].startswith('data')}

self.__invalid_schemas = [] # Resource names with invalid schemas

# All formats
Expand Down Expand Up @@ -115,6 +117,18 @@ def describe(self, resource=None):
else:
return self.__resources[resource].descriptor

@staticmethod
def _sanitize_resource(r):
"""Explicitly sets the encoding if it's missing & sanitizes the schema
:param r: resource
"""
if 'encoding' not in r.descriptor:
r.descriptor['encoding'] = 'utf-8'
r.commit()

return sanitize_resource_schema(r)

@memoized(key_mapper=lambda self, resource_name: resource_name)
def _load_raw_data(self, resource_name):
"""Extract raw data from resource
Expand All @@ -125,8 +139,8 @@ def _load_raw_data(self, resource_name):
# ``data`` will be returned as bytes.
upcast_resource = datapackage.Resource(
self.__resources[resource_name].descriptor,
default_base_path=self.__base_path)
return upcast_resource.data
base_path=self.__base_path)
return upcast_resource.raw_read()

@memoized(key_mapper=lambda self, resource_name: resource_name)
def _load_table(self, resource_name):
Expand All @@ -143,12 +157,13 @@ def _load_table(self, resource_name):
if 'schema' in tabular_resource.descriptor:
fields = [f['name'] for f in
tabular_resource.descriptor['schema']['fields']]
elif len(tabular_resource.data) > 0:
fields = tabular_resource.data[0].keys()
elif len(tabular_resource.read(keyed=True)) > 0:
fields = tabular_resource.read(keyed=True)[0].keys()

return [order_columns_in_row(fields, row) for row in
tabular_resource.data]
except (SchemaValidationError, ValueError, TypeError) as e:
tabular_resource.read(keyed=True)]
except (AttributeError, SchemaValidationError, ValueError, TypeError) \
as e:
warnings.warn(
'Unable to set column types automatically using {} schema. '
'Data types may need to be adjusted manually. '
Expand Down Expand Up @@ -181,7 +196,7 @@ def _load_dataframe(self, resource_name):

try:
return pandas.read_csv(
path.join(
os.path.join(
self.__base_path,
tabular_resource.descriptor['path']),
dtype=field_dtypes['other'],
Expand All @@ -193,7 +208,7 @@ def _load_dataframe(self, resource_name):
'schema. Data types may need to be adjusted manually. '
'Error: {}'.format(resource_name, e))
return pandas.read_csv(
path.join(
os.path.join(
self.__base_path,
tabular_resource.descriptor['path']))

Expand Down
7 changes: 5 additions & 2 deletions datadotworld/models/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

from __future__ import absolute_import

from collections import OrderedDict
try:
from collections.abc import OrderedDict
except ImportError:
from collections import OrderedDict

from jsontableschema import Schema
from tableschema import Schema

from datadotworld.models import table_schema

Expand Down
7 changes: 5 additions & 2 deletions datadotworld/models/table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
# This product includes software developed at
# data.world, Inc.(http://data.world/).

from collections import OrderedDict, Counter
try:
from collections.abc import OrderedDict, Counter
except ImportError:
from collections import OrderedDict, Counter

#: Mapping of Table Schema field types to all suitable dtypes (pandas)
from warnings import warn
Expand Down Expand Up @@ -230,7 +233,7 @@ def _sanitize_schema(schema_descriptor):
"""
missing_type_support = False
try:
from jsontableschema import YearType, YearMonthType, DurationType # noqa
from tableschema import YearType, YearMonthType, DurationType # noqa
except ImportError:
missing_type_support = True

Expand Down
9 changes: 6 additions & 3 deletions datadotworld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@

import functools
import re
from collections import Mapping

import collections
try:
import collections.abc as collections
from collections.abc import Mapping
except ImportError:
import collections
from collections import Mapping

DATASET_KEY_PATTERN = re.compile(
'^(?:https?://[^/]+/)?([a-z0-9-]+)/([a-z0-9-]+)$') # URLs and paths
Expand Down
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ universal=1
test=pytest

[flake8]
exclude=datadotworld/client/_swagger/*, build, .eggs, .tox/*
exclude=datadotworld/client/_swagger/*, build, .eggs, .tox/*, testing

[coverage:run]
source=.
data_file=.coverage
omit=datadotworld/client/_swagger/*, build, .eggs/*
omit=datadotworld/client/_swagger/*, build, .eggs/*, testing

[coverage:report]
omit=datadotworld/client/_swagger/*, build, .eggs/*
omit=datadotworld/client/_swagger/*, build, .eggs/*, testing
20 changes: 11 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def find_version(*paths):
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Database :: Database Engines/Servers',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Software Development :: Libraries :: Python Modules',
Expand All @@ -68,29 +68,31 @@ def find_version(*paths):
'certifi>=2017.04.17',
'click>=6.0,<7.0a',
'configparser>=3.5.0,<4.0a',
'datapackage>=0.8.8,<1.0a',
'jsontableschema>=0.10.0,<1.0a',
'datapackage>=1.6.2,<2.0a',
'tableschema>=1.5.2,<2.0a',
'python-dateutil>=2.6.0,<3.0a',
'requests>=2.0.0,<3.0a',
'six>=1.5.0,<2.0a',
'tabulator<=1.19.3',
'tabulator>=1.22.0',
'urllib3>=1.15,<2.0a',
'flake8>=2.6.0,<3.4.1a',
],
setup_requires=[
'pytest-runner>=2.11,<3.0a',
],
tests_require=[
'coverage>=4.4.2,<=4.5.1',
'coverage>=4.4.2,<=4.5.3',
'doublex>=1.8.4,<2.0a',
'flake8>=2.6.0,<=3.7.7',
'numpy<=1.16.4',
'pandas<0.25',
'pyhamcrest>=1.9.0,<2.0a',
'pytest>=4.6.3,<5.0a',
'responses>=0.5.1,<1.0a',
'pytest>=3.2.0,<4.0a',
'pandas<1.0a',
],
extras_require={
'pandas': [
'pandas<1.0a',
'numpy<=1.16.4',
'pandas<0.25',
],
},
entry_points={
Expand Down
Loading

0 comments on commit 5eb3e7c

Please sign in to comment.