Skip to content

Commit

Permalink
Merge pull request #1 from MuckRock/duck
Browse files (browse the repository at this point in the history)
pylint
  • Loading branch information
duckduckgrayduck authored Dec 2, 2023
2 parents 287e29d + b6848d9 commit 38fe0e0
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[FORMAT]
max-line-length=88
good-names=i,x1,x2,y1,y2
good-names=i,x1,x2,y1,y2,id

[MESSAGES CONTROL]
disable=missing-docstring,too-many-ancestors,too-few-public-methods,no-else-return,no-member,attribute-defined-outside-init,similarities,import-outside-toplevel,cyclic-import,no-member,no-else-raise,too-many-instance-attributes,too-many-arguments,ungrouped-imports,useless-object-inheritance,no-else-continue
8 changes: 5 additions & 3 deletions documentcloud/addon.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class BaseAddOn:

def __init__(self):
args = self._parse_arguments()
client = self._create_client(args)
self._create_client(args)

# a unique identifier for this run
self.id = args.pop("id", None)
Expand Down Expand Up @@ -65,7 +65,7 @@ def _create_client(self, args):
self.client.refresh_token = args["refresh_token"]
if args["token"] is not None:
self.client.session.headers.update(
{"Authorization": "Bearer {}".format(args["token"])}
{"Authorization": f"Bearer {args['token']}"}
)

# custom user agent for AddOns
Expand Down Expand Up @@ -119,7 +119,7 @@ def _parse_arguments(self):

# validate parameter data
try:
with open("config.yaml") as config:
with open("config.yaml", encoding="utf-8") as config:
schema = yaml.safe_load(config)
args["data"] = fastjsonschema.validate(schema, args["data"])
# add title in case the add-on wants to reference its own title
Expand Down Expand Up @@ -207,6 +207,8 @@ def get_document_count(self):
documents = self.client.documents.search(self.query)
return documents.count

return 0

def get_documents(self):
"""Get documents from either selected or queried documents"""
if self.documents:
Expand Down
14 changes: 6 additions & 8 deletions documentcloud/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,7 @@ def get(self, id_, expand=None):
params = {"expand": ",".join(expand)}
else:
params = {}
response = self.client.get(
"{}/{}/".format(self.api_path, get_id(id_)), params=params
)
response = self.client.get(f"{self.api_path}/{get_id(id_)}/", params=params)
# pylint: disable=not-callable
return self.resource(self.client, response.json())

Expand All @@ -120,7 +118,7 @@ class ChildAPIClient(BaseAPIClient):
"""Base client for sub resources"""

def __init__(self, client, parent):
super(ChildAPIClient, self).__init__(client)
super().__init__(client)
self.parent = parent

def list(self, **params):
Expand Down Expand Up @@ -169,7 +167,7 @@ def delete(self):

class APISet(list):
def __init__(self, iterable, resource):
super(APISet, self).__init__(iterable)
super().__init__(iterable)
self.resource = resource
if not all(isinstance(obj, self.resource) for obj in self):
raise TypeError(
Expand All @@ -191,7 +189,7 @@ def append(self, obj):
raise DuplicateObjectError(
f"Object with ID {obj.id} appears in the list more than once"
)
super(APISet, self).append(copy(obj))
super().append(copy(obj))

def add(self, obj):
if not isinstance(obj, self.resource):
Expand All @@ -200,7 +198,7 @@ def add(self, obj):
)
# skip duplicates silently
if obj.id not in [i.id for i in self]:
super(APISet, self).append(copy(obj))
super().append(copy(obj))

def extend(self, list_):
if not all(isinstance(obj, self.resource) for obj in list_):
Expand All @@ -213,4 +211,4 @@ def extend(self, list_):
raise DuplicateObjectError(
f"Object with ID {id_} appears in the list more than once"
)
super(APISet, self).extend(copy(obj) for obj in list_)
super().extend(copy(obj) for obj in list_)
4 changes: 2 additions & 2 deletions documentcloud/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,6 @@ def raise_for_status(self, response):
response.raise_for_status()
except requests.exceptions.RequestException as exc:
if exc.response.status_code == 404:
raise DoesNotExistError(response=exc.response)
raise DoesNotExistError(response=exc.response) from exc
else:
raise APIError(response=exc.response)
raise APIError(response=exc.response) from exc
44 changes: 26 additions & 18 deletions documentcloud/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
"""

# Standard Library
import datetime
import logging
import os
import re
import warnings
import datetime
from functools import partial

# Third Party
Expand Down Expand Up @@ -62,7 +62,7 @@ def __init__(self, client, dict_):
dict_[f"_{name}"] = None
dict_[f"{name}_id"] = value

super(Document, self).__init__(client, dict_)
super().__init__(client, dict_)

self.sections = SectionClient(client, self)
self.annotations = AnnotationClient(client, self)
Expand Down Expand Up @@ -187,7 +187,10 @@ def get_page_text_url(self, page=1):
return f"{self.asset_url}documents/{self.id}/pages/{self.slug}-p{page}.txt"

def get_page_position_json_url(self, page=1):
return f"{self.asset_url}documents/{self.id}/pages/{self.slug}-p{page}.position.json"
return (
f"{self.asset_url}documents/{self.id}/pages/"
f"{self.slug}-p{page}.position.json"
)

def get_json_text_url(self):
return f"{self.asset_url}documents/{self.id}/{self.slug}.txt.json"
Expand Down Expand Up @@ -265,6 +268,16 @@ def list(self, **params):

def upload(self, pdf, **kwargs):
"""Upload a document"""

def check_size(size):
# DocumentCloud's size limit is set to 501MB to give people a little leeway
# for OS rounding
if size >= 501 * 1024 * 1024:
raise ValueError(
"The pdf you have submitted is over the DocumentCloud API's 500MB "
"file size limit. Split it into smaller pieces and try again."
)

# if they pass in a URL, use the URL upload flow
if is_url(pdf):
return self._upload_url(pdf, **kwargs)
Expand All @@ -275,19 +288,13 @@ def upload(self, pdf, **kwargs):
size = os.fstat(pdf.fileno()).st_size
except (AttributeError, OSError): # pragma: no cover
size = 0
check_size(size)
return self._upload_file(pdf, **kwargs)
else:
size = os.path.getsize(pdf)
pdf = open(pdf, "rb")

# DocumentCloud's size limit is set to 501MB to give people a little leeway
# for OS rounding
if size >= 501 * 1024 * 1024:
raise ValueError(
"The pdf you have submitted is over the DocumentCloud API's 500MB "
"file size limit. Split it into smaller pieces and try again."
)

return self._upload_file(pdf, **kwargs)
check_size(size)
with open(pdf, "rb") as pdf_file:
return self._upload_file(pdf_file, **kwargs)

def _format_upload_parameters(self, name, **kwargs):
"""Prepare upload parameters from kwargs"""
Expand Down Expand Up @@ -371,11 +378,13 @@ def _collect_files(self, path, extensions):

def upload_directory(self, path, handle_errors=False, extensions=".pdf", **kwargs):
"""Upload files with specified extensions in a directory"""
# pylint: disable=too-many-locals, too-many-branches

# Do not set the same title for all documents
kwargs.pop("title", None)

# If extensions are specified as None, it will check for all supported filetypes.
# If extensions are specified as None, it will check for all supported
# filetypes.
if extensions is None:
extensions = SUPPORTED_EXTENSIONS

Expand Down Expand Up @@ -447,9 +456,8 @@ def upload_directory(self, path, handle_errors=False, extensions=".pdf", **kwarg
for url, file_path in zip(presigned_urls, file_paths):
logger.info("Uploading %s to S3...", file_path)
try:
response = requests_retry_session().put(
url, data=open(file_path, "rb").read()
)
with open(file_path, "rb") as file:
response = requests_retry_session().put(url, data=file.read())
self.client.raise_for_status(response)
except (APIError, RequestException) as exc:
if handle_errors:
Expand Down
2 changes: 1 addition & 1 deletion documentcloud/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self, *args, **kwargs):
else:
self.error = None
self.status_code = None
super(DocumentCloudError, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)


class DuplicateObjectError(DocumentCloudError):
Expand Down
8 changes: 4 additions & 4 deletions documentcloud/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Project(BaseAPIObject):

def __init__(self, *args, **kwargs):
per_page = kwargs.pop("per_page", PER_PAGE_MAX)
super(Project, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self._document_list = None
self._per_page = per_page

Expand All @@ -23,7 +23,7 @@ def __str__(self):

def save(self):
"""Add the documents to the project as well"""
super(Project, self).save()
super().save()
if self._document_list:
self.clear_documents()
self.add_documents(self._document_list)
Expand Down Expand Up @@ -100,7 +100,7 @@ def all(self, **params):
return self.list(user=self.client.user_id, **params)

def get(self, id=None, title=None):
# pylint:disable=redefined-builtin, arguments-differ
# pylint:disable=redefined-builtin, arguments-renamed
# pylint disables are necessary for backward compatibility
if id is not None and title is not None:
raise ValueError(
Expand All @@ -115,7 +115,7 @@ def get(self, id=None, title=None):
return self.get_by_title(title)

def get_by_id(self, id_):
return super(ProjectClient, self).get(id_)
return super().get(id_)

def get_by_title(self, title):
response = self.client.get(
Expand Down
6 changes: 4 additions & 2 deletions documentcloud/toolbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
A few toys the API will use.
"""

# Standard Library
from itertools import zip_longest
from urllib.parse import urlparse

# Third Party
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlparse
from itertools import zip_longest


def requests_retry_session(
Expand Down

0 comments on commit 38fe0e0

Please sign in to comment.