diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 6afe2da0..b6d9824a 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -26,7 +26,7 @@ jobs:
# Don't use macOS for now, it is currently unstable, otherwise slow. -- 2022-04-19, amo
# 'macos-latest',
]
- python-version: ['2.7']
+ python-version: ['3.8']
defaults:
run:
@@ -103,7 +103,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
- python-version: '2.7'
+ python-version: '3.8'
architecture: 'x64'
cache: 'pip'
cache-dependency-path: 'requirements-docs.txt'
diff --git a/CHANGES.rst b/CHANGES.rst
index 349304fb..06c14c37 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -142,6 +142,8 @@ Development
- [mw] Improve settings for having per-vendor OPS credentials
- [ui] More flexbox for header layout
- [ui] Improve comment editing usability
+- [mw] No need to manually encode form fields with "mechanize" anymore.
+ Thanks, `Kovid `_!
2019-05-08 0.165.0
diff --git a/Makefile b/Makefile
index 7950e4e1..821b7e77 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
#VERSION := $(shell cat patzilla/version.py | awk '{ print $$3 }' | tr -d "'")
#$(error VERSION=$(VERSION))
-$(eval venvpath := .venv2)
+$(eval venvpath := .venv3)
$(eval pip := $(venvpath)/bin/pip)
$(eval twine := $(venvpath)/bin/twine)
$(eval python := $(venvpath)/bin/python)
@@ -9,10 +9,8 @@ $(eval pserve := $(venvpath)/bin/pserve)
$(eval pytest := $(venvpath)/bin/pytest)
$(eval bumpversion := $(venvpath)/bin/bumpversion)
$(eval fab := $(venvpath)/bin/fab)
-
-$(eval venv3path := .venv)
-$(eval yarn := $(venv3path)/bin/yarn)
-$(eval npx := $(venv3path)/bin/npx)
+$(eval yarn := $(venvpath)/bin/yarn)
+$(eval npx := $(venvpath)/bin/npx)
setup: setup-py
@@ -65,7 +63,7 @@ upload-pypi:
# Setup Python virtualenv.
setup-virtualenv:
- @test -e $(python) || virtualenv --python=python2 $(venvpath)
+ @test -e $(python) || virtualenv --python=python3 $(venvpath)
setup-py: setup-virtualenv
$(pip) install --editable=.[test]
diff --git a/docs/conf.py b/docs/conf.py
index 3a19a2d2..5569cd62 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -49,18 +49,18 @@
master_doc = 'index'
# General information about the project.
-project = u'PatZilla'
-copyright = u'2013-2022, The PatZilla authors'
-author = u'The PatZilla authors'
+project = 'PatZilla'
+copyright = '2013-2023, The PatZilla authors'
+author = 'The PatZilla authors'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = u'0.169.3'
+version = '0.169.3'
# The full version, including alpha/beta/rc tags.
-release = u'0.169.3'
+release = '0.169.3'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -145,8 +145,8 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- (master_doc, 'PatZilla.tex', u'PatZilla Documentation',
- u'The PatZilla authors', 'manual'),
+ (master_doc, 'PatZilla.tex', 'PatZilla Documentation',
+ 'The PatZilla authors', 'manual'),
]
@@ -155,7 +155,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- (master_doc, 'patzilla', u'PatZilla Documentation',
+ (master_doc, 'patzilla', 'PatZilla Documentation',
[author], 1)
]
@@ -166,7 +166,7 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- (master_doc, 'PatZilla', u'PatZilla Documentation',
+ (master_doc, 'PatZilla', 'PatZilla Documentation',
author, 'PatZilla', 'One line description of project.',
'Miscellaneous'),
]
diff --git a/fabfile.py b/fabfile.py
index d0c5d1f5..377b1400 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -34,7 +34,7 @@ def install(version, target):
if not version:
version = pkg_version
- print 'Installing package {0}, version {1} to target {2}.'.format(*map(yellow, [pkg_name, version, target]))
+ print('Installing package {0}, version {1} to target {2}.'.format(*list(map(yellow, [pkg_name, version, target]))))
if env.confirm:
response = ask('Proceed (y/n)? ', ('y', 'n'))
else:
@@ -72,7 +72,7 @@ def install(version, target):
restart_service(target)
else:
- print yellow('Skipped package install due to user request.')
+ print(yellow('Skipped package install due to user request.'))
def setup_package(package, virtualenv, options=''):
#--index-url=http://c.pypi.python.org/simple
@@ -100,7 +100,7 @@ def restart_service(target):
if uwsgi_name:
run('service uwsgi reload %s' % uwsgi_name)
else:
- print(red('WARNING: Could not restart service "%s"' % target))
+ print((red('WARNING: Could not restart service "%s"' % target)))
@task
@hosts(INSTALLATION_HOST)
diff --git a/patzilla/access/cipo/drawing.py b/patzilla/access/cipo/drawing.py
index f15d59c4..3eb22f75 100644
--- a/patzilla/access/cipo/drawing.py
+++ b/patzilla/access/cipo/drawing.py
@@ -3,7 +3,7 @@
import re
import logging
import requests
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
from patzilla.util.numbers.common import split_patent_number
log = logging.getLogger(__name__)
@@ -70,7 +70,7 @@ def get_first_drawing_url(patent):
images_index_html = fetch_images_index(images_index_url)
soup = BeautifulSoup(images_index_html)
#
- first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(ur'/opic-cipo/cpd/page'))['src']
+ first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(r'/opic-cipo/cpd/page'))['src']
return first_drawing_url
@@ -83,6 +83,6 @@ def get_first_drawing_url(patent):
payload = fetch_first_drawing(split_patent_number(number))
if payload:
#print "payload length:", len(payload)
- print payload
+ print(payload)
else:
- print "not found"
+ print("not found")
diff --git a/patzilla/access/depatech/client.py b/patzilla/access/depatech/client.py
index 652bd6e1..011edff6 100644
--- a/patzilla/access/depatech/client.py
+++ b/patzilla/access/depatech/client.py
@@ -13,7 +13,7 @@
from patzilla.access.depatech import get_depatech_client
from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException
from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.numbers.normalize import normalize_patent
log = logging.getLogger(__name__)
@@ -55,7 +55,7 @@ def search(self, query, options=None):
return self.search_real(query, options=options)
def search_real(self, query, options=None):
- options = options or SmartBunch()
+ options = options or SmartMunch()
options.setdefault('offset', 0)
options.setdefault('limit', self.pagesize)
@@ -73,7 +73,7 @@ def search_real(self, query, options=None):
transport = 'json'
query.expression = self.translate_deparom_query(query.expression)
- log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format(
+ log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format(
query.expression, offset, limit, **self.__dict__))
starttime = timeit.default_timer()
@@ -92,7 +92,7 @@ def search_real(self, query, options=None):
'from': offset, 'size': limit,
}
- log.info(u'{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format(
+ log.info('{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format(
query=query, uri=uri, params=params, options=options.dump(), backend_name=self.backend_name))
# Perform search request
@@ -164,10 +164,10 @@ def search_real(self, query, options=None):
if 'reason' not in upstream_error:
upstream_error['reason'] = 'Reason unknown'
- message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error)
+ message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error)
raise self.search_failed(
- user_info=u'Error searching depa.tech.',
+ user_info='Error searching depa.tech.',
message=message,
response=response)
@@ -180,7 +180,7 @@ def translate_deparom_query(self, expression):
expression = expression.replace(upstream_prefix, '').replace('deparom:', '')
- log.info(u'{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format(
+ log.info('{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format(
expression=expression, uri=uri, backend_name=self.backend_name))
expression = upstream_prefix + expression
@@ -212,7 +212,7 @@ def translate_deparom_query(self, expression):
elif response.status_code >= 400:
- message = u'Reason unknown'
+ message = 'Reason unknown'
if response.headers.get('Content-Type', '').startswith('application/json'):
@@ -224,15 +224,15 @@ def translate_deparom_query(self, expression):
upstream_error['code'] = response_data['status']
if 'reason' not in upstream_error:
- upstream_error['reason'] = u'Reason unknown'
+ upstream_error['reason'] = 'Reason unknown'
- message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error)
+ message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error)
else:
message = response.content
raise self.search_failed(
- user_info=u'Translating DEPAROM query expression failed',
+ user_info='Translating DEPAROM query expression failed',
message=message,
response=response)
@@ -298,8 +298,8 @@ def read(self):
'name': 'depatech',
'time': self.input['took'],
'status': 'success',
- #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']),
- #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})),
+ #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']),
+ #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})),
})
self.meta.navigator.count_total = int(self.input['hits']['total'])
@@ -307,14 +307,14 @@ def read(self):
self.meta.navigator.offset = int(self.options.offset)
self.meta.navigator.limit = int(self.options.limit)
self.meta.navigator.max_hits = int(self.options.max_hits)
- self.meta.navigator.postprocess = SmartBunch()
+ self.meta.navigator.postprocess = SmartMunch()
# Read content
self.documents = self.input['hits']['hits']
self.read_documents()
def document_to_number(self, document):
- _id = document[u'_id']
+ _id = document['_id']
cc, docno, kindcode = _id.split('.')
publication_number = cc + docno + kindcode
number = normalize_patent(publication_number)
@@ -326,7 +326,7 @@ def document_to_family_id(self, document):
def depatech_search(query, options=None):
- options = options or SmartBunch()
+ options = options or SmartMunch()
client = get_depatech_client()
try:
diff --git a/patzilla/access/depatech/clientpool.py b/patzilla/access/depatech/clientpool.py
index 223d094d..62599608 100644
--- a/patzilla/access/depatech/clientpool.py
+++ b/patzilla/access/depatech/clientpool.py
@@ -3,8 +3,8 @@
import logging
import os
from pyramid.httpexceptions import HTTPUnauthorized
-from zope.interface.declarations import implements
from zope.interface.interface import Interface
+from zope.interface import implementer
from patzilla.access.depatech.client import DepaTechClient
from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager
@@ -43,6 +43,8 @@ def from_settings(datasource_settings):
@staticmethod
def from_environment():
+ if not os.environ["DEPATECH_API_USERNAME"] or not os.environ["DEPATECH_API_PASSWORD"]:
+ raise KeyError("DEPATECH_API_USERNAME or DEPATECH_API_PASSWORD is empty")
return {
"api_username": os.environ["DEPATECH_API_USERNAME"],
"api_password": os.environ["DEPATECH_API_PASSWORD"],
@@ -78,13 +80,12 @@ class IDepaTechClientPool(Interface):
pass
+@implementer(IDepaTechClientPool)
class DepaTechClientPool(object):
"""
depa.tech client pool as Pyramid utility implementation.
"""
- implements(IDepaTechClientPool)
-
def __init__(self, api_uri):
logger.info("Creating upstream client pool for depa.tech")
self.api_uri = api_uri
diff --git a/patzilla/access/depatech/expression.py b/patzilla/access/depatech/expression.py
index ef6cebde..c6cd8491 100644
--- a/patzilla/access/depatech/expression.py
+++ b/patzilla/access/depatech/expression.py
@@ -21,7 +21,7 @@
class DepaTechGrammar(CQLGrammar):
def preconfigure(self):
CQLGrammar.preconfigure(self)
- self.cmp_single = u':'.split()
+ self.cmp_single = ':'.split()
class DepaTechParser(object):
@@ -161,7 +161,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
return
expression = None
- format = u'{0}:{1}'
+ format = '{0}:{1}'
# ------------------------------------------
@@ -184,20 +184,20 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
patent = patent_normalized
if patent:
- subexpression = u'PC:{country} AND DE:{number}'.format(**patent)
+ subexpression = 'PC:{country} AND DE:{number}'.format(**patent)
if patent['kind']:
- subexpression += u' AND KI:{kind}'.format(**patent)
- expression_parts.append(u'({})'.format(subexpression))
+ subexpression += ' AND KI:{kind}'.format(**patent)
+ expression_parts.append('({})'.format(subexpression))
# Application number
- subexpression = u'AN:{}'.format(value)
+ subexpression = 'AN:{}'.format(value)
expression_parts.append(subexpression)
- expression = u' OR '.join(expression_parts)
+ expression = ' OR '.join(expression_parts)
# Priority number
- subexpression = u'NP:{}'.format(value)
+ subexpression = 'NP:{}'.format(value)
expression_parts.append(subexpression)
- expression = u' OR '.join(expression_parts)
+ expression = ' OR '.join(expression_parts)
elif key == 'pubdate':
@@ -212,7 +212,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
# e.g. 1991
if len(value) == 4 and value.isdigit():
- value = u'within {}0101,{}1231'.format(value, value)
+ value = 'within {}0101,{}1231'.format(value, value)
# e.g. 1990-2014, 1990 - 2014
value = year_range_to_within(value)
@@ -249,12 +249,12 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
except Exception as ex:
message = 'depatech query: Invalid date or range expression "{0}". Reason: {1}.'.format(value, ex)
- logger.warn(message + ' Exception was: {0}'.format(_exception_traceback()))
+ logger.warning(message + ' Exception was: {0}'.format(_exception_traceback()))
return {'error': True, 'message': message}
elif key == 'inventor' or key == 'applicant':
if not has_booleans(value) and should_be_quoted(value):
- value = u'"{0}"'.format(value)
+ value = '"{0}"'.format(value)
elif key == 'class':
@@ -268,7 +268,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
# Put value into parenthesis, to properly capture expressions
if value:
- value = u'({value})'.format(value=value)
+ value = '({value})'.format(value=value)
# Parse value as simple query expression
query_object = CQL(cql=value)
@@ -290,7 +290,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None):
# ------------------------------------------
if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']:
if has_booleans(value) and not should_be_quoted(value):
- value = u'({0})'.format(value)
+ value = '({0})'.format(value)
# ------------------------------------------
# expression formatter
@@ -358,15 +358,15 @@ def triple_callback(token, index, binop, term):
def format_expression(format, fieldname, value):
expression = None
- if type(fieldname) in types.StringTypes:
+ if type(fieldname) in (str,):
expression = format.format(fieldname, value)
- elif type(fieldname) is types.ListType:
+ elif type(fieldname) is list:
subexpressions = []
for fieldname in fieldname:
subexpressions.append(format.format(fieldname, value))
expression = ' or '.join(subexpressions)
# surround with parentheses
- expression = u'({0})'.format(expression)
+ expression = '({0})'.format(expression)
return expression
def lucene_convert_class(value):
@@ -395,4 +395,4 @@ def should_be_quoted(value):
if __name__ == '__main__':
- print DepaTechParser('IC:G01F000184').keywords
+ print(DepaTechParser('IC:G01F000184').keywords)
diff --git a/patzilla/access/depatech/expression.rst b/patzilla/access/depatech/expression.rst
index eed14b55..78adcac3 100644
--- a/patzilla/access/depatech/expression.rst
+++ b/patzilla/access/depatech/expression.rst
@@ -20,30 +20,30 @@ Empty query
IPC/CPC
=======
>>> DepaTechParser('H01F7/00').dumps()
-u'H01F7/00'
+'H01F7/00'
# Rewrite all patent classifications from depa.tech format to OPS format
>>> DepaTechParser('IC:G01F000184').parse().rewrite_classes_ops().dumps()
-u'IC : G01F1/84'
+'IC : G01F1/84'
>>> DepaTechParser('IC:G01F000184').keywords
-[u'G01F1/84']
+['G01F1/84']
>>> DepaTechExpression.pair_to_elasticsearch('class', 'H04L12/433 or H04L12/24')
-{'query': u'((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'}
+{'query': '((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'}
>>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)')
-{'query': u'((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'}
+{'query': '((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'}
>>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')
-{'query': u'((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'}
+{'query': '((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'}
Publication date
================
>>> DepaTechExpression.pair_to_elasticsearch('pubdate', 'foobar')
-{'message': 'depatech query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True}
+{'error': True, 'message': 'depatech query: Invalid date or range expression "foobar". Reason: foobar.'}
*********
@@ -54,39 +54,39 @@ Simple expressions
==================
>>> DepaTechParser('GT:bildschirm').keywords
-[u'bildschirm']
+['bildschirm']
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm')
-{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'}
+{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'}
>>> DepaTechParser('GT:bildschirm or AB:fahrzeug').keywords
-[u'bildschirm', u'fahrzeug']
+['bildschirm', 'fahrzeug']
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm or fahrzeug')
-{'query': u'(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'}
+{'query': '(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'}
>>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw)').keywords
-[u'bildschirm', u'fahrzeug', u'pkw']
+['bildschirm', 'fahrzeug', 'pkw']
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw)')
-{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'}
+{'query': '(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'}
>>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw not lkw)').keywords
-[u'bildschirm', u'fahrzeug', u'pkw', u'lkw']
+['bildschirm', 'fahrzeug', 'pkw', 'lkw']
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)')
-{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'}
+{'query': '(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'}
>>> DepaTechParser('AB:fahrzeug or AB:pkw').keywords
-[u'fahrzeug', u'pkw']
+['fahrzeug', 'pkw']
>>> DepaTechParser('AB:fahrzeug not GT:pkw').keywords
-[u'fahrzeug', u'pkw']
+['fahrzeug', 'pkw']
@@ -97,17 +97,17 @@ Queries without proper fieldnames like AB:, GT:, AB:, etc. on the left side of t
>>> DepaTechParser('bildschirm').dumps()
-u'bildschirm'
+'bildschirm'
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm')
-{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'}
+{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'}
>>> DepaTechParser('bildschirm and fahrzeug').dumps()
-u'bildschirm and fahrzeug'
+'bildschirm and fahrzeug'
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and fahrzeug')
-{'query': u'(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'}
+{'query': '(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'}
@@ -115,22 +115,22 @@ Expressions containing quoted words
===================================
>>> DepaTechParser('"bildschirm"').dumps()
-u'"bildschirm"'
+'"bildschirm"'
>>> DepaTechParser('"bildschirm"').keywords
[]
>>> DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"')
-{'query': u'(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'}
+{'query': '(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'}
>>> DepaTechParser('AB:"bildschirm"').dumps()
-u'AB : "bildschirm"'
+'AB : "bildschirm"'
>>> DepaTechParser('AB:"bildschirm"').keywords
-[u'bildschirm']
+['bildschirm']
>>> DepaTechParser('AB:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords
-[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr']
+['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr']
@@ -138,19 +138,19 @@ Keyword extraction
==================
>>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00')['query']).keywords
-[u'H01F7/00']
+['H01F7/00']
>>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords
-[u'H01F7/00', u'H01F7/02', u'H02K7/1876']
+['H01F7/00', 'H01F7/02', 'H02K7/1876']
>>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm')['query']).keywords
-[u'bildschirm']
+['bildschirm']
>>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"')['query']).keywords
-[u'bildschirm']
+['bildschirm']
>>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'GT:bildschirm OR AB:(fahrzeug OR pkw)')['query']).keywords
-[u'bildschirm', u'fahrzeug', u'pkw']
+['bildschirm', 'fahrzeug', 'pkw']
@@ -160,18 +160,18 @@ From the wild
Umlauts
-------
->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps()
-u'((AB : *messschieber* or AB : *me\xdfschieber*) and AB : *digital*)'
+>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps()
+'((AB : *messschieber* or AB : *meßschieber*) and AB : *digital*)'
->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords
-[u'messschieber', u'me\xdfschieber', u'digital']
+>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords
+['messschieber', 'meßschieber', 'digital']
More
----
->>> DepaTechParser(u'ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords
-[u'energy', u'water', u'waves', u'Tide', u'90\xb0']
+>>> DepaTechParser('ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords
+['energy', 'water', 'waves', 'Tide', '90°']
->>> DepaTechParser(u'AB:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords
-[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom']
+>>> DepaTechParser('AB:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords
+['bremsgefühl', 'pedalgefühl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenständig', 'independent', 'autonom']
diff --git a/patzilla/access/dpma/depatisconnect.py b/patzilla/access/dpma/depatisconnect.py
index 0065bb25..0d42d769 100644
--- a/patzilla/access/dpma/depatisconnect.py
+++ b/patzilla/access/dpma/depatisconnect.py
@@ -4,9 +4,9 @@
import json
import logging
import requests
-import xmlrpclib
-from StringIO import StringIO
-from ConfigParser import NoOptionError
+import xmlrpc.client
+from io import StringIO
+from configparser import NoOptionError
from lxml import etree as ET
from lxml.builder import E
from cornice.util import to_list
@@ -72,7 +72,7 @@ def run_acquisition(document_number, doctypes=None):
url = archive_service_baseurl + '/RPC2'
transport = RequestsTransport(session=get_client(), timeout=(2, 17))
transport.use_https = use_https
- server = xmlrpclib.ServerProxy(url, transport=transport)
+ server = xmlrpc.client.ServerProxy(url, transport=transport)
return server.runAcquisition(numbers, doctypes)
def fetch_xml(number):
@@ -313,4 +313,4 @@ def depatisconnect_abstracts(document_number, language=None, invalidate=False):
# Failed on 2018-04-23
#response = depatisconnect_claims('USD813591S')
- print json.dumps(response)
+ print(json.dumps(response))
diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py
index 1b000fa3..4b0f42cc 100644
--- a/patzilla/access/dpma/depatisnet.py
+++ b/patzilla/access/dpma/depatisnet.py
@@ -1,15 +1,15 @@
# -*- coding: utf-8 -*-
# (c) 2014-2015 Andreas Motl, Elmyra UG
-import re
import sys
import json
import types
import logging
-import urllib2
+import urllib.request, urllib.error, urllib.parse
import mechanize
-import cookielib
-from BeautifulSoup import BeautifulSoup
-from xlrd import open_workbook
+import re
+import http.cookiejar
+from bs4 import BeautifulSoup
+from xlrd3 import open_workbook
from patzilla.access.generic.search import GenericSearchResponse
from patzilla.util.date import from_german, date_iso
from patzilla.util.network.browser import regular_user_agent
@@ -44,7 +44,7 @@ class DpmaDepatisnetAccess:
]
def __init__(self):
- print 'DpmaDepatisnetAccess.__init__'
+ print('DpmaDepatisnetAccess.__init__')
self.baseurl = 'https://depatisnet.dpma.de/DepatisNet'
self.searchurl_cql = self.baseurl + '/depatisnet?action=experte&switchToLang=en'
self.searchurl_ikofax = self.baseurl + '/depatisnet?action=ikofax&switchToLang=en'
@@ -65,7 +65,7 @@ def setup_browser(self):
# http://wwwsearch.sourceforge.net/mechanize/
# https://github.com/python-mechanize/mechanize
self.browser = mechanize.Browser()
- self.browser.set_cookiejar(cookielib.LWPCookieJar())
+ self.browser.set_cookiejar(http.cookiejar.LWPCookieJar())
self.browser.addheaders = [('User-Agent', regular_user_agent)]
# ignore robots.txt
self.browser.set_handle_robots(False)
@@ -85,7 +85,7 @@ def search_patents(self, query, options=None):
limit = options.get('limit')
max_hits = options.get('max_hits')
- logger.info(u'Searching documents. query="%s", options=%s' % (query, options))
+ logger.info('Searching documents. query="%s", options=%s' % (query, options))
# 0. create browser instance
if not self.browser:
@@ -97,7 +97,7 @@ def search_patents(self, query, options=None):
search_url = self.searchurl_ikofax
try:
self.browser.open(search_url)
- except urllib2.HTTPError as ex:
+ except urllib.error.HTTPError as ex:
logger.critical('Hard error with DEPATISnet: {}'.format(ex))
self.logout()
raise
@@ -107,7 +107,7 @@ def search_patents(self, query, options=None):
self.browser.select_form(nr=0)
#self.browser.select_form(name='form')
- self.browser['query'] = query.encode('iso-8859-1')
+ self.browser['query'] = query
self.browser['hitsPerPage'] = [str(limit)]
self.browser['maxHitsUser'] = [str(max_hits)]
@@ -127,7 +127,7 @@ def search_patents(self, query, options=None):
#self.browser['so'] = ['desc']
# sort by user selection
- if 'sorting' in options and type(options['sorting']) is types.DictionaryType:
+ if 'sorting' in options and type(options['sorting']) is dict:
self.browser['sf'] = [options['sorting']['field']]
self.browser['so'] = [options['sorting']['order']]
@@ -197,7 +197,7 @@ def search_patents(self, query, options=None):
results = self.read_xls_response(xls_response)
except Exception as ex:
logger.error('Problem downloading results in XLS format: {}'.format(ex))
- ex.http_response = ex.read()
+ #ex.http_response = ex.read()
raise
# debugging
@@ -225,22 +225,24 @@ def find_errors(self, body):
'otherwise don\'t hesitate to report this problem to us.')
# Check for error messages
- soup = BeautifulSoup(body)
+ soup = BeautifulSoup(body, 'lxml')
error_message = soup.find('div', {'id': 'errormsg'})
if error_message:
parts = []
[s.extract() for s in error_message('a')]
[parts.append(s.extract()) for s in error_message('p', {'class': 'headline'})]
reason = ', '.join([part.getText() for part in parts])
- error_message = u'{}\n{}'.format(reason, str(error_message))
+ error_message = '{}\n{}'.format(reason, str(error_message))
else:
error_message = ''
- if u'An error has occurred' in body:
- error_message = error_message.replace('\t', '').replace('\r\n', '\n').strip()
+ # Compute error message.
+ prefix = 'Upstream service: '
+ if 'An error has occurred' in body:
+ error_message = prefix + error_message.replace('\t', '').replace('\r\n', '\n').strip()
raise SyntaxError(error_message)
- return error_message
+ return prefix + error_message
def read_xls_response(self, xls_response):
data = excel_to_dict(xls_response.read())
@@ -307,8 +309,8 @@ def read(self):
# TODO: Reference from IFI CLAIMS, fill up/unify.
#'time': self.input['time'],
#'status': self.input['status'],
- #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']),
- #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})),
+ #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']),
+ #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})),
})
self.meta.navigator.count_total = int(self.input['hits'])
@@ -317,7 +319,7 @@ def read(self):
# TODO: Fill up?
#self.meta.navigator.offset = int(self.meta.upstream.Offset)
#self.meta.navigator.limit = int(self.meta.upstream.Limit)
- #self.meta.navigator.postprocess = SmartBunch()
+ #self.meta.navigator.postprocess = SmartMunch()
# Propagate user message
@@ -355,17 +357,17 @@ def excel_to_dict(payload):
start_row = 0
# upstream added new status line to first row, e.g. "Search query: pn=(EP666666) Status: 25.09.2015"
- if u'Search query' in sheet.cell(0, 0).value:
+ if 'Search query' in sheet.cell(0, 0).value:
start_row = 1
# read header values
- keys = [sheet.cell(start_row, col_index).value for col_index in xrange(sheet.ncols)]
+ keys = [sheet.cell(start_row, col_index).value for col_index in range(sheet.ncols)]
# read sheet content
dict_list = []
- for row_index in xrange(start_row + 1, sheet.nrows):
+ for row_index in range(start_row + 1, sheet.nrows):
d = {keys[col_index]: sheet.cell(row_index, col_index).value
- for col_index in xrange(sheet.ncols)}
+ for col_index in range(sheet.ncols)}
dict_list.append(d)
return dict_list
@@ -390,4 +392,4 @@ def excel_to_dict(payload):
else:
data = depatisnet.search_patents('BI=bagger and PC=DE')
- print json.dumps(data)
+ print(json.dumps(data))
diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py
index a662ef49..cd12451f 100644
--- a/patzilla/access/dpma/dpmaregister.py
+++ b/patzilla/access/dpma/dpmaregister.py
@@ -11,12 +11,12 @@
import operator
import mechanicalsoup
from beaker.cache import cache_region
-from bunch import bunchify
+from munch import munchify
from docopt import docopt
from pprint import pformat
from jsonpointer import JsonPointer, JsonPointerException
from xml.etree.ElementTree import fromstring
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
from collections import namedtuple, OrderedDict
from patzilla.access.dpma.util import dpma_file_number
from patzilla.boot.cache import configure_cache_backend
@@ -247,7 +247,7 @@ def search_patent(self, patent):
# has to be adjusted.
time.sleep(1.0)
- if "/TSPD" in self.response.content:
+ if b"/TSPD" in self.response.content:
raise ValueError("Site is protected by F5 Advanced WAF")
# Debugging
@@ -283,7 +283,7 @@ def search_patent(self, patent):
return [entry]
# Sanity checks
- if "0 result/s" in response.content:
+ if b"0 result/s" in response.content:
msg = 'No search results for "{}"'.format(patent)
logger.warning(msg)
raise NoResults(msg)
@@ -311,7 +311,7 @@ def parse_reference_link(self, link, patent):
msg = "Could not parse document reference from link '%s' (patent='%s')" % (link, patent)
logger.error(msg)
raise Exception(msg)
- label = link.find(text=True)
+ label = link.find(string=True)
return reference, label
def fetch_reference(self, result, language):
@@ -369,7 +369,7 @@ def html_compact(self):
PDF-Download
"""
- soup = BeautifulSoup(self.html)
+ soup = BeautifulSoup(self.html, "lxml")
soup_content = soup.find('table', {'id': 'verfahrensdaten_tabelle'})
@@ -528,13 +528,13 @@ def decode(self):
self.decode_badgerfish()
# Document numbers
- self.application_reference = map(
+ self.application_reference = list(map(
operator.itemgetter('document_id'),
- self.convert_list(self.query_data(self.pointer_application_reference)))
+ self.convert_list(self.query_data(self.pointer_application_reference))))
- self.publication_reference = map(
+ self.publication_reference = list(map(
operator.itemgetter('document_id'),
- self.convert_list(self.query_data(self.pointer_publication_reference)))
+ self.convert_list(self.query_data(self.pointer_publication_reference))))
# Classifications
self.classifications['ipcr'] = self.convert_list(self.query_data(self.pointer_classifications_ipcr))
@@ -565,9 +565,9 @@ def decode(self):
self.designated_states = self.convert_list(self.query_data(self.pointer_designated_states))
# Citations
- self.references_cited = map(
+ self.references_cited = list(map(
operator.attrgetter('document_id.doc_number'),
- bunchify(self.convert_list(self.query_data(self.pointer_references_cited))))
+ munchify(self.convert_list(self.query_data(self.pointer_references_cited)))))
# office-specific-bib-data
self.office_specific_bibdata = self.convert_dict(self.query_data(self.pointer_office_specific_bibdata))
@@ -590,7 +590,7 @@ def convert_list(cls, things_raw, nested_element='$'):
things = []
for thing in to_list(things_raw):
if not thing: continue
- if nested_element in thing and len(thing.keys()) == 1:
+ if nested_element in thing and len(list(thing.keys())) == 1:
thing = thing[nested_element]
if isinstance(thing, dict):
thing = cls.convert_dict(thing)
@@ -606,7 +606,7 @@ def convert_dict(cls, data):
return {}
newdata = OrderedDict()
- for key, value in data.items():
+ for key, value in list(data.items()):
# Decode nested text or recurse
if '$' in value:
diff --git a/patzilla/access/epo/espacenet/client_html.py b/patzilla/access/epo/espacenet/client_html.py
index d5202db4..caa83a7f 100644
--- a/patzilla/access/epo/espacenet/client_html.py
+++ b/patzilla/access/epo/espacenet/client_html.py
@@ -97,7 +97,7 @@ def espacenet_fetch_html(document_number, section, element_id=None, element_clas
else:
- if 'Entity not found' in response.content:
+ if b'Entity not found' in response.content:
raise KeyError(message_404)
else:
raise ValueError(message_fail)
diff --git a/patzilla/access/epo/espacenet/pyramid.py b/patzilla/access/epo/espacenet/pyramid.py
index db3038bb..b36da609 100644
--- a/patzilla/access/epo/espacenet/pyramid.py
+++ b/patzilla/access/epo/espacenet/pyramid.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# (c) 2015-2018 Andreas Motl, Elmyra UG
-from __future__ import absolute_import
+
import logging
from cornice.service import Service
from pyramid.httpexceptions import HTTPBadRequest, HTTPNotFound
diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py
index 5a11e3cf..e6da53c2 100644
--- a/patzilla/access/epo/ops/api.py
+++ b/patzilla/access/epo/ops/api.py
@@ -167,7 +167,7 @@ def match_filter(item, filter):
original_publication_numbers += representation_pubrefs_docdb
# Debugging
- #print 'representation_pubref_epodoc:', representation_pubref_epodoc
+ #print( 'representation_pubref_epodoc:', representation_pubref_epodoc)
#print 'representation_pubrefs_docdb:', representation_pubrefs_docdb
# Fetch family members. When failing, use first cycle as representation.
@@ -310,7 +310,7 @@ def ops_published_data_search_real(constituents, query, range):
ops = get_ops_client()
# Send request to OPS.
- range_begin, range_end = map(int, range.split('-'))
+ range_begin, range_end = list(map(int, range.split('-')))
response = ops.published_data_search(
query, range_begin=range_begin, range_end=range_end, constituents=to_list(constituents))
@@ -461,7 +461,7 @@ def image_representative_from_family(patent, countries, func_filter=None):
# Compute alternative family members sorted by given countries
alternatives = family.publications_by_country(exclude=[document], countries=countries)
if func_filter:
- alternatives = filter(func_filter, alternatives)
+ alternatives = list(filter(func_filter, alternatives))
if alternatives:
# TODO: Currently using first item as representative. This might change.
@@ -583,7 +583,7 @@ def inquire_images(document):
def is_fulldocument(node):
- return '@desc' in node and node['@desc'] == u'FullDocument'
+ return '@desc' in node and node['@desc'] == 'FullDocument'
def is_amendment_only(node):
@@ -602,7 +602,7 @@ def is_amendment_only(node):
"""
if is_fulldocument(node):
sections = to_list(node.get('ops:document-section', []))
- if len(sections) == 1 and sections[0]['@name'] == u'AMENDMENT':
+ if len(sections) == 1 and sections[0]['@name'] == 'AMENDMENT':
return True
return False
@@ -659,7 +659,7 @@ def get_ops_image(document, page, kind, format=None):
# 1. Inquire images to compute url to image resource
image_info = inquire_images(document)
if image_info:
- if image_info.has_key(kind):
+ if kind in image_info:
drawing_node = image_info.get(kind)
link = drawing_node['@link']
@@ -670,7 +670,7 @@ def get_ops_image(document, page, kind, format=None):
page = page + start_page - 1
# fallback chain, if no drawings are available
- elif image_info.has_key('JapaneseAbstract'):
+ elif 'JapaneseAbstract' in image_info:
drawing_node = image_info.get('JapaneseAbstract')
link = drawing_node['@link']
page = 1
@@ -885,7 +885,7 @@ def handle_error(response, location):
# Compute name
name = 'http-response'
- body = response_dict['content']
+ body = str(response_dict['content'],'UTF-8')
if 'CLIENT.CQL' in body:
name = 'expression'
@@ -901,44 +901,44 @@ def handle_error(response, location):
response_json.status = response.status_code
# countermeasure against "_JSONError: " or the like
- response_json.detail = str(response.status_code) + ' ' + response.reason + ': ' + response.content
+ response_json.detail = str(response.status_code) + ' ' + str(response.reason) + ': ' + str(response.content)
#print "response:", response
if len(request.errors) == 1:
error_info = request.errors[0].get('description')
if error_info.get('status_code') == 404:
- error_content = error_info.get('content', '')
+ error_content = error_info.get('content', b'')
url = error_info.get('url')
status = str(error_info.get('status_code', '')) + ' ' + error_info.get('reason', '')
- if 'CLIENT.InvalidCountryCode' in error_content:
+ if b'CLIENT.InvalidCountryCode' in error_content:
ops_code = 'CLIENT.InvalidCountryCode'
- message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+ message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
log.error(message)
return response_json
- if 'SERVER.EntityNotFound' in error_content:
+ if b'SERVER.EntityNotFound' in error_content:
ops_code = 'SERVER.EntityNotFound'
- message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+ message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
log.warning(message)
return response_json
- if 'OPS - 404' in error_content or 'Page not found' in error_content:
+ if b'OPS - 404' in error_content or b'Page not found' in error_content:
ops_code = '404 OPS Page not found'
- message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+ message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
log.error(message)
- log.error(u'OPS API errors:\n{}'.format(pformat(request.errors)))
+ log.error('OPS API errors:\n{}'.format(pformat(request.errors)))
response_json.status_code = 502
return response_json
- if 'This API version is not supported' in error_content:
+ if b'This API version is not supported' in error_content:
ops_code = '404 API version not supported'
- message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+ message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
log.error(message)
response_json.status_code = 502
return response_json
- log.error(u'OPS API errors:\n{}'.format(pformat(request.errors)))
+ log.error('OPS API errors:\n{}'.format(pformat(request.errors)))
return response_json
@@ -972,7 +972,7 @@ def pdf_document_build(patent):
# 3. add pdf metadata
page_sections = None
- if resource_info.has_key('ops:document-section'):
+ if 'ops:document-section' in resource_info:
page_sections = resource_info['ops:document-section']
#pprint(page_sections)
@@ -1028,7 +1028,7 @@ def ops_document_kindcodes(patent):
for document in documents:
# TODO: check whether a single occurrance of "not found" should really raise this exception
- if document.has_key('@status') and document['@status'] == 'not found':
+ if '@status' in document and document['@status'] == 'not found':
error = HTTPNotFound(error_msg_access)
raise error
@@ -1080,7 +1080,7 @@ def analytics_family(query):
# B. Enrich all family representatives
# http://ops.epo.org/3.1/rest-services/family/application/docdb/US19288494.xml
- for family_id, document_number in family_representatives.iteritems():
+ for family_id, document_number in family_representatives.items():
payload.setdefault(family_id, {})
@@ -1246,7 +1246,7 @@ def __init__(self):
self.items = []
def __repr__(self):
- return u'<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items))
+ return '<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items))
def publications_by_country(self, exclude=None, countries=None):
exclude = exclude or []
@@ -1290,13 +1290,13 @@ def _find_publication_number_by_prio_number():
def _format_title(title):
- return u'[{0}] {1}'.format(title.get(u'@lang', u'').upper() or u'', title[u'$'] or u'')
+ return '[{0}] {1}'.format(title.get('@lang', '').upper() or '', title['$'] or '')
def _format_abstract(abstract):
if not abstract: return
lines = to_list(abstract['p'])
- lines = map(lambda line: line['$'], lines)
- return u'[{0}] {1}'.format(abstract.get(u'@lang', u'').upper() or u'', '\n'.join(lines))
+ lines = [line['$'] for line in lines]
+ return '[{0}] {1}'.format(abstract.get('@lang', '').upper() or '', '\n'.join(lines))
def _mogrify_parties(partylist, name):
results = []
@@ -1307,9 +1307,9 @@ def _mogrify_parties(partylist, name):
parties[key][party['@data-format']] = party[name]['name']['$']
for key in sorted(parties.keys()):
- name_epodoc = parties[key]['epodoc'].replace(u'\u2002', u' ')
+ name_epodoc = parties[key]['epodoc'].replace('\u2002', ' ')
name_original = parties[key]['original']
- entry = u'{0}; {1}'.format(name_epodoc, name_original)
+ entry = '{0}; {1}'.format(name_epodoc, name_original)
results.append(entry)
return results
@@ -1338,13 +1338,13 @@ def _result_list_compact(response):
try:
titles = to_list(pointer_invention_title.resolve(result))
- titles = map(_format_title, titles)
+ titles = list(map(_format_title, titles))
except JsonPointerException:
titles = None
try:
abstracts = to_list(pointer_abstract.resolve(result))
- abstracts = map(_format_abstract, abstracts)
+ abstracts = list(map(_format_abstract, abstracts))
except JsonPointerException:
abstracts = None
@@ -1382,10 +1382,10 @@ def _summarize_metrics(payload, kind):
except KeyError:
return 'error while computing value'
- total_response_size_entries = filter(lambda item: item['name'] == kind, metrics)[0]['values']
+ total_response_size_entries = [item for item in metrics if item['name'] == kind][0]['values']
#print total_response_size_entries
- total_response_sizes = map(lambda item: float(item['value']), total_response_size_entries)
+ total_response_sizes = [float(item['value']) for item in total_response_size_entries]
#print total_response_sizes
total = sum(total_response_sizes)
@@ -1421,6 +1421,6 @@ def ops_service_usage(date_begin, date_end):
if __name__ == '__main__': # pragma: nocover
data = ops_service_usage('06/11/2014', '09/12/2014')
- print 'Time range: {0}'.format(data['time-range'])
- print 'Response size: {0}G'.format(data['response-size'] / float(10**9))
- print 'Message count: {0}'.format(data['message-count'])
+ print('Time range: {0}'.format(data['time-range']))
+ print('Response size: {0}G'.format(data['response-size'] / float(10**9)))
+ print('Message count: {0}'.format(data['message-count']))
diff --git a/patzilla/access/epo/ops/client.py b/patzilla/access/epo/ops/client.py
index a0037443..75070fd4 100644
--- a/patzilla/access/epo/ops/client.py
+++ b/patzilla/access/epo/ops/client.py
@@ -7,7 +7,7 @@
from mock import mock
from pyramid.httpexceptions import HTTPUnauthorized
from pyramid.threadlocal import get_current_registry
-from zope.interface.declarations import implements
+from zope.interface import implementer
from zope.interface.interface import Interface
from zope.interface.interfaces import ComponentLookupError
@@ -38,6 +38,8 @@ def from_settings(datasource_settings):
@staticmethod
def from_environment():
+ if not os.environ["OPS_API_CONSUMER_KEY"] or not os.environ["OPS_API_CONSUMER_SECRET"]:
+ raise KeyError("OPS_API_CONSUMER_KEY or OPS_API_CONSUMER_SECRET is empty")
return {
"consumer_key": os.environ["OPS_API_CONSUMER_KEY"],
"consumer_secret": os.environ["OPS_API_CONSUMER_SECRET"],
@@ -72,14 +74,12 @@ def attach_ops_client(event):
class IOpsClientPool(Interface):
pass
-
+@implementer(IOpsClientPool)
class OpsClientPool(object):
"""
EPO/OPS client pool as Pyramid utility implementation.
"""
- implements(IOpsClientPool)
-
def __init__(self):
logger.info("Creating upstream client pool for EPO/OPS")
self.clients = {}
diff --git a/patzilla/access/epo/ops/commands.py b/patzilla/access/epo/ops/commands.py
index cd94aa02..faab8cb8 100644
--- a/patzilla/access/epo/ops/commands.py
+++ b/patzilla/access/epo/ops/commands.py
@@ -13,6 +13,7 @@
export OPS_API_CONSUMER_SECRET=rrXdr5WA7x9tudmP
patzilla ops search "txt=(wind or solar) and energy"
+
Use configuration file::
export PATZILLA_CONFIG=patzilla/config/development-local.ini
@@ -20,6 +21,7 @@
"""
import json
import logging
+import sys
from datetime import date, timedelta
import click
@@ -132,7 +134,7 @@ def image(ctx, document, page, kind, format):
Access the OPS image acquisition API, see OPS handbook section 3.1.3.
"""
payload = get_ops_image(document, page, kind, format)
- print(payload)
+ sys.stdout.buffer.write(payload)
ops_cli.add_command(cmd=usage)
diff --git a/patzilla/access/epo/publicationserver/client.py b/patzilla/access/epo/publicationserver/client.py
index 5e9a38bd..5777e48a 100644
--- a/patzilla/access/epo/publicationserver/client.py
+++ b/patzilla/access/epo/publicationserver/client.py
@@ -25,7 +25,7 @@ def fetch_pdf(document_number):
patent = normalize_patent(document_number, as_dict=True, provider='espacenet')
- url_tpl = u'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}'
+ url_tpl = 'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}'
url = url_tpl.format(**patent)
@@ -63,4 +63,4 @@ def fetch_pdf(document_number):
if __name__ == '__main__':
- print fetch_pdf('EP666666A2')
+ print(fetch_pdf('EP666666A2'))
diff --git a/patzilla/access/generic/exceptions.py b/patzilla/access/generic/exceptions.py
index 7e9c4224..6b188cef 100644
--- a/patzilla/access/generic/exceptions.py
+++ b/patzilla/access/generic/exceptions.py
@@ -14,11 +14,11 @@ class GenericAdapterException(Exception):
def __init__(self, *args, **kwargs):
self.data = None
- if kwargs.has_key('data'):
+ if 'data' in kwargs:
self.data = kwargs['data']
self.user_info = ''
- if kwargs.has_key('user_info'):
+ if 'user_info' in kwargs:
self.user_info = kwargs['user_info']
super(GenericAdapterException, self).__init__(*args)
@@ -30,11 +30,11 @@ def get_message(self):
#message_parts.append(ex.user_info)
message['user'] = cgi.escape(self.user_info)
if hasattr(self, 'message'):
-            message_parts.append(self.__class__.__name__ + u': ' + u'{message}'.format(message=cgi.escape(self.message)))
+            message_parts.append(self.__class__.__name__ + ': ' + '{message}'.format(message=cgi.escape(self.message)))
         if hasattr(self, 'details'):
-            message_parts.append(u'{message}'.format(message=cgi.escape(self.details)))
+            message_parts.append('{message}'.format(message=cgi.escape(self.details)))
-        message['details'] = u''.join(message_parts)
+        message['details'] = ''.join(message_parts)
return message
diff --git a/patzilla/access/generic/pdf.py b/patzilla/access/generic/pdf.py
index c46ba6e5..8516b01f 100644
--- a/patzilla/access/generic/pdf.py
+++ b/patzilla/access/generic/pdf.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# (c) 2013-2022 The PatZilla Developers
import logging
-from StringIO import StringIO
+from io import StringIO
from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
import attr
@@ -56,7 +56,7 @@ def pdf_universal_real(patent, response):
if document is None:
log.error('Locating a document at the domestic office requires '
'a decoded document number for "{}"'.format(patent))
- raise ValueError(u'Unable to decode document number {}'.format(patent))
+ raise ValueError('Unable to decode document number {}'.format(patent))
# 1. If it's an EP document, try European publication server first.
if response.pdf is None and document.country == 'EP':
@@ -92,7 +92,7 @@ def pdf_universal_real(patent, response):
try:
# Skip requests for documents w/o kindcode
if not document.kind:
- raise ValueError(u'No kindcode for patent: {}'.format(patent))
+ raise ValueError('No kindcode for patent: {}'.format(patent))
response.pdf = depatisconnect_fetch_pdf(number_normalized)
response.datasource = 'dpma'
diff --git a/patzilla/access/generic/search.py b/patzilla/access/generic/search.py
index 3077c8be..1c819e45 100644
--- a/patzilla/access/generic/search.py
+++ b/patzilla/access/generic/search.py
@@ -4,7 +4,7 @@
import logging
from pprint import pprint
from collections import defaultdict
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.numbers.normalize import normalize_patent
from patzilla.access.generic.exceptions import SearchException
@@ -13,27 +13,27 @@
class GenericSearchClient(object):
def lm(self, message):
- message = u'{backend_name}: {message}'.format(message=message, **self.__dict__)
+ message = '{backend_name}: {message}'.format(message=message, **self.__dict__)
return message
def search_failed(self, message=None, response=None, user_info=None, ex=None, meta=None):
# Compute user info
- user_info = user_info or u'Search failed with unknown reason, please report this error to us.'
+ user_info = user_info or 'Search failed with unknown reason, please report this error to us.'
meta = meta or {}
# Compute reason and status
- message = message or u'unknown'
+ message = message or 'unknown'
if ex:
- message = u'{}: {}'.format(ex.__class__.__name__, ex.message)
+ message = '{}: {}'.format(ex.__class__.__name__, ex.message)
# Compute and emit log message
- log_message = u'{backend_name}: Search failed. message={message}'.format(message=message, **self.__dict__)
+ log_message = '{backend_name}: Search failed. message={message}'.format(message=message, **self.__dict__)
if meta:
- log_message += u', meta=' + unicode(meta)
+ log_message += ', meta=' + str(meta)
if response:
- status = unicode(response.status_code) + u' ' + response.reason
- log_message += u', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8'))
+ status = str(response.status_code) + ' ' + response.reason
+ log_message += ', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8'))
log.error(log_message)
# Return exception object
@@ -50,7 +50,7 @@ def crawl(self, constituents, expression, chunksize):
# fetch first chunk (1-chunksize) from upstream
#first_chunk = self.search(expression, 0, chunksize)
- first_chunk = self.search_method(expression, SmartBunch({'offset': 0, 'limit': chunksize}))
+ first_chunk = self.search_method(expression, SmartMunch({'offset': 0, 'limit': chunksize}))
#print first_chunk
#total_count = int(first_chunk['meta'].get('pager', {}).get('totalEntries', 0))
@@ -82,7 +82,7 @@ def crawl(self, constituents, expression, chunksize):
time.sleep(1)
log.info(self.lm('Crawling from offset {offset}'.format(offset=offset)))
- chunk = self.search_method(expression, SmartBunch({'offset': offset, 'limit': chunksize}))
+ chunk = self.search_method(expression, SmartMunch({'offset': offset, 'limit': chunksize}))
chunks.append(chunk)
@@ -128,7 +128,7 @@ def __init__(self, input, options=None):
# Input data and options
self.input = input
- self.options = options and SmartBunch.bunchify(options) or SmartBunch()
+ self.options = options and SmartMunch.munchify(options) or SmartMunch()
# Setup data structures
self.setup()
@@ -146,13 +146,13 @@ def setup(self):
self.documents = []
# Metadata information, upstream (raw) and downstream (unified)
- self.meta = SmartBunch.bunchify({
+ self.meta = SmartMunch.munchify({
'navigator': {},
'upstream': {},
})
# Output information, upstream (raw) and downstream (unified)
- self.output = SmartBunch.bunchify({
+ self.output = SmartMunch.munchify({
'meta': {},
'numbers': [],
'details': [],
@@ -177,8 +177,8 @@ def read_documents(self):
if number_normalized:
number = number_normalized
- document[u'publication_number'] = number
- document[u'upstream_provider'] = self.meta.upstream.name
+ document['publication_number'] = number
+ document['upstream_provider'] = self.meta.upstream.name
def render(self):
@@ -209,14 +209,14 @@ def remove_family_members(self):
seen = {}
removed = []
removed_map = defaultdict(list)
- stats = SmartBunch(removed = 0)
+ stats = SmartMunch(removed = 0)
def family_remover(item):
fam = self.document_to_family_id(item)
# Sanity checks on family id
# Do not remove documents without valid family id
- if not fam or fam in [u'0', u'-1']:
+ if not fam or fam in ['0', '-1']:
return True
# "Seen" filtering logic
@@ -233,7 +233,7 @@ def family_remover(item):
# Update metadata and content
# 1. Apply family cleansing filter to main documents response
- self.documents = filter(family_remover, self.documents)
+ self.documents = list(filter(family_remover, self.documents))
#print 'removed_map:'; pprint(removed_map)
# 2. Add list of removed family members to output
diff --git a/patzilla/access/google/search.py b/patzilla/access/google/search.py
index a55b81c5..b8888e0c 100644
--- a/patzilla/access/google/search.py
+++ b/patzilla/access/google/search.py
@@ -2,11 +2,13 @@
# (c) 2014 Andreas Motl, Elmyra UG
import json
from pyramid.encode import urlencode
-import re
+# py27 import re
import sys
import logging
import requests
-from BeautifulSoup import BeautifulSoup
+# py27 from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
+
from patzilla.util.expression.keywords import keywords_from_boolean_expression
from patzilla.util.numbers.normalize import normalize_patent
@@ -88,7 +90,7 @@ def tweak_captcha_response(self, body):
captcha_form['action'] = baseurl + '/' + captcha_form['action']
newbody = str(soup)
- print newbody
+ print(newbody)
return newbody
def parse_response(self, body):
@@ -163,7 +165,7 @@ def parse_response(self, body):
'message': message,
}
- print payload
+ print(payload)
return payload
@@ -226,7 +228,7 @@ def pair_to_term(cls, key, value):
value_normalized = normalize_patent(value)
if value_normalized:
value = value_normalized
- term = u'{0}:{1}'.format(fieldname, value)
+ term = '{0}:{1}'.format(fieldname, value)
else:
term = value
@@ -243,7 +245,7 @@ def serialize(self):
"""
query_params = []
tbs_params = []
- for key, value in self.criteria.iteritems():
+ for key, value in self.criteria.items():
term = self.pair_to_term(key, value)
if term['parameter'] == 'q':
query_params.append(term['term'])
@@ -265,7 +267,7 @@ def serialize(self):
def get_keywords(self):
keywords = []
- for key, value in self.criteria.iteritems():
+ for key, value in self.criteria.items():
keywords += keywords_from_boolean_expression(key, value)
return keywords
@@ -282,4 +284,4 @@ def get_keywords(self):
#data = google.search('matrix', 19900)
data = google.search('intitle:matrix', 19900)
- print data
+ print(data)
diff --git a/patzilla/access/ificlaims/api.py b/patzilla/access/ificlaims/api.py
index 7680403f..2c30de9e 100644
--- a/patzilla/access/ificlaims/api.py
+++ b/patzilla/access/ificlaims/api.py
@@ -157,7 +157,7 @@ def ificlaims_download_multi(numberlist, formats):
for format in formats:
- format_parts = format.split(u':')
+ format_parts = format.split(':')
# decode modifiers
if len(format_parts) == 1:
@@ -235,7 +235,7 @@ def ificlaims_download_single(number, format, options=None):
try:
response = ificlaims_download(number, format, options)
- except IFIClaimsException, ex:
+ except IFIClaimsException as ex:
logger.warn('IFI: IFIClaimsException for number={number}, format={format}, options={options}: {ex}'.format(**locals()))
if response.payload:
diff --git a/patzilla/access/ificlaims/client.py b/patzilla/access/ificlaims/client.py
index db9a7f2a..0febf090 100644
--- a/patzilla/access/ificlaims/client.py
+++ b/patzilla/access/ificlaims/client.py
@@ -16,7 +16,7 @@
from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException
from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient
from patzilla.access.ificlaims import get_ificlaims_client
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.numbers.normalize import normalize_patent
log = logging.getLogger(__name__)
@@ -73,7 +73,7 @@ def search_real(self, query, options=None):
query.setdefault('filter', '')
- options = options or SmartBunch()
+ options = options or SmartMunch()
options.setdefault('offset', 0)
options.setdefault('limit', self.pagesize)
@@ -141,36 +141,36 @@ def search_real(self, query, options=None):
if 'msg' not in upstream_error:
upstream_error['msg'] = 'Reason unknown'
- message = u'Response status code: {code}\n\n{msg}'.format(**upstream_error)
+ message = 'Response status code: {code}\n\n{msg}'.format(**upstream_error)
# Enrich "maxClauseCount" message, e.g. raised by {!complexphrase}text:"auto* AND leucht*"~5
- if upstream_error["code"] == 500 and u'maxClauseCount is set to' in upstream_error["msg"]:
+ if upstream_error["code"] == 500 and 'maxClauseCount is set to' in upstream_error["msg"]:
raise self.search_failed(
- user_info=u'Too many terms in phrase expression, wildcard term prefixes might by too short.',
+                    user_info='Too many terms in phrase expression, wildcard term prefixes might be too short.',
message=message,
response=response)
# Enrich "no servers hosting shard" message
elif upstream_error["code"] == 503 and \
(
- u'no servers hosting shard' in upstream_error["msg"] or \
- u'No server is available' in upstream_error["msg"]
+ 'no servers hosting shard' in upstream_error["msg"] or \
+ 'No server is available' in upstream_error["msg"]
):
raise self.search_failed(
- user_info=u'Error while connecting to upstream database. Database might be offline.',
+ user_info='Error while connecting to upstream database. Database might be offline.',
message=message,
response=response)
# Regular traceback
elif upstream_error["code"] == 500 and 'trace' in upstream_error:
- message = u'Response status code: {code}\n\n{trace}'.format(**upstream_error)
+ message = 'Response status code: {code}\n\n{trace}'.format(**upstream_error)
raise self.search_failed(
- user_info=u'Unknown exception at search backend',
+ user_info='Unknown exception at search backend',
message=message,
response=response)
# Enrich "SyntaxError" exception
- elif upstream_error["code"] == 400 and u'ParseException' in upstream_error["msg"]:
+ elif upstream_error["code"] == 400 and 'ParseException' in upstream_error["msg"]:
user_info = re.sub(
r'.*(Encountered.*at line.*?\.).*',
r'SyntaxError, can not parse query expression: \1',
@@ -207,7 +207,7 @@ def search_real(self, query, options=None):
user_info = None
if response_data['message'] == 'JSON error: failed to read response object':
- user_info = u'Error while connecting to upstream database. Database might be offline.'
+ user_info = 'Error while connecting to upstream database. Database might be offline.'
raise self.search_failed(
user_info=user_info,
@@ -237,7 +237,7 @@ def search_real(self, query, options=None):
message = json.dumps(upstream_error)
raise self.search_failed(
- user_info=u'Error while connecting to upstream database. Database might be offline.',
+ user_info='Error while connecting to upstream database. Database might be offline.',
message=message,
response=response)
@@ -252,9 +252,10 @@ def text_fetch(self, ucid, format='xml'):
EP666666A2 => EP0666666A2 (EP0666666A3, EP0666666B1)
"""
- log.info(u"{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format(
+ log.info("{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format(
ucid=ucid, format=format, **self.__dict__))
+
starttime = timeit.default_timer()
if not self.token or self.stale:
@@ -288,7 +289,7 @@ def text_fetch(self, ucid, format='xml'):
@cache_region('longer')
def attachment_list(self, ucid):
- log.info(u"{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__))
+ log.info("{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__))
if not self.token or self.stale:
self.login()
@@ -310,14 +311,14 @@ def attachment_list(self, ucid):
data = json.loads(response.content)
return data
else:
- log.error(u"{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format(
+ log.error("{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format(
ucid=ucid, status=response.status_code, response=response.content , **self.__dict__))
@cache_region('longer')
def attachment_fetch(self, path):
- log.info(u"{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__))
+ log.info("{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__))
if not self.token or self.stale:
self.login()
@@ -341,18 +342,19 @@ def attachment_fetch(self, path):
return response.content
else:
- log.error(u"{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format(
+ log.error("{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format(
path=path, status=response.status_code, response=response.content , **self.__dict__))
def pdf_fetch(self, ucid):
- log.info(u"{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__))
+ log.info("{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__))
attachments_response = self.attachment_list(ucid)
if not attachments_response:
return
+
#print 'attachments_response:'
#pprint(attachments_response)
@@ -435,7 +437,7 @@ def tif_attachments(self, ucid):
"""
# filter tif references only
- tif_attachments = filter(lambda attachment: attachment['media'] in ['image/tiff', 'image/jpeg'], attachments)
+ tif_attachments = [attachment for attachment in attachments if attachment['media'] in ['image/tiff', 'image/jpeg']]
#print 'tif_attachments:'
#pprint(tif_attachments)
return tif_attachments
@@ -443,7 +445,7 @@ def tif_attachments(self, ucid):
def tif_fetch(self, ucid, seq=1):
- log.info(u"{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__))
+ log.info("{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__))
tif_attachments = self.tif_attachments(ucid)
@@ -464,7 +466,7 @@ def tif_fetch(self, ucid, seq=1):
@cache_region('longer')
def png_fetch(self, ucid, seq=1):
- log.info(u"{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__))
+ log.info("{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__))
tif = self.tif_fetch(ucid, seq)
if tif:
png = to_png(tif)
@@ -520,22 +522,22 @@ def read(self):
'name': 'ifi',
'time': self.input['time'],
'status': self.input['status'],
- 'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']),
- 'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})),
+ 'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']),
+ 'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})),
})
self.meta.navigator.count_total = int(self.meta.upstream.pager.totalEntries)
self.meta.navigator.count_page = int(self.meta.upstream.pager.entriesOnThisPage)
self.meta.navigator.offset = int(self.meta.upstream.params.start)
self.meta.navigator.limit = int(self.meta.upstream.params.rows)
- self.meta.navigator.postprocess = SmartBunch()
+ self.meta.navigator.postprocess = SmartMunch()
# Read content
self.documents = self.input['content']['response']['docs']
self.read_documents()
def document_to_number(self, document):
- ucid = document[u'ucid']
+ ucid = document['ucid']
cc, docno, kindcode = ucid.split('-')
number = cc + docno + kindcode
number_normalized = normalize_patent(number)
@@ -548,7 +550,7 @@ def document_to_family_id(self, document):
def ificlaims_client(options=None):
- options = options or SmartBunch()
+ options = options or SmartMunch()
if 'vendor' in options and options.vendor == 'serviva':
client = get_serviva_client()
else:
@@ -576,7 +578,7 @@ def ificlaims_fetch(resource, format, options=None):
@cache_region('search')
def ificlaims_search(query, options=None):
- options = options or SmartBunch()
+ options = options or SmartMunch()
client = ificlaims_client(options=options)
try:
diff --git a/patzilla/access/ificlaims/clientpool.py b/patzilla/access/ificlaims/clientpool.py
index 1e0fc64e..a926ad16 100644
--- a/patzilla/access/ificlaims/clientpool.py
+++ b/patzilla/access/ificlaims/clientpool.py
@@ -4,7 +4,7 @@
import os
from pyramid.httpexceptions import HTTPUnauthorized
-from zope.interface.declarations import implements
+from zope.interface import implementer
from zope.interface.interface import Interface
from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager
@@ -46,6 +46,8 @@ def from_settings(datasource_settings):
@staticmethod
def from_environment():
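+        # Reject credential environment variables which are present but empty.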
+ if not os.environ["IFICLAIMS_API_USERNAME"] or not os.environ["IFICLAIMS_API_PASSWORD"]:
+ raise KeyError("IFICLAIMS_API_USERNAME or IFICLAIMS_API_PASSWORD is empty")
return {
"api_username": os.environ["IFICLAIMS_API_USERNAME"],
"api_password": os.environ["IFICLAIMS_API_PASSWORD"],
@@ -81,12 +83,12 @@ class IIFIClaimsClientPool(Interface):
pass
+@implementer(IIFIClaimsClientPool)
class IFIClaimsClientPool(object):
"""
IFI CLAIMS client pool as Pyramid utility implementation.
"""
- implements(IIFIClaimsClientPool)
def __init__(self, api_uri, api_uri_json):
logger.info("Creating upstream client pool for IFI CLAIMS")
diff --git a/patzilla/access/ificlaims/commands.py b/patzilla/access/ificlaims/commands.py
index d44f2d56..1fe7f7fe 100644
--- a/patzilla/access/ificlaims/commands.py
+++ b/patzilla/access/ificlaims/commands.py
@@ -33,7 +33,7 @@
from patzilla.boot.cache import configure_cache_backend
from patzilla.boot.config import BootConfiguration
from patzilla.util.config import get_configfile_from_commandline
-from patzilla.util.data.container import SmartBunch, jd
+from patzilla.util.data.container import SmartMunch, jd
from patzilla.boot.framework import pyramid_setup
@@ -79,7 +79,7 @@ def search(ctx, expression, request_json):
# Invoke API and output result.
logger.warning("Only the first 100 hits will be displayed. The CLI currently does not employ paging.")
- results = client.search(SmartBunch({'expression': expression}), SmartBunch({'offset': 0, 'limit': 100}))
+ results = client.search(SmartMunch({'expression': expression}), SmartMunch({'offset': 0, 'limit': 100}))
print(jd(results))
@@ -95,11 +95,12 @@ def make_request(client):
#results = client.search('pa:siemens OR pa:bosch', 0, 10)
#results = client.search('pa:(siemens OR bosch)', 0, 10)
#results = client.search('text:"solar energy"', 0, 10)
- results = client.search(SmartBunch({'expression': 'text:solar energy'}), SmartBunch({'offset': 0, 'limit': 10}))
- #results = client.search(SmartBunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartBunch({'offset': 0, 'limit': 10}))
+ results = client.search(SmartMunch({'expression': 'text:solar energy'}), SmartMunch({'offset': 0, 'limit': 10}))
+ #results = client.search(SmartMunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartMunch({'offset': 0, 'limit': 10}))
#results = client.search(u'text:抑血管生成素的药物用途', 0, 10)
#results = client.search(u'text:放射線を照射する放射線源と', 0, 10)
- #results = client.search(SmartBunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartBunch({'offset': 0, 'limit': 50}))
+ #results = client.search(SmartMunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartMunch({'offset': 0, 'limit': 50}))
+
#results = client.text_fetch('US-20100077592-A1')
#results = client.text_fetch('CN-1055497-A')
diff --git a/patzilla/access/ificlaims/expression.py b/patzilla/access/ificlaims/expression.py
index 9496b698..64a8a704 100644
--- a/patzilla/access/ificlaims/expression.py
+++ b/patzilla/access/ificlaims/expression.py
@@ -22,7 +22,7 @@
class IFIClaimsGrammar(CQLGrammar):
def preconfigure(self):
CQLGrammar.preconfigure(self)
- self.cmp_single = u':'.split()
+ self.cmp_single = ':'.split()
class IFIClaimsParser(object):
@@ -60,8 +60,8 @@ def trim_complexphrase(self):
after: text:((parallel* AND schalt*) AND (antrieb* AND stufe*))
"""
#print >>sys.stderr, 'expression-before:', self.expression
- self.expression = re.sub(u'"(.+?)"~\d+', u'(\\1)', self.expression)
- self.expression = self.expression.replace(u'{!complexphrase}', '')
+        self.expression = re.sub(r'"(.+?)"~\d+', r'(\1)', self.expression)
+ self.expression = self.expression.replace('{!complexphrase}', '')
#print >>sys.stderr, 'expression-after :', self.expression
@property
@@ -192,7 +192,7 @@ def pair_to_solr(cls, key, value, modifiers=None):
return
expression = None
- format = u'{0}:{1}'
+ format = '{0}:{1}'
# ------------------------------------------
@@ -230,7 +230,7 @@ def pair_to_solr(cls, key, value, modifiers=None):
# within 2009-08-20,2011-03-03
if 'within' in value:
within_dates = parse_date_within(value)
- elements_are_years = all([len(value) == 4 and value.isdigit() for value in within_dates.values()])
+ elements_are_years = all([len(value) == 4 and value.isdigit() for value in list(within_dates.values())])
if elements_are_years:
fieldname = 'pdyear'
@@ -258,12 +258,12 @@ def pair_to_solr(cls, key, value, modifiers=None):
except Exception as ex:
message = 'IFI CLAIMS query: Invalid date or range expression "{0}". Reason: {1}.'.format(value, ex)
- logger.warn(message + '\nException was:\n{0}'.format(_exception_traceback()))
+ logger.warning(message + '\nException was:\n{0}'.format(_exception_traceback()))
return {'error': True, 'message': message}
elif key == 'inventor' or key == 'applicant':
if not has_booleans(value) and should_be_quoted(value):
- value = u'"{0}"'.format(value)
+ value = '"{0}"'.format(value)
elif key == 'class':
@@ -277,7 +277,7 @@ def pair_to_solr(cls, key, value, modifiers=None):
# Put value into parenthesis, to properly capture expressions
if value:
- value = u'({value})'.format(value=value)
+ value = '({value})'.format(value=value)
# Parse value as simple query expression
query_object = CQL(cql=value)
@@ -297,7 +297,7 @@ def pair_to_solr(cls, key, value, modifiers=None):
# ------------------------------------------
if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']:
if has_booleans(value) and not should_be_quoted(value) and not '{!complexphrase' in value:
- value = u'({0})'.format(value)
+ value = '({0})'.format(value)
# ------------------------------------------
# expression formatter
@@ -368,15 +368,15 @@ def triple_callback(token, index, binop, term):
def format_expression(format, fieldname, value):
expression = None
- if type(fieldname) in types.StringTypes:
+ if type(fieldname) in (str,):
expression = format.format(fieldname, value)
- elif type(fieldname) is types.ListType:
+ elif type(fieldname) is list:
subexpressions = []
for fieldname in fieldname:
subexpressions.append(format.format(fieldname, value))
expression = ' or '.join(subexpressions)
# surround with parentheses
- expression = u'({0})'.format(expression)
+ expression = '({0})'.format(expression)
return expression
def ifi_convert_class(value):
@@ -406,5 +406,5 @@ def should_be_quoted(value):
if __name__ == '__main__':
- print IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords
- print IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords
+ print(IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords)
+ print(IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords)
diff --git a/patzilla/access/ificlaims/expression.rst b/patzilla/access/ificlaims/expression.rst
index 2162ee87..61c78a07 100644
--- a/patzilla/access/ificlaims/expression.rst
+++ b/patzilla/access/ificlaims/expression.rst
@@ -20,30 +20,30 @@ Empty query
IPC/CPC
=======
>>> IFIClaimsParser('H01F7/00').dumps()
-u'H01F7/00'
+'H01F7/00'
# Rewrite all patent classifications from IFI format to OPS format
>>> IFIClaimsParser('ic:G01F000184').parse().rewrite_classes_ops().dumps()
-u'ic : G01F1/84'
+'ic : G01F1/84'
>>> IFIClaimsParser('ic:G01F000184').keywords
-[u'G01F1/84']
+['G01F1/84']
>>> IFIClaimsExpression.pair_to_solr('class', 'H04L12/433 or H04L12/24')
-{'query': u'((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'}
+{'query': '((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'}
>>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)')
-{'query': u'((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'}
+{'query': '((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'}
>>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')
-{'query': u'((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'}
+{'query': '((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'}
Publication date
================
>>> IFIClaimsExpression.pair_to_solr('pubdate', 'foobar')
-{'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True}
+{'error': True, 'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". Reason: foobar.'}
*********
@@ -54,39 +54,39 @@ Simple expressions
==================
>>> IFIClaimsParser('ttl:bildschirm').keywords
-[u'bildschirm']
+['bildschirm']
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm')
-{'query': u'text:bildschirm'}
+{'query': 'text:bildschirm'}
>>> IFIClaimsParser('ttl:bildschirm or ab:fahrzeug').keywords
-[u'bildschirm', u'fahrzeug']
+['bildschirm', 'fahrzeug']
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm or fahrzeug')
-{'query': u'text:(bildschirm OR fahrzeug)'}
+{'query': 'text:(bildschirm OR fahrzeug)'}
>>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw)').keywords
-[u'bildschirm', u'fahrzeug', u'pkw']
+['bildschirm', 'fahrzeug', 'pkw']
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw)')
-{'query': u'text:(bildschirm AND (fahrzeug OR pkw))'}
+{'query': 'text:(bildschirm AND (fahrzeug OR pkw))'}
>>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw not lkw)').keywords
-[u'bildschirm', u'fahrzeug', u'pkw', u'lkw']
+['bildschirm', 'fahrzeug', 'pkw', 'lkw']
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)')
-{'query': u'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'}
+{'query': 'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'}
>>> IFIClaimsParser('ab:fahrzeug or ab:pkw').keywords
-[u'fahrzeug', u'pkw']
+['fahrzeug', 'pkw']
>>> IFIClaimsParser('ab:fahrzeug not ttl:pkw').keywords
-[u'fahrzeug', u'pkw']
+['fahrzeug', 'pkw']
@@ -96,22 +96,22 @@ Expressions with proximity operators
Queries based on the proximity of words to each other in a document.
>>> IFIClaimsParser('text:((aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords
-[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr']
+['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr']
>>> IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords
-[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr']
+['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr']
>>> IFIClaimsExpression.pair_to_solr('fulltext', '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6')
{'query': '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6'}
>>> IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords
-[u'parallel', u'schalt', u'F16H61/04']
+['parallel', 'schalt', 'F16H61/04']
>>> IFIClaimsParser('((ic:F16H006104 OR cpc:F16H006104)) AND {!complexphrase}text:"parallel* AND schalt*"~6').keywords
-[u'F16H61/04', u'parallel', u'schalt']
+['F16H61/04', 'parallel', 'schalt']
>>> IFIClaimsParser('{!complexphrase}text:("parallel* AND schalt*"~6 AND "antrieb* AND stufe*"~3)').keywords
-[u'parallel', u'schalt', u'antrieb', u'stufe']
+['parallel', 'schalt', 'antrieb', 'stufe']
@@ -122,17 +122,17 @@ Queries without proper fieldnames like ab=, ti=, bi=, etc. on the left side of t
>>> IFIClaimsParser('bildschirm').dumps()
-u'bildschirm'
+'bildschirm'
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm')
-{'query': u'text:bildschirm'}
+{'query': 'text:bildschirm'}
>>> IFIClaimsParser('bildschirm and fahrzeug').dumps()
-u'bildschirm and fahrzeug'
+'bildschirm and fahrzeug'
>>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and fahrzeug')
-{'query': u'text:(bildschirm AND fahrzeug)'}
+{'query': 'text:(bildschirm AND fahrzeug)'}
@@ -140,22 +140,22 @@ Expressions containing quoted words
===================================
>>> IFIClaimsParser('"bildschirm"').dumps()
-u'"bildschirm"'
+'"bildschirm"'
>>> IFIClaimsParser('"bildschirm"').keywords
[]
>>> IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"')
-{'query': u'text:"bildschirm"'}
+{'query': 'text:"bildschirm"'}
>>> IFIClaimsParser('ab:"bildschirm"').dumps()
-u'ab : "bildschirm"'
+'ab : "bildschirm"'
>>> IFIClaimsParser('ab:"bildschirm"').keywords
-[u'bildschirm']
+['bildschirm']
>>> IFIClaimsParser('text:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords
-[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr']
+['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr']
@@ -163,19 +163,19 @@ Keyword extraction
==================
>>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00')['query']).keywords
-[u'H01F7/00']
+['H01F7/00']
>>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords
-[u'H01F7/00', u'H01F7/02', u'H02K7/1876']
+['H01F7/00', 'H01F7/02', 'H02K7/1876']
>>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm')['query']).keywords
-[u'bildschirm']
+['bildschirm']
>>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"')['query']).keywords
-[u'bildschirm']
+['bildschirm']
>>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'ttl:bildschirm OR ab:(fahrzeug OR pkw)')['query']).keywords
-[u'bildschirm', u'fahrzeug', u'pkw']
+['bildschirm', 'fahrzeug', 'pkw']
@@ -185,18 +185,18 @@ From the wild
Umlauts
-------
->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps()
-u'((tac : *messschieber* or tac : *me\xdfschieber*) and tac : *digital*)'
+>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps()
+'((tac : *messschieber* or tac : *meßschieber*) and tac : *digital*)'
->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords
-[u'messschieber', u'me\xdfschieber', u'digital']
+>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords
+['messschieber', 'meßschieber', 'digital']
More
----
->>> IFIClaimsParser(u'ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords
-[u'energy', u'water', u'waves', u'Tide', u'90\xb0']
+>>> IFIClaimsParser('ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords
+['energy', 'water', 'waves', 'Tide', '90°']
->>> IFIClaimsParser(u'text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords
-[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom']
+>>> IFIClaimsParser('text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords
+['bremsgefühl', 'pedalgefühl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenständig', 'independent', 'autonom']
diff --git a/patzilla/access/office.py b/patzilla/access/office.py
index 43531d75..da08fb70 100644
--- a/patzilla/access/office.py
+++ b/patzilla/access/office.py
@@ -76,5 +76,5 @@ def jump_office(request):
else:
return url
- return HTTPNotFound(u'Could not locate document "{document_number}" at {office}/{service}.'.format(
+ return HTTPNotFound('Could not locate document "{document_number}" at {office}/{service}.'.format(
document_number=document_number, office=office, service=service))
diff --git a/patzilla/access/sip/client.py b/patzilla/access/sip/client.py
index 9eba41bb..11de635d 100644
--- a/patzilla/access/sip/client.py
+++ b/patzilla/access/sip/client.py
@@ -9,7 +9,7 @@
from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException
from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient
from patzilla.access.sip import get_sip_client
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
"""
@@ -27,9 +27,9 @@ class SipException(GenericAdapterException):
def __init__(self, *args, **kwargs):
self.sip_info = ''
super(SipException, self).__init__(*args)
- if kwargs.has_key('sip_info'):
+ if 'sip_info' in kwargs:
self.sip_info = kwargs['sip_info']
- if kwargs.has_key('sip_response'):
+ if 'sip_response' in kwargs:
self.sip_info = kwargs['sip_response'].get_childvalue('Info')
if self.sip_info:
self.user_info = self.sip_info
@@ -98,7 +98,7 @@ def logout(self):
def search(self, expression, options=None):
- options = options or SmartBunch()
+ options = options or SmartMunch()
options.setdefault('offset', 0)
options.setdefault('limit', self.pagesize)
@@ -106,7 +106,7 @@ def search(self, expression, options=None):
offset = options.offset
limit = options.limit
- log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}".format(
+ log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}".format(
expression, offset, limit, **self.__dict__))
if not self.sessionid or self.stale:
@@ -116,11 +116,11 @@ def search(self, expression, options=None):
try:
response = requests.post(self.uri + '/search/new', data={'session': self.sessionid, 'searchtree': expression})
except (ConnectionError, ConnectTimeout) as ex:
- log.error(u'SIP search for user "{username}" at "{uri}" failed. Reason: {0} {1}.'.format(
+ log.error('SIP search for user "{username}" at "{uri}" failed. Reason: {0} {1}.'.format(
ex.__class__, ex.message, username=self.username, uri=self.uri))
self.logout()
raise SearchException(ex.message,
- sip_info=u'Error or timeout while connecting to upstream database. Database might be offline.')
+ sip_info='Error or timeout while connecting to upstream database. Database might be offline.')
# Process search response
if response.status_code == 200:
@@ -129,7 +129,7 @@ def search(self, expression, options=None):
search_response = self._search_parse_xml(response.content)
if search_response['success'] == 'false':
- raise SearchException(u'Search failed', sip_response=search_response['response'])
+ raise SearchException('Search failed', sip_response=search_response['response'])
if 'ResultSetId' in search_response['data']:
@@ -145,7 +145,7 @@ def search(self, expression, options=None):
#print "SIP search results:", search_results
duration = timeit.default_timer() - starttime
- log.info(u'Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info))
+ log.info('Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info))
upstream_response = {
'info': search_info,
@@ -159,33 +159,33 @@ def search(self, expression, options=None):
duration = round(duration, 1)
# TODO: Unify between SIP and IFI CLAIMS
- log.info(u'{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format(
+ log.info('{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format(
duration=duration, meta=result['meta'].prettify(), **self.__dict__))
if not result['numbers']:
- log.warn(u'{backend_name} search from "{user}" for "{expression}" had empty results.'.format(
+ log.warn('{backend_name} search from "{user}" for "{expression}" had empty results.'.format(
user=self.username, expression=expression, **self.__dict__
))
return result
else:
- message = u'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text)
- raise SearchException(message, sip_info=u'Search failed. Search response could not be parsed.')
+ message = 'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text)
+ raise SearchException(message, sip_info='Search failed. Search response could not be parsed.')
except Exception as ex:
- log.error(u'Search failed. {name}: {message}. expression={expression}, response={response}'.format(
+ log.error('Search failed. {name}: {message}. expression={expression}, response={response}'.format(
name=ex.__class__.__name__, message=ex.message, response=response.text, expression=expression))
raise
else:
response_status = str(response.status_code) + ' ' + response.reason
- message = u'SIP search failed. Reason: response status != 200. status={0}, content={1}'.format(
+ message = 'SIP search failed. Reason: response status != 200. status={0}, content={1}'.format(
response_status,
response.text)
log.error(message)
raise SearchException(message,
- sip_info=u'HTTP error "{status}" while searching upstream database'.format(status=response_status))
+ sip_info='HTTP error "{status}" while searching upstream database'.format(status=response_status))
def getresults(self, resultid, options):
@@ -207,23 +207,23 @@ def getresults(self, resultid, options):
raise SearchException(message)
duration = timeit.default_timer() - starttime
- log.info(u'SIP getresults succeeded. duration={0}s'.format(round(duration, 1)))
+ log.info('SIP getresults succeeded. duration={0}s'.format(round(duration, 1)))
return results
except SearchException:
raise
except Exception as ex:
- message = u'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format(
+ message = 'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format(
ex.__class__, ex.message)
- logmessage = u'{}. response={}'.format(message, response.text)
+ logmessage = '{}. response={}'.format(message, response.text)
log.error(logmessage)
raise SearchException(message)
else:
- message = u'SIP getresults failed. status_code={0}'.format(
+ message = 'SIP getresults failed. status_code={0}'.format(
str(response.status_code) + ' ' + response.reason)
- logmessage = u'{}. response={}'.format(message, response.text)
+ logmessage = '{}. response={}'.format(message, response.text)
log.error(logmessage)
raise SearchException(message)
@@ -243,8 +243,8 @@ def _login_parse_xml(self, xml):
                             'this happens regularly on Wednesday evenings at 17:00 hours UTC (19:00 hours CEST) ' \
'and usually does not take longer than one hour.'
- if error.sip_info == u'i':
- error.sip_info = u'Login failed'
+ if error.sip_info == 'i':
+ error.sip_info = 'Login failed'
raise error
def _search_parse_xml(self, xml):
@@ -329,15 +329,15 @@ def read(self):
# TODO: Reference from IFI CLAIMS, fill up/unify.
#'time': self.input['time'],
#'status': self.input['status'],
- #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']),
- #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})),
+ #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']),
+ #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})),
})
self.meta.navigator.count_total = int(self.meta.upstream.MemCount)
self.meta.navigator.count_page = len(self.input['results'])
self.meta.navigator.offset = int(self.meta.upstream.Offset)
self.meta.navigator.limit = int(self.meta.upstream.Limit)
- self.meta.navigator.postprocess = SmartBunch()
+ self.meta.navigator.postprocess = SmartMunch()
# Read content
"""
diff --git a/patzilla/access/sip/clientpool.py b/patzilla/access/sip/clientpool.py
index f28a8c3c..1c8e679d 100644
--- a/patzilla/access/sip/clientpool.py
+++ b/patzilla/access/sip/clientpool.py
@@ -4,8 +4,9 @@
import os
from pyramid.httpexceptions import HTTPUnauthorized
-from zope.interface.declarations import implements
+from zope.interface import implementer
from zope.interface.interface import Interface
from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager
from patzilla.access.sip.client import SipClient
@@ -45,6 +46,8 @@ def from_settings(datasource_settings):
@staticmethod
def from_environment():
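+        # Reject credential environment variables which are present but empty.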
+ if not os.environ["SIP_API_USERNAME"] or not os.environ["SIP_API_PASSWORD"]:
+ raise KeyError("SIP_API_USERNAME or SIP_API_PASSWORD is empty")
return {
"api_username": os.environ["SIP_API_USERNAME"],
"api_password": os.environ["SIP_API_PASSWORD"],
@@ -80,13 +83,12 @@ class ISipClientPool(Interface):
pass
+@implementer(ISipClientPool)
class SipClientPool(object):
"""
SIP client pool as Pyramid utility implementation.
"""
- implements(ISipClientPool)
-
def __init__(self, api_uri):
logger.info("Creating upstream client pool for SIP")
self.api_uri = api_uri
@@ -103,3 +105,4 @@ def get(self, identifier, credentials=None, debug=False):
uri=self.api_uri, username=credentials['api_username'], password=credentials['api_password'])
return self.clients.get(identifier)
+
diff --git a/patzilla/access/sip/concordance.py b/patzilla/access/sip/concordance.py
index 210371ab..ac9f44f8 100644
--- a/patzilla/access/sip/concordance.py
+++ b/patzilla/access/sip/concordance.py
@@ -202,7 +202,7 @@ def decode_row(row):
try:
stream = DictReader(csvfile)
- print stream.fieldnames
+ print(stream.fieldnames)
except Exception as ex:
log.error('SIP CPC class map: Reading CSV file {} failed: {}'.format(filename, ex.message))
return
@@ -225,7 +225,7 @@ def decode_row(row):
return
ws = wb.active
- print 'XLSX row 1:', [cell.value for cell in ws.rows[0]]
+ print('XLSX row 1:', [cell.value for cell in ws.rows[0]])
stream = ws.rows[1:20]
#sys.exit(1)
diff --git a/patzilla/access/sip/expression.py b/patzilla/access/sip/expression.py
index b5254f38..cc0e1583 100644
--- a/patzilla/access/sip/expression.py
+++ b/patzilla/access/sip/expression.py
@@ -49,16 +49,16 @@ class SipExpression(object):
}
sip_xml_expression_templates = {
- 'patentnumber': u'{value}',
- 'fulltext': u'{value}',
+ 'patentnumber': '{value}',
+ 'fulltext': '{value}',
#'applicant': u'{value}',
#'inventor': u'{value}',
- 'applicant': u'{value}',
- 'inventor': u'{value}',
+ 'applicant': '{value}',
+ 'inventor': '{value}',
'pubdate': {
- 'both': u'',
- 'startdate': u'',
- 'enddate': u'',
+ 'both': '',
+ 'startdate': '',
+ 'enddate': '',
}
}
@@ -83,11 +83,11 @@ def pair_to_sip_xml(cls, key, value, modifiers):
# {u'fulltext': {u'claim': True, u'abstract': True, u'description': True, u'title': True}
# ->
# {u'fulltext': {u'claim': 'true', u'abstract': 'true', u'description': 'true', u'title': 'true'}
- for modifier_field, modifier_values in modifiers.iteritems():
- if type(modifiers[modifier_field]) is types.DictionaryType:
- for modifier_name, modifier_value in modifiers[modifier_field].iteritems():
+ for modifier_field, modifier_values in modifiers.items():
+ if type(modifiers[modifier_field]) is dict:
+ for modifier_name, modifier_value in modifiers[modifier_field].items():
modifiers[modifier_field][modifier_name] = str(modifier_value).lower()
- elif type(modifiers[modifier_field]) is types.BooleanType:
+ elif type(modifiers[modifier_field]) is bool:
modifiers[modifier_field] = str(modifiers[modifier_field]).lower()
xml_part = None
@@ -99,7 +99,7 @@ def pair_to_sip_xml(cls, key, value, modifiers):
if len(value) == 4 and value.isdigit():
# e.g. 1978
- value = u'within {year}-01-01,{year}-12-31'.format(year=value)
+ value = 'within {year}-01-01,{year}-12-31'.format(year=value)
# e.g. 1990-2014, 1990 - 2014
value = year_range_to_within(value)
@@ -198,13 +198,13 @@ def pair_to_sip_xml(cls, key, value, modifiers):
#print pretty_print(xml_part)
except FulltextDecodingError as ex:
- return {'error': True, 'message': unicode(ex)}
+ return {'error': True, 'message': str(ex)}
except pyparsing.ParseException as ex:
-            return {'error': True, 'message': u'<pre>' + ex.explanation + '</pre>'}
+            return {'error': True, 'message': '<pre>' + ex.explanation + '</pre>'}
except SyntaxError as ex:
-            return {'error': True, 'message': u'<pre>' + unicode(ex) + '</pre>'}
+            return {'error': True, 'message': '<pre>' + str(ex) + '</pre>'}
elif key in cls.sip_xml_expression_templates:
template = cls.sip_xml_expression_templates[key]
@@ -232,7 +232,7 @@ def pair_to_sip_xml(cls, key, value, modifiers):
def compute_modifiers(cls, modifiers):
# prefer defaults (all True), but mixin modifiers from query
- for modifier_field, modifier_values in cls.modifier_defaults.iteritems():
+ for modifier_field, modifier_values in cls.modifier_defaults.items():
if modifier_field in cls.modifier_defaults:
backup = deepcopy(modifiers.get(modifier_field, {}))
modifiers[modifier_field] = cls.modifier_defaults[modifier_field]
@@ -313,8 +313,8 @@ def to_etree(self, expression):
result = self.parser._parser(expression, parseAll=True)
except pyparsing.ParseException as ex:
- ex.explanation = u'%s\n%s\n%s' % (expression, u' ' * ex.loc + u'^\n', ex)
- logger.error(u'\n%s', ex.explanation)
+ ex.explanation = '%s\n%s\n%s' % (expression, ' ' * ex.loc + '^\n', ex)
+ logger.error('\n%s', ex.explanation)
raise
#print 'result:', result, type(result), dir(result)
@@ -487,16 +487,16 @@ def parse(self):
def eexists(element, name):
return element.find(name) is not None
child_constraints =\
- all(map(lambda x: eexists(root, x), ['index', 'binop'])) and \
- any(map(lambda x: eexists(root, x), ['value', 'quotes']))
+ all([eexists(root, x) for x in ['index', 'binop']]) and \
+ any([eexists(root, x) for x in ['value', 'quotes']])
if root.tag == 'parenthesis' and child_constraints:
root.tag = 'term'
# also rewrite all other parenthesis looking like terms
for parens in root.iter('parenthesis'):
child_constraints =\
- all(map(lambda x: eexists(parens, x), ['index', 'binop'])) and\
- any(map(lambda x: eexists(parens, x), ['value', 'quotes', 'or', 'and', 'not']))
+ all([eexists(parens, x) for x in ['index', 'binop']]) and\
+ any([eexists(parens, x) for x in ['value', 'quotes', 'or', 'and', 'not']])
if child_constraints:
parens.tag = 'term'
@@ -522,7 +522,7 @@ def eexists(element, name):
elif boolean_content:
value = self.convert_boolean_nodes(term)
- value = value.replace(u'and not', u'not')
+ value = value.replace('and not', 'not')
# 2. expand triple
@@ -600,7 +600,7 @@ def convert_elements(self, root, element, tags):
# skip elements without a valid representation on this level, e.g. "(ab=fahrzeug or ab=pkw)"
if not value:
return root
- value = value.replace(u'and not', u'not')
+ value = value.replace('and not', 'not')
elif tag in ['near', 'span']:
value = self.convert_proximity_nodes(element_nested)
@@ -628,13 +628,13 @@ def _get_index_binop(self, element):
if index_node is not None:
index = index_node.text
else:
- index = u'bi'
+ index = 'bi'
# 2. binop
if binop_node is not None:
binop = binop_node.text
else:
- binop = u'='
+ binop = '='
return index, binop
@@ -667,14 +667,14 @@ def convert_proximity_nodes(self, container):
# fall back to using already translated "text" nodes
if value:
- expression = map(lambda x: x.text, value)
- map(lambda x: self.keyword_add(x), expression)
+ expression = [x.text for x in value]
+ list(map(lambda x: self.keyword_add(x), expression))
elif text:
- expression = map(lambda x: '({0})'.format(x.text), text)
+ expression = ['({0})'.format(x.text) for x in text]
- expression = u' '.join(expression)
+ expression = ' '.join(expression)
distance = distance[0].text
- value = u'{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance)
+ value = '{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance)
return value
def convert_boolean_nodes(self, node):
@@ -693,7 +693,7 @@ def convert_boolean_nodes(self, node):
elif element.tag == 'parenthesis':
result = self.convert_boolean_nodes(element)
if result:
- result = u'(' + result + u')'
+ result = '(' + result + ')'
child_values.append(result)
elif element.tag in ['near', 'span']:
@@ -706,9 +706,9 @@ def convert_boolean_nodes(self, node):
pass
if len(child_values) == 1 and node.tag == 'not':
- child_values = [u'not ' + child_values[0]]
+ child_values = ['not ' + child_values[0]]
- return u' {0} '.format(node.tag).join(child_values)
+ return ' {0} '.format(node.tag).join(child_values)
def decode_quoted_value(self, element):
"""
@@ -731,15 +731,15 @@ def decode_quoted_value(self, element):
value = element.text
elif element.tag == 'quotes':
- values = map(lambda x: x.text, element.iter('value'))
- value = u'"{0}"'.format(u' '.join(values))
+ values = [x.text for x in element.iter('value')]
+ value = '"{0}"'.format(' '.join(values))
return value
def expand_fulltext(self, value, origin=None, modifiers=None):
triple = value
- origin = origin or u'{0}{1}{2}'.format(*triple)
+ origin = origin or '{0}{1}{2}'.format(*triple)
ft_field, ft_op, ft_value = triple
@@ -753,15 +753,15 @@ def expand_fulltext(self, value, origin=None, modifiers=None):
try:
ft_modifier = SipExpression.fulltext_field_modifier_map[ft_field]
except KeyError:
- message = u'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field)
+ message = 'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field)
logger.warn(message)
raise FulltextDecodingError(message)
ft_modifiers = SipExpression.fulltext_modifiers_off.copy()
- if type(ft_modifier) in types.StringTypes:
+ if type(ft_modifier) in (str,):
ft_modifiers.update({ft_modifier: 'true'})
- elif type(ft_modifier) is types.ListType:
+ elif type(ft_modifier) is list:
for ft_mod_item in ft_modifier:
ft_modifiers.update({ft_mod_item: 'true'})
@@ -776,10 +776,10 @@ def strip_accents(s):
#return ''.join((c for c in unicodedata.normalize('NFD', unicode(s)) if unicodedata.category(c) != 'Mn'))
result = []
for char in s:
- if char.lower() in u'äöüß':
+ if char.lower() in 'äöüß':
result.append(char)
else:
- char_decomposed = unicodedata.normalize('NFD', unicode(char))
+ char_decomposed = unicodedata.normalize('NFD', str(char))
for cd in char_decomposed:
if unicodedata.category(cd) != 'Mn':
result.append(cd)
diff --git a/patzilla/access/sip/pyramid_service.py b/patzilla/access/sip/pyramid_service.py
index 51fb44b5..225a3928 100644
--- a/patzilla/access/sip/pyramid_service.py
+++ b/patzilla/access/sip/pyramid_service.py
@@ -12,7 +12,7 @@
from patzilla.access.sip.client import sip_published_data_search, sip_published_data_crawl, SearchException
from patzilla.access.sip.client import LoginException
from patzilla.util.cql.util import should_be_quoted
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.python import _exception_traceback
log = logging.getLogger(__name__)
@@ -58,7 +58,7 @@ def sip_published_data_search_handler(request):
# - sorting
# - whether to remove family members
# - whether to return all family members
- options = SmartBunch()
+ options = SmartMunch()
options.update({
'limit': limit,
'offset': offset_remote,
@@ -94,8 +94,8 @@ def sip_published_data_search_handler(request):
return ex.data
except OperationFailure as ex:
- message = unicode(ex)
- message = re.sub(u'namespace: .*', u'', message)
+ message = str(ex)
+ message = re.sub('namespace: .*', '', message)
request.errors.add('sip-search', 'internals', message)
log.error(request.errors)
@@ -126,7 +126,7 @@ def sip_published_data_crawl_handler(request):
if hasattr(ex, 'user_info'):
message = ex.user_info
else:
- message = unicode(ex)
+ message = str(ex)
request.errors.add('sip-crawl', 'crawl', message)
log.error(request.errors)
- log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
+ log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
diff --git a/patzilla/access/uspto/pdf.py b/patzilla/access/uspto/pdf.py
index a31b8cbf..ef9b7ccc 100644
--- a/patzilla/access/uspto/pdf.py
+++ b/patzilla/access/uspto/pdf.py
@@ -119,7 +119,7 @@ def get_reference_type(document):
Analyze document number to tell application vs. patent (publication, grant) numbers apart.
The basic heuristic is to assume e.g. US2007231208A1 (4+6=10 chars) to be an application.
"""
- if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, unicode))):
+ if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, bytes))):
raise ValueError("Unknown document reference type: {}".format(document))
number_length = len(str(document.number))
reference_type = None
diff --git a/patzilla/boot/config.py b/patzilla/boot/config.py
index ae75b9f9..c3d02580 100644
--- a/patzilla/boot/config.py
+++ b/patzilla/boot/config.py
@@ -90,7 +90,7 @@ def tmpfile(self, payload, suffix=None):
"""
Create a temporary file with given content.
"""
- tmp = tempfile.NamedTemporaryFile(suffix=suffix)
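+        # Open in text mode ("w+"), so that string payloads can be written on Python 3.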
+ tmp = tempfile.NamedTemporaryFile(mode='w+', suffix=suffix)
self._tmpfiles.append(tmp)
tmp.write(payload)
tmp.flush()
diff --git a/patzilla/navigator/export.py b/patzilla/navigator/export.py
index 8328001e..6bb6251a 100644
--- a/patzilla/navigator/export.py
+++ b/patzilla/navigator/export.py
@@ -15,7 +15,7 @@
from io import BytesIO
from textwrap import dedent
from lxml import etree as ET
-from bunch import bunchify, Bunch
+from munch import munchify, Munch
from json.encoder import JSONEncoder
from zipfile import ZipFile, ZIP_DEFLATED
from collections import OrderedDict
@@ -34,7 +34,7 @@
class Dossier(object):
- summary_template = dedent(u"""
+ summary_template = dedent("""
Summary
The research about »{project_name}«
@@ -53,7 +53,7 @@ class Dossier(object):
""").strip()
def __init__(self, data):
- self.data = bunchify(data)
+ self.data = munchify(data)
self.prepare_dataframes()
self.make_metadata()
@@ -61,7 +61,7 @@ def make_metadata(self):
self.metadata = ReportMetadata()
- self.metadata.set('producer', u'IP Navigator')
+ self.metadata.set('producer', 'IP Navigator')
# Project metadata
self.metadata.set('project_name', self.data.project.name)
@@ -120,7 +120,7 @@ def prepare_dataframes(self):
# Queries
- queries = map(self.query_criteria_smoother, self.data.get('queries', []))
+ queries = list(map(self.query_criteria_smoother, self.data.get('queries', [])))
self.df_queries = pandas.DataFrame(queries, columns=['criteria', 'query_expression', 'result_count', 'datasource', 'created'])
self.df_queries.rename(columns={'query_expression': 'expression', 'result_count': 'hits', 'created': 'timestamp'}, inplace=True)
@@ -155,10 +155,10 @@ def get_summary(self):
def get_metadata(self):
return self.format_with_metadata(
- u'Author: {author_name} <{author_email}>\n'
- u'Created: {project_created}\n'
- u'Updated: {project_modified}\n'
- u'Producer: {producer}')
+ 'Author: {author_name} <{author_email}>\n'
+ 'Created: {project_created}\n'
+ 'Updated: {project_modified}\n'
+ 'Producer: {producer}')
@staticmethod
def to_csv(dataframe):
@@ -189,7 +189,7 @@ def to_zip(self, request=None, options=None):
# TODO: Text representations for biblio, register, family
# TODO: PDF Extracts
- options = options or bunchify({'report': {}, 'media': {}})
+ options = options or munchify({'report': {}, 'media': {}})
# Remove entries with empty/undefined document numbers
@@ -203,7 +203,7 @@ def to_zip(self, request=None, options=None):
with ZipFile(buffer, 'w', ZIP_DEFLATED) as zipfile:
# FIXME: Add TERMS (liability waiver) and more...
- zipfile.writestr('@readme.txt', u'Zip archive created by IP Navigator.')
+ zipfile.writestr('@readme.txt', 'Zip archive created by IP Navigator.')
# Add text summary
zipfile.writestr('@metadata.txt', self.get_metadata().encode('utf-8'))
@@ -224,8 +224,8 @@ def to_zip(self, request=None, options=None):
try:
zipfile.writestr('report/@dossier.pdf', DossierXlsx(self.data).to_pdf(payload=workbook_payload))
except Exception as ex:
- log.error(u'Rendering dossier to PDF failed. ' \
- u'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback()))
+ log.error('Rendering dossier to PDF failed. ' \
+ 'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback()))
# Add CSV
if options.report.csv:
@@ -263,7 +263,7 @@ def to_zip(self, request=None, options=None):
if not document or not document.strip():
continue
- log.info(u'Data acquisition for document {document}'.format(document=document))
+ log.info('Data acquisition for document {document}'.format(document=document))
status.setdefault(document, OrderedDict())
patent = decode_patent_number(document)
@@ -272,7 +272,7 @@ def to_zip(self, request=None, options=None):
if options.media.biblio:
try:
biblio_payload = get_ops_biblio_data('publication', document, xml=True)
- zipfile.writestr(u'media/xml/{document}.biblio.xml'.format(document=document), biblio_payload)
+ zipfile.writestr('media/xml/{document}.biblio.xml'.format(document=document), biblio_payload)
status[document]['biblio'] = True
except Exception as ex:
@@ -290,14 +290,14 @@ def to_zip(self, request=None, options=None):
# Write XML
document_number = encode_epodoc_number(patent)
description_payload = ops_description(document_number, xml=True)
- zipfile.writestr(u'media/xml/{document}.description.xml'.format(document=document), description_payload)
+ zipfile.writestr('media/xml/{document}.description.xml'.format(document=document), description_payload)
status[document]['description'] = True
# Write TEXT
with ignored():
text_payload = self.get_fulltext(description_payload, 'description')
if text_payload:
- zipfile.writestr(u'media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8'))
+ zipfile.writestr('media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8'))
except Exception as ex:
self.handle_exception(ex, 'description', document)
@@ -313,14 +313,14 @@ def to_zip(self, request=None, options=None):
# Write XML
document_number = encode_epodoc_number(patent)
claims_payload = ops_claims(document_number, xml=True)
- zipfile.writestr(u'media/xml/{document}.claims.xml'.format(document=document), claims_payload)
+ zipfile.writestr('media/xml/{document}.claims.xml'.format(document=document), claims_payload)
status[document]['claims'] = True
# Write TEXT
with ignored():
                        text_payload = self.get_fulltext(claims_payload.replace('', '').replace('\n', '\n'), 'claims')
if text_payload:
- zipfile.writestr(u'media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8'))
+ zipfile.writestr('media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8'))
except Exception as ex:
self.handle_exception(ex, 'claims', document)
@@ -332,7 +332,7 @@ def to_zip(self, request=None, options=None):
try:
register_payload = ops_register('publication', document, xml=True)
- zipfile.writestr(u'media/xml/{document}.register.xml'.format(document=document), register_payload)
+ zipfile.writestr('media/xml/{document}.register.xml'.format(document=document), register_payload)
status[document]['register'] = True
except Exception as ex:
@@ -346,7 +346,7 @@ def to_zip(self, request=None, options=None):
try:
document_number = encode_epodoc_number(patent, options={'nokind': True})
family_payload = ops_family_inpadoc('publication', document_number, 'biblio', xml=True)
- zipfile.writestr(u'media/xml/{document}.family.xml'.format(document=document), family_payload)
+ zipfile.writestr('media/xml/{document}.family.xml'.format(document=document), family_payload)
status[document]['family'] = True
except Exception as ex:
@@ -368,20 +368,20 @@ def to_zip(self, request=None, options=None):
delivered_items = []
missing_items = []
- for document, kinds in status.iteritems():
+ for document, kinds in status.items():
delivered = []
missing = []
- for kind, ok in kinds.iteritems():
+ for kind, ok in kinds.items():
if ok:
delivered.append(kind)
else:
missing.append(kind)
if delivered:
- item = u'{document:20}{delivered}'.format(document=document, delivered=u', '.join(delivered))
+ item = '{document:20}{delivered}'.format(document=document, delivered=', '.join(delivered))
delivered_items.append(item)
if missing:
- item = u'{document:20}{missing}'.format(document=document, missing=u', '.join(missing))
+ item = '{document:20}{missing}'.format(document=document, missing=', '.join(missing))
missing_items.append(item)
if delivered_items or missing_items:
@@ -409,13 +409,13 @@ def to_zip(self, request=None, options=None):
def handle_exception(self, ex, service_name, document):
if isinstance(ex, (_JSONError, HTTPError)) and hasattr(ex, 'status_int') and ex.status_int == 404:
- log.warning(u'XML({service_name}, {document}) not found'.format(service_name=service_name, document=document))
+ log.warning('XML({service_name}, {document}) not found'.format(service_name=service_name, document=document))
# Signal exception has been handled (ignored)
return True
else:
- log.warning(u'XML({service_name}, {document}) failed. ' \
- u'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback()))
+ log.warning('XML({service_name}, {document}) failed. ' \
+ 'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback()))
# Signal exception should be re-raised, maybe
return False
@@ -464,7 +464,7 @@ def default(self, o):
return JSONEncoder.default(self, o)
"""
- if isinstance(o, (numpy.bool_,)):
+ if isinstance(o, numpy.bool_):
return bool(o)
raise TypeError(repr(o) + " is not JSON serializable")
@@ -512,9 +512,9 @@ def create(self):
def set_header_footer(self, worksheet):
# http://xlsxwriter.readthedocs.io/example_headers_footers.html
- header = u'&LIP Navigator&RSearch report'
+ header = '&LIP Navigator&RSearch report'
worksheet.set_header(header)
- footer = u'&L&L&D &T&C&A&RPage &P of &N'
+ footer = '&L&L&D &T&C&A&RPage &P of &N'
worksheet.set_footer(footer)
def write_cover_sheet(self):
@@ -529,7 +529,7 @@ def write_cover_sheet(self):
cover_sheet = self.workbook.add_worksheet('cover')
self.set_header_footer(cover_sheet)
- title = u'Dossier »{name}«'.format(name=self.data.project.name)
+ title = 'Dossier »{name}«'.format(name=self.data.project.name)
title_format = self.workbook.add_format({'align': 'center', 'valign': 'vcenter', 'font_size': 17, 'bold': True})
cover_sheet.merge_range('A1:I2', title, title_format)
@@ -545,7 +545,7 @@ def write_cover_sheet(self):
footnote_format = self.workbook.add_format({'font_size': 9})
- footnote = dedent(u"""
+ footnote = dedent("""
Please have a look at the other worksheets in
this workbook for more detailed information about
all queries, comments and document numbers
@@ -554,7 +554,7 @@ def write_cover_sheet(self):
summary = self.generate_with_metadata(self.summary_template, emphasis=blue)
- args = list(summary) + ['\n'] + [footnote_format, u'\n\n' + footnote]
+ args = list(summary) + ['\n'] + [footnote_format, '\n\n' + footnote]
args.append(cell_format)
cover_sheet.write_rich_string('B10', *args)
@@ -571,7 +571,7 @@ def write_numberlist_sheets(self):
sheets['rated'] = self.data.get('collections', {}).get('rated')
sheets['dismissed'] = self.data.get('collections', {}).get('dismissed')
sheets['seen'] = self.data.get('collections', {}).get('seen')
- for sheet_name, entries in sheets.iteritems():
+ for sheet_name, entries in sheets.items():
#print 'entries:'; pprint(entries)
@@ -581,10 +581,10 @@ def write_numberlist_sheets(self):
first = {}
# Create pandas DataFrame
- if type(first) in types.StringTypes:
+ if type(first) in (str,):
df = pandas.DataFrame(entries, columns=['PN'])
- elif isinstance(first, (types.DictionaryType, Bunch)):
+ elif isinstance(first, (dict, Munch)):
df = pandas.DataFrame(entries, columns=['number', 'score', 'timestamp', 'url'])
df.rename(columns={'number': 'document', 'url': 'display'}, inplace=True)
@@ -720,7 +720,7 @@ def to_pdf(self, payload=None):
if process.status_code == 0:
#pdf_name = os.path.join(pdf_path, os.path.basename(xlsx_file.name).replace('.xlsx', '.pdf'))
- payload = file(pdf_path, 'r').read()
+ payload = open(pdf_path, 'rb').read()
#shutil.rmtree(pdf_path)
os.unlink(pdf_path)
return payload
@@ -750,7 +750,7 @@ def set(self, key, value):
# https://stackoverflow.com/questions/17215400/python-format-string-unused-named-arguments/17215533#17215533
def __missing__(self, key):
- return u'n/a'
+ return 'n/a'
# Machinery for monkeypatching XlsxWriter's Worksheet's ``write_url`` method
@@ -763,7 +763,7 @@ def write_url_deduce_title(self, row, col, url, cell_format=None, string=None, t
if string is None:
string = os.path.basename(url)
if tip is None:
- tip = u'Open "{name}" in Patent Navigator'.format(name=string)
+ tip = 'Open "{name}" in Patent Navigator'.format(name=string)
return self.write_url_dist(row, col, url, cell_format=cell_format, string=string, tip=tip)
def workbook_add_sheet_hook(self, name=None):
@@ -810,8 +810,8 @@ def _vgenerate(self, format_string, args, kwargs, used_args, recursion_depth):
obj = self.convert_field(obj, conversion)
# expand the format spec, if needed
- format_spec = self._vformat(format_spec, args, kwargs,
- used_args, recursion_depth-1)
+ #format_spec = self._vformat(format_spec, args, kwargs,
+ #used_args, recursion_depth-1)
# format the object and append to the result
if 'emphasis' in kwargs:
diff --git a/patzilla/navigator/services/__init__.py b/patzilla/navigator/services/__init__.py
index be31f855..8355b43e 100644
--- a/patzilla/navigator/services/__init__.py
+++ b/patzilla/navigator/services/__init__.py
@@ -35,12 +35,11 @@ def handle_generic_exception(request, ex, backend_name, query):
module_name = ex.__class__.__module__
class_name = ex.__class__.__name__
- reason = u'{}.{}: {}'.format(module_name, class_name, ex.message)
+ reason = '{}.{}: {}'.format(module_name, class_name, str(ex))
- logger.critical(u'{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format(
- exception=_exception_traceback(), **locals()))
+ logger.exception('{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}'.format(**locals()))
- message = u'An exception occurred while processing your query.\nReason: {}\n'.format(reason)
+ message = 'An exception occurred while processing your query.\nReason: {}\n'.format(reason)
if module_name == 'pymongo.errors':
message += 'Error connecting to cache database. Please report this problem to us.'
diff --git a/patzilla/navigator/services/admin.py b/patzilla/navigator/services/admin.py
index 28aef93b..62db0094 100644
--- a/patzilla/navigator/services/admin.py
+++ b/patzilla/navigator/services/admin.py
@@ -30,7 +30,7 @@ def admin_users_emails_handler(request):
continue
user_emails.append(user.username.lower())
- payload = u'\n'.join(user_emails)
+ payload = '\n'.join(user_emails)
return Response(payload, content_type='text/plain', charset='utf-8')
diff --git a/patzilla/navigator/services/analytics.py b/patzilla/navigator/services/analytics.py
index 3bce27ce..c73923da 100644
--- a/patzilla/navigator/services/analytics.py
+++ b/patzilla/navigator/services/analytics.py
@@ -3,7 +3,7 @@
import logging
import datetime
import operator
-import HTMLParser
+import html.parser
from arrow.arrow import Arrow
from cornice.service import Service
from dateutil.relativedelta import relativedelta
@@ -63,7 +63,7 @@ def _decode_expression_from_query(request):
# decode query parameters into datasource and criteria
decoded = {}
params = dict(request.params)
- if params.has_key('datasource'):
+ if 'datasource' in params:
decoded['datasource'] = params['datasource'].lower()
del params['datasource']
decoded.update({'criteria': params})
@@ -96,7 +96,7 @@ def __init__(self, datasource, criteria, kind):
if self.kind == self.OLDEST:
- self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 01, 01))
+ self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 1, 1))
self.date_to = Arrow.fromdatetime(datetime.datetime(1899, 12, 31))
self.factor = +1
@@ -106,7 +106,7 @@ def __init__(self, datasource, criteria, kind):
self.machine.add_transition('step', 'right', 'whole', unless='is_ready', after=['range_shrink'])
elif self.kind == self.NEWEST:
- self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 01, 01))
+ self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 1, 1))
self.date_to = Arrow.utcnow()
self.date_to += relativedelta(months=12-self.date_to.month, days=31-self.date_to.day)
self.factor = -1
@@ -124,7 +124,7 @@ def __init__(self, datasource, criteria, kind):
def runquery(self):
criteria = self.criteria.copy()
- criteria['pubdate'] = u'within {date_from},{date_to}'.format(
+ criteria['pubdate'] = 'within {date_from},{date_to}'.format(
date_from=self.date_from.format('YYYY-MM-DD'), date_to=self.date_to.format('YYYY-MM-DD'))
query = make_expression_filter({
@@ -199,10 +199,10 @@ def work(self):
debug = False
while True:
if debug:
- print '-' * 42
- print 'state:', self.state
- print 'delta:', self.delta
- print 'querycount:', self.querycount
+ print('-' * 42)
+ print('state:', self.state)
+ print('delta:', self.delta)
+ print('querycount:', self.querycount)
if self.state == 'finished' or self.querycount > 15:
break
self.step()
@@ -294,7 +294,7 @@ def analytics_applicants_distinct_handler(request):
#print 'results:', results
applicants = {}
- htmlparser = HTMLParser.HTMLParser()
+ htmlparser = html.parser.HTMLParser()
for item in results['details']:
applicant = item.get('applicant')
if applicant:
diff --git a/patzilla/navigator/services/depatech.py b/patzilla/navigator/services/depatech.py
index f852a81d..bae75f29 100644
--- a/patzilla/navigator/services/depatech.py
+++ b/patzilla/navigator/services/depatech.py
@@ -14,7 +14,7 @@
from patzilla.util.expression.keywords import keywords_to_response
from patzilla.navigator.services.util import request_to_options
from patzilla.access.generic.exceptions import NoResultsException, SearchException
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.python import _exception_traceback
log = logging.getLogger(__name__)
@@ -37,7 +37,7 @@
@status_upstream_depatech.get()
def status_upstream_depatech_handler(request):
client = get_depatech_client()
- query = SmartBunch({
+ query = SmartMunch({
'expression': '(PC:DE AND DE:212016000074 AND KI:U1) OR AN:DE212016000074U1 OR NP:DE212016000074U1',
})
data = client.search_real(query)
@@ -53,7 +53,7 @@ def depatech_published_data_search_handler(request):
# Get hold of query expression and filter
expression = request.params.get('expression', '')
filter = request.params.get('filter', '')
- query = SmartBunch({
+ query = SmartMunch({
'syntax': 'lucene',
'expression': expression,
'filter': filter,
@@ -84,7 +84,7 @@ def depatech_published_data_search_handler(request):
# - limit
# - sorting
# - whether to remove family members
- options = SmartBunch()
+ options = SmartMunch()
options.update({
'limit': limit,
'offset': offset_remote,
@@ -103,7 +103,7 @@ def depatech_published_data_search_handler(request):
log.warn(request.errors)
except SyntaxError as ex:
- request.errors.add('depatech-search', 'expression', unicode(ex.msg))
+ request.errors.add('depatech-search', 'expression', str(ex.msg))
log.warn(request.errors)
except SearchException as ex:
@@ -117,7 +117,7 @@ def depatech_published_data_search_handler(request):
return ex.data
except OperationFailure as ex:
- message = unicode(ex)
+ message = str(ex)
request.errors.add('depatech-search', 'internals', message)
log.error(request.errors)
@@ -131,7 +131,7 @@ def depatech_published_data_crawl_handler(request):
"""Crawl published-data at MTC depa.tech"""
# Get hold of query expression and filter
- query = SmartBunch({
+ query = SmartMunch({
'expression': request.params.get('expression', ''),
'filter': request.params.get('filter', ''),
})
@@ -151,6 +151,6 @@ def depatech_published_data_crawl_handler(request):
return result
except Exception as ex:
- request.errors.add('depatech-crawl', 'crawl', unicode(ex))
+ request.errors.add('depatech-crawl', 'crawl', str(ex))
log.error(request.errors)
- log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
+ log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
diff --git a/patzilla/navigator/services/dpma.py b/patzilla/navigator/services/dpma.py
index a83c99bb..9bc363e0 100644
--- a/patzilla/navigator/services/dpma.py
+++ b/patzilla/navigator/services/dpma.py
@@ -94,7 +94,7 @@ def prepare_search(request):
expression = expression.replace('ikofax:', '')
syntax = 'ikofax'
- log.info(u'DEPATISnet query: {}, syntax: {}'.format(expression, syntax))
+ log.info('DEPATISnet query: {}, syntax: {}'.format(expression, syntax))
# Compute query options, like
# - limit
@@ -112,7 +112,7 @@ def prepare_search(request):
elif syntax == 'ikofax':
search = ikofax_prepare_query(expression)
else:
- request.errors.add('depatisnet-search', 'expression', u'Unknown syntax {}'.format(syntax))
+ request.errors.add('depatisnet-search', 'expression', 'Unknown syntax {}'.format(syntax))
# Propagate keywords to highlighting component
keywords_to_response(request, search=search)
@@ -165,10 +165,10 @@ def depatisnet_published_data_crawl_handler(request):
http_response = None
if hasattr(ex, 'http_response'):
http_response = ex.http_response
- log.error(u'DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format(
+ log.error('DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format(
query, ex, http_response, _exception_traceback()))
- message = u'An exception occurred while processing your query\nReason: {}'.format(ex)
+ message = 'An exception occurred while processing your query\nReason: {}'.format(ex)
request.errors.add('depatisnet-search', 'crawl', message)
diff --git a/patzilla/navigator/services/ificlaims.py b/patzilla/navigator/services/ificlaims.py
index 2897a109..b7ac45ee 100644
--- a/patzilla/navigator/services/ificlaims.py
+++ b/patzilla/navigator/services/ificlaims.py
@@ -18,7 +18,7 @@
from patzilla.access.ificlaims.api import ificlaims_download, ificlaims_download_multi
from patzilla.access.ificlaims.client import IFIClaimsException, IFIClaimsFormatException, LoginException, ificlaims_search, ificlaims_crawl, ificlaims_client
from patzilla.access.ificlaims.expression import should_be_quoted, IFIClaimsParser
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.data.zip import zip_multi
from patzilla.util.python import _exception_traceback
@@ -51,7 +51,7 @@
@status_upstream_ificlaims.get()
def status_upstream_ificlaims_handler(request):
client = ificlaims_client()
- query = SmartBunch({
+ query = SmartMunch({
'expression': 'pn:EP0666666',
})
data = client.search_real(query)
@@ -71,7 +71,7 @@ def ificlaims_download_handler(request):
try:
response = ificlaims_download(resource, format, options)
- except IFIClaimsException, ex:
+ except IFIClaimsException as ex:
if type(ex) is IFIClaimsFormatException:
raise HTTPNotFound(ex)
else:
@@ -102,16 +102,16 @@ def ificlaims_deliver_handler(request):
"""Deliver resources from IFI CLAIMS Direct in bulk"""
kind = request.matchdict['kind']
- formats = map(unicode.strip, request.params.get('formats', u'').lower().split(u','))
- numberlist = filter(lambda item: bool(item), map(unicode.strip, re.split('[\n,]', request.params.get('numberlist', u''))))
+ formats = list(map(str.strip, request.params.get('formats', '').lower().split(',')))
+ numberlist = [item for item in map(str.strip, re.split('[\n,]', request.params.get('numberlist', ''))) if bool(item)]
if kind == 'zip':
multi = ificlaims_download_multi(numberlist, formats)
#for entry in multi['results']:
# print 'entry:', entry
- print 'report:'
- print json.dumps(multi['report'], indent=4)
+ print('report:')
+ print(json.dumps(multi['report'], indent=4))
payload = zip_multi(multi)
@@ -138,7 +138,7 @@ def ificlaims_published_data_search_handler(request):
"""Search for published-data at IFI CLAIMS Direct"""
# Get hold of query expression and filter
- query = SmartBunch({
+ query = SmartMunch({
'expression': request.params.get('expression', ''),
'filter': request.params.get('filter', ''),
})
@@ -162,7 +162,7 @@ def ificlaims_published_data_search_handler(request):
# - limit
# - sorting
# - whether to remove family members
- options = SmartBunch()
+ options = SmartMunch()
options.update({
'limit': limit,
'offset': offset_remote,
@@ -181,7 +181,7 @@ def ificlaims_published_data_search_handler(request):
log.warn(request.errors)
except SyntaxError as ex:
- request.errors.add('ificlaims-search', 'expression', unicode(ex.msg))
+ request.errors.add('ificlaims-search', 'expression', str(ex.msg))
log.warn(request.errors)
except SearchException as ex:
@@ -195,7 +195,7 @@ def ificlaims_published_data_search_handler(request):
return ex.data
except OperationFailure as ex:
- message = unicode(ex)
+ message = str(ex)
request.errors.add('ificlaims-search', 'internals', message)
log.error(request.errors)
@@ -209,7 +209,7 @@ def ificlaims_published_data_crawl_handler(request):
"""Crawl published-data at IFI CLAIMS Direct"""
# Get hold of query expression and filter
- query = SmartBunch({
+ query = SmartMunch({
'expression': request.params.get('expression', ''),
'filter': request.params.get('filter', ''),
})
@@ -229,6 +229,6 @@ def ificlaims_published_data_crawl_handler(request):
return result
except Exception as ex:
- request.errors.add('ificlaims-crawl', 'crawl', unicode(ex))
+ request.errors.add('ificlaims-crawl', 'crawl', str(ex))
log.error(request.errors)
- log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
+ log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback()))
diff --git a/patzilla/navigator/services/ops.py b/patzilla/navigator/services/ops.py
index 08316486..ef9eaf74 100644
--- a/patzilla/navigator/services/ops.py
+++ b/patzilla/navigator/services/ops.py
@@ -89,12 +89,12 @@ def ops_published_data_search_handler(request):
# CQL query string
query = request.params.get('expression', '')
- log.info(u'query raw: %s', query)
+ log.info('query raw: %s', query)
# Transcode CQL query expression
search = cql_prepare_query(query)
- log.info(u'query cql: %s', search.expression)
+ log.info('query cql: %s', search.expression)
# range: x-y, maximum delta is 100, default is 25
range = request.params.get('range')
@@ -136,7 +136,7 @@ def ops_published_data_crawl_handler(request):
# CQL query string
query = request.params.get('expression', '')
- log.info(u'query raw: ' + query)
+ log.info('query raw: ' + query)
# Transcode CQL query expression
search = cql_prepare_query(query)
@@ -144,7 +144,7 @@ def ops_published_data_crawl_handler(request):
# Propagate keywords to highlighting component
keywords_to_response(request, search=search)
- log.info(u'query cql: ' + search.expression)
+ log.info('query cql: ' + search.expression)
chunksize = int(request.params.get('chunksize', '100'))
@@ -154,7 +154,7 @@ def ops_published_data_crawl_handler(request):
return result
except Exception as ex:
- log.error(u'OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback()))
+ log.error('OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback()))
request.errors.add('ops-published-data-crawl', 'query', str(ex))
diff --git a/patzilla/navigator/services/util.py b/patzilla/navigator/services/util.py
index 652a78d0..1221fe1f 100644
--- a/patzilla/navigator/services/util.py
+++ b/patzilla/navigator/services/util.py
@@ -5,7 +5,7 @@
import logging
import mimetypes
from pprint import pprint
-from bunch import bunchify
+from munch import munchify
from cornice.service import Service
from pyramid.settings import asbool
from pyramid.threadlocal import get_current_request
@@ -13,7 +13,7 @@
from patzilla.navigator.export import Dossier, DossierXlsx
from patzilla.util.config import read_list
from patzilla.util.cql.util import pair_to_cql
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.expression.keywords import keywords_from_boolean_expression
from patzilla.util.numbers.numberlists import parse_numberlist, normalize_numbers
from patzilla.util.python import exception_traceback
@@ -55,9 +55,9 @@ def query_expression_util_handler(request):
# TODO: improve error handling
data = request.json
- log.info(u'[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data))
+ log.info('[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data))
expression_data = make_expression_filter(data)
- log.info(u'[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, expression_data=expression_data))
+ log.info('[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, expression_data=expression_data))
return expression_data
@@ -100,7 +100,7 @@ def make_expression_filter(data):
else:
# Bring criteria in order: Process "fulltext" first
- keys = criteria.keys()
+ keys = list(criteria.keys())
if 'fulltext' in keys:
keys.remove('fulltext')
keys.insert(0, 'fulltext')
@@ -132,7 +132,7 @@ def make_expression_filter(data):
elif datasource == 'sip':
expression_part = SipExpression.pair_to_sip_xml(key, value, modifiers)
if expression_part:
- if expression_part.has_key('keywords'):
+ if 'keywords' in expression_part:
keywords += expression_part['keywords']
else:
keywords += keywords_from_boolean_expression(key, value)
@@ -147,7 +147,7 @@ def make_expression_filter(data):
else:
expression_part = IFIClaimsExpression.pair_to_solr(key, value, modifiers)
if expression_part:
- if expression_part.has_key('keywords'):
+ if 'keywords' in expression_part:
keywords += expression_part['keywords']
else:
keywords += keywords_from_boolean_expression(key, value)
@@ -157,13 +157,13 @@ def make_expression_filter(data):
expression_part = DepaTechExpression.pair_to_elasticsearch(key, value, modifiers)
if expression_part:
- if expression_part.has_key('keywords'):
+ if 'keywords' in expression_part:
keywords += expression_part['keywords']
else:
keywords += keywords_from_boolean_expression(key, value)
# Accumulate expression part
- error_tpl = u'Criteria "{0}: {1}" has invalid format, datasource={2}.'
+ error_tpl = 'Criteria "{0}: {1}" has invalid format, datasource={2}.'
if not expression_part:
message = error_tpl.format(key, value, datasource)
log.warn(message)
@@ -171,7 +171,7 @@ def make_expression_filter(data):
elif 'error' in expression_part:
message = error_tpl.format(key, value, datasource)
- message += u'\n' + expression_part['message']
+ message += '\n' + expression_part['message']
log.warn(message)
request.errors.add('query-expression-utility-service', 'comfort-form', message)
@@ -181,12 +181,12 @@ def make_expression_filter(data):
expression_parts.append(query)
# Accumulate filter part
- error_tpl = u'Filter "{0}: {1}" has invalid format, datasource={2}.'
+ error_tpl = 'Filter "{0}: {1}" has invalid format, datasource={2}.'
if filter_part:
if 'error' in filter_part:
message = error_tpl.format(key, value, datasource)
- message += u'\n' + filter_part['message']
+ message += '\n' + filter_part['message']
log.warn(message)
request.errors.add('query-expression-utility-service', 'comfort-form', message)
@@ -251,8 +251,8 @@ def request_to_options(request, options):
options.update({'feature_family_replace': True})
# this is awful, switch to JSON POST
- for key, value in request.params.iteritems():
- if key.startswith(u'query_data[sorting]'):
+ for key, value in request.params.items():
+ if key.startswith('query_data[sorting]'):
key = key.replace('query_data[sorting]', '').replace('[', '').replace(']', '')
options.setdefault('sorting', {})
options['sorting'][key] = value
@@ -288,7 +288,7 @@ def export_util_handler(request):
elif output_kind == 'dossier':
log.info('Starting dossier export to format "{format}"'.format(format=output_format))
- data = bunchify(json.loads(request.params.get('json')))
+ data = munchify(json.loads(request.params.get('json')))
# Debugging
#print 'dossier-data:'; pprint(data.toDict())
@@ -314,10 +314,10 @@ def export_util_handler(request):
payload = dossier.to_zip(request=request, options=data.get('options'))
else:
- return HTTPBadRequest(u'Export format "{format}" is unknown.'.format(format=output_format))
+ return HTTPBadRequest('Export format "{format}" is unknown.'.format(format=output_format))
except Exception as ex:
- message = u'Exporting format "{format}" failed.'.format(format=output_format)
+ message = 'Exporting format "{format}" failed.'.format(format=output_format)
log.error('{message}. Exception:\n{trace}'.format(message=message, trace=exception_traceback()))
return HTTPServerError(message)
@@ -350,7 +350,7 @@ def issue_reporter_handler(request):
report_data = request.json
report_data.setdefault('application', {})
- report = SmartBunch.bunchify(report_data)
+ report = SmartMunch.munchify(report_data)
# Add user information to issue report
user = request.user
@@ -361,7 +361,7 @@ def issue_reporter_handler(request):
user.upstream_credentials = None
# Serialize user object and attach to report
- report.application.user = SmartBunch(json.loads(user.to_json()))
+ report.application.user = SmartMunch(json.loads(user.to_json()))
# Send the whole beast to the standard application log
log.error('Issue report [{targets}]:\n{report}'.format(
diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py
index 7c809e8a..4f9fc082 100644
--- a/patzilla/navigator/settings.py
+++ b/patzilla/navigator/settings.py
@@ -13,7 +13,8 @@
from patzilla.util.config import read_list, asbool, get_configuration
from patzilla.util.date import datetime_isoformat, unixtime_to_datetime
from patzilla.util.python import _exception_traceback
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
+
log = logging.getLogger(__name__)
@@ -51,8 +52,7 @@ def get_application_settings(self):
# TODO: Optimize: Only read once, not on each request!
# FIXME: Maybe do the same what `attach_ops_client` does?
# `if '/static' in event.request.url: return`.
- settings = get_configuration(self.configfile, kind=SmartBunch)
-
+ settings = get_configuration(self.configfile, kind=SmartMunch)
# Add some global settings
settings['software_version'] = __version__
@@ -66,10 +66,10 @@ def get_application_settings(self):
def get_datasource_settings(self, vendor=None):
# Container for datasource settings.
- datasource_settings = SmartBunch({
+ datasource_settings = SmartMunch({
'datasources': [],
- 'datasource': SmartBunch(),
- 'total': SmartBunch.bunchify({'fulltext_countries': [], 'details_countries': []}),
+ 'datasource': SmartMunch(),
+ 'total': SmartMunch.munchify({'fulltext_countries': [], 'details_countries': []}),
})
# Read datasource settings from configuration.
@@ -77,7 +77,7 @@ def get_datasource_settings(self, vendor=None):
datasource_settings.protected_fields = read_list(self.application_settings.get('ip_navigator', {}).get('datasources_protected_fields'))
for datasource in datasource_settings.datasources:
- datasource_info = SmartBunch()
+ datasource_info = SmartMunch()
if vendor is None:
settings_key = 'datasource:{name}'.format(name=datasource)
else:
@@ -88,10 +88,10 @@ def get_datasource_settings(self, vendor=None):
datasource_info.setdefault('fulltext_countries', read_list(ds_settings.get('fulltext_countries', '')))
datasource_info.setdefault('details_enabled', asbool(ds_settings.get('details_enabled', False)))
datasource_info.setdefault('details_countries', read_list(ds_settings.get('details_countries', '')))
- for key, value in ds_settings.iteritems():
+ for key, value in ds_settings.items():
datasource_info.setdefault(key, value)
- datasource_settings.datasource[datasource] = SmartBunch.bunchify(datasource_info)
+ datasource_settings.datasource[datasource] = SmartMunch.munchify(datasource_info)
# Aggregate data for all countries.
datasource_settings.total.fulltext_countries += datasource_info['fulltext_countries']
@@ -101,9 +101,9 @@ def get_datasource_settings(self, vendor=None):
def get_vendor_settings(self):
# Container for vendor settings
- vendor_settings = SmartBunch({
+ vendor_settings = SmartMunch({
'vendors': [],
- 'vendor': SmartBunch(),
+ 'vendor': SmartMunch(),
})
# Read vendor settings from configuration
@@ -122,8 +122,8 @@ def get_vendor_settings(self):
vendor=vendor, configfile=self.configfile))
vendor_info = self.application_settings.get(settings_key, {})
- for key, value in vendor_info.iteritems():
- vendor_info[key] = value.decode('utf-8')
+ for key, value in vendor_info.items():
+ vendor_info[key] = value
if 'hostname_matches' in vendor_info:
vendor_info.hostname_matches = read_list(vendor_info.hostname_matches)
@@ -135,7 +135,7 @@ def get_vendor_settings(self):
vendor_info.datasource_settings = self.get_datasource_settings(vendor)
# Collect all vendor settings.
- vendor_settings.vendor[vendor] = SmartBunch.bunchify(vendor_info)
+ vendor_settings.vendor[vendor] = SmartMunch.munchify(vendor_info)
return vendor_settings
@@ -146,9 +146,9 @@ def get_email_settings(self, vendor):
"""
# Container for email settings
- email_settings = SmartBunch({
+ email_settings = SmartMunch({
'addressbook': [],
- 'content': SmartBunch(),
+ 'content': SmartMunch(),
})
for setting_name in ['addressbook', 'content']:
@@ -160,8 +160,8 @@ def get_email_settings(self, vendor):
if defaults and specific:
thing.update(deepcopy(specific))
- for key, value in thing.items():
- thing[key] = value.decode('utf-8')
+ for key, value in list(thing.items()):
+ thing[key] = value
email_settings[setting_name] = thing
@@ -227,7 +227,7 @@ def effective_vendor(self):
# Skip resolving effective vendor when no vendors are configured at all
if self.registry.vendor_settings is None:
- return SmartBunch()
+ return SmartMunch()
# Select vendor by matching hostnames
vendor_names = self.registry.vendor_settings.vendors
@@ -281,12 +281,12 @@ def theme_settings(self):
'ui.version': software_version_link,
'ui.page.title': vendor.get('page_title', ''), # + ' ' + self.beta_badge,
'ui.page.subtitle': '',
- 'ui.page.footer': 'Data sources: ' + u', '.join(data_source_list),
+ 'ui.page.footer': 'Data sources: ' + ', '.join(data_source_list),
}
# Transfer all properties having designated prefixes 1:1
prefixes = ['ui.', 'feature.']
- for key, value in vendor.iteritems():
+ for key, value in vendor.items():
for prefix in prefixes:
if key.startswith(prefix):
if key.endswith('.enabled'):
@@ -304,10 +304,10 @@ def datasource_settings(self):
Return datasource settings while accounting for sensible settings like API URI and credentials.
"""
request = get_current_request()
- datasource_settings = SmartBunch.bunchify(request.registry.datasource_settings)
+ datasource_settings = SmartMunch.munchify(request.registry.datasource_settings)
if 'protected_fields' in datasource_settings:
for fieldname in datasource_settings.protected_fields:
- for name, settings in datasource_settings.datasource.iteritems():
+ for name, settings in datasource_settings.datasource.items():
if fieldname in settings:
del settings[fieldname]
del datasource_settings['protected_fields']
@@ -363,7 +363,7 @@ def config_parameters(self):
isviewer = 'patentview' in host or 'viewer' in host or 'patview' in host
# 1. don't allow "query" from outside on view-only domains
- if request_params.has_key('query') and isviewer:
+ if 'query' in request_params and isviewer:
log.warning('Parameter "query=%s" not allowed on host "%s", purging it', request_params['query'], host)
del request_params['query']
@@ -388,7 +388,7 @@ def config_parameters(self):
# C. parameter firewall, OUTPUT
# remove "opaque parameter"
- if params.has_key('op'):
+ if 'op' in params:
del params['op']
@@ -409,7 +409,7 @@ def config_parameters(self):
params['datasources_enabled'].append(datasource)
# E. backward-compat amendments
- for key, value in params.iteritems():
+ for key, value in params.items():
if key.startswith('ship_'):
newkey = key.replace('ship_', 'ship-')
params[newkey] = value
diff --git a/patzilla/navigator/tools/browser_database_tool.py b/patzilla/navigator/tools/browser_database_tool.py
index 98e4c8f8..ec1bc5ab 100755
--- a/patzilla/navigator/tools/browser_database_tool.py
+++ b/patzilla/navigator/tools/browser_database_tool.py
@@ -12,17 +12,17 @@
def purge_titles(data):
# Purge "title" attributes from BasketEntry objects
- for name, entity in data['database'].iteritems():
+ for name, entity in data['database'].items():
if name.startswith('BasketEntry'):
if 'title' in entity:
del entity['title']
if 'number' in entity:
- entity['number'] = entity['number'].strip(u'★ ')
+ entity['number'] = entity['number'].strip('★ ')
def purge_numbers_seen(data):
# Purge all BasketEntry objects with "seen==true"
keys = []
- for name, item in data['database'].iteritems():
+ for name, item in data['database'].items():
if name.startswith('BasketEntry/'):
if 'seen' in item and item['seen'] == True:
keys.append(name)
@@ -32,7 +32,7 @@ def purge_numbers_seen(data):
def purge_projects(data):
# Purge "project" attributes from all "Query/..." objects
- for name, item in data['database'].iteritems():
+ for name, item in data['database'].items():
if name.startswith('Query/'):
if 'project' in item:
del item['project']
@@ -51,7 +51,7 @@ def main():
#purge_projects(data)
# Save database file
- print json.dumps(data, indent=4)
+ print(json.dumps(data, indent=4))
if __name__ == '__main__':
diff --git a/patzilla/navigator/util.py b/patzilla/navigator/util.py
index ad77fc2f..df5314a4 100644
--- a/patzilla/navigator/util.py
+++ b/patzilla/navigator/util.py
@@ -6,7 +6,7 @@
def get_exception_message(ex, add_traceback=False):
name = ex.__class__.__name__
- description = '%s: %s' % (name, unicode(ex.message))
+ description = '%s: %s' % (name, str(ex.message))
if add_traceback:
description += '\n' + get_safe_traceback(ex)
return description
@@ -24,7 +24,7 @@ def safe_value(value):
e.g. CaseInsensitiveDict to dict
"""
if hasattr(value, 'items') and callable(value.items):
- return dict(value.items())
+ return dict(list(value.items()))
else:
return value
@@ -35,7 +35,7 @@ def dict_subset(bigdict, *wanted_keys):
def dict_prefix_key(d, prefix):
# prefix keys in dictionary
new = {}
- for key, value in d.iteritems():
+ for key, value in d.items():
key = prefix + key
new[key] = value
return new
@@ -53,7 +53,7 @@ def dict_merge(dct, merge_dct):
:param merge_dct: dct merged into dct
:return: None
"""
- for k, v in merge_dct.iteritems():
+ for k, v in merge_dct.items():
if (k in dct and isinstance(dct[k], dict)
and isinstance(merge_dct[k], collections.Mapping)):
dict_merge(dct[k], merge_dct[k])
diff --git a/patzilla/navigator/views.py b/patzilla/navigator/views.py
index 36050c9b..0389bb7e 100644
--- a/patzilla/navigator/views.py
+++ b/patzilla/navigator/views.py
@@ -137,7 +137,7 @@ def navigator_quick(request):
# Compute query expression
expression = compute_expression(field, value, value2, parameters=request.params)
- print 'quick expression:', expression
+ print('quick expression:', expression)
#return get_redirect_query(request, expression, query_args=query_args)
return get_redirect_query(request, expression)
@@ -150,7 +150,7 @@ def compute_expression(field, value, value2=None, **kwargs):
field = 'pn'
if field in ['cl', 'ipc', 'ic', 'cpc', 'cpci', 'cpca']:
- value = value.replace(u'-', u'/')
+ value = value.replace('-', '/')
quotable = True
if field in ['pa', 'applicant']:
@@ -159,38 +159,38 @@ def compute_expression(field, value, value2=None, **kwargs):
# apply blacklist
blacklist = [
- u'GmbH & Co. KG',
- u'GmbH',
- u' KG',
- u' AG',
- u'& Co.',
+ 'GmbH & Co. KG',
+ 'GmbH',
+ ' KG',
+ ' AG',
+ '& Co.',
]
replacements = {
- u' and ': u' ',
- u' or ': u' ',
- u' not ': u' ',
+ ' and ': ' ',
+ ' or ': ' ',
+ ' not ': ' ',
}
for black in blacklist:
pattern = re.compile(re.escape(black), re.IGNORECASE)
- value = pattern.sub(u'', value).strip()
- for replacement_key, replacement_value in replacements.iteritems():
+ value = pattern.sub('', value).strip()
+ for replacement_key, replacement_value in replacements.items():
#value = value.replace(replacement_key, replacement_value)
pattern = re.compile(replacement_key, re.IGNORECASE)
value = pattern.sub(replacement_value, value).strip()
# make query expression
- parts_raw = re.split(u'[ -]*', value)
+ parts_raw = re.split('[ -]*', value)
umlaut_map = {
- u'ä': u'ae',
- u'ö': u'oe',
- u'ü': u'ue',
- u'Ä': u'Ae',
- u'Ö': u'Oe',
- u'Ü': u'Ue',
- u'ß': u'ss',
+ 'ä': 'ae',
+ 'ö': 'oe',
+ 'ü': 'ue',
+ 'Ä': 'Ae',
+ 'Ö': 'Oe',
+ 'Ü': 'Ue',
+ 'ß': 'ss',
}
def replace_parts(thing):
- for umlaut, replacement in umlaut_map.iteritems():
+ for umlaut, replacement in umlaut_map.items():
thing = thing.replace(umlaut, replacement)
return thing
@@ -198,22 +198,22 @@ def replace_parts(thing):
for part in parts_raw:
# "Alfred H. Schütte" => Alfred Schütte
- if re.match(u'^(\w\.)+$', part):
+ if re.match('^(\w\.)+$', part):
continue
part_normalized = replace_parts(part)
if part != part_normalized:
- part = u'({} or {})'.format(part, part_normalized)
+ part = '({} or {})'.format(part, part_normalized)
parts.append(part)
- value = u' and '.join(parts)
+ value = ' and '.join(parts)
#value = u'({})'.format(value)
- if quotable and u' ' in value:
- value = u'"{0}"'.format(value)
+ if quotable and ' ' in value:
+ value = '"{0}"'.format(value)
- query = u'{field}={value}'.format(**locals())
+ query = '{field}={value}'.format(**locals())
if field in ['pd', 'publicationdate']:
if 'W' in value:
diff --git a/patzilla/util/config/__init__.py b/patzilla/util/config/__init__.py
index 0c94ff3a..e332bccb 100644
--- a/patzilla/util/config/__init__.py
+++ b/patzilla/util/config/__init__.py
@@ -4,7 +4,7 @@
import logging
import sys
from glob import glob
-from ConfigParser import ConfigParser
+from configparser import ConfigParser
logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def get_configuration(*args, **kwargs):
logger.info('Effective configuration files: {}'.format(make_list(used)))
return config
else:
- msg = u'Could not read settings from configuration files: {}'.format(config_files)
+ msg = 'Could not read settings from configuration files: {}'.format(config_files)
logger.critical(msg)
raise ValueError(msg)
@@ -76,22 +76,22 @@ def asbool(s):
s = str(s).strip()
return s.lower() in truthy
-def read_list(string, separator=u','):
+def read_list(string, separator=','):
if string is None:
return []
elif isinstance(string, list):
return string
- result = map(unicode.strip, string.split(separator))
+ result = list(map(str.strip, string.split(separator)))
if len(result) == 1 and not result[0]:
result = []
return result
-def make_list(items, separator=u', '):
+def make_list(items, separator=', '):
return separator.join(items)
def normalize_docopt_options(options):
normalized = {}
- for key, value in options.items():
+ for key, value in list(options.items()):
key = key.strip('--<>')
normalized[key] = value
return normalized
diff --git a/patzilla/util/cql/cheshire3/__init__.py b/patzilla/util/cql/cheshire3/__init__.py
index 35e47286..aad5781f 100644
--- a/patzilla/util/cql/cheshire3/__init__.py
+++ b/patzilla/util/cql/cheshire3/__init__.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# (c) 2014 Andreas Motl, Elmyra UG
-import parser as cheshire3_parser
-from parser import SearchClause, CQLParser, Diagnostic
+from . import parser as cheshire3_parser
+from .parser import SearchClause, CQLParser, Diagnostic
from patzilla.util.numbers.normalize import normalize_patent
@@ -14,7 +14,7 @@ class SmartSearchClause(SearchClause):
def toCQL(self):
text = []
- for p in self.prefixes.keys():
+ for p in list(self.prefixes.keys()):
if (p != ''):
text.append('>%s="%s"' % (p, self.prefixes[p]))
else:
diff --git a/patzilla/util/cql/cheshire3/parser.py b/patzilla/util/cql/cheshire3/parser.py
index 15504717..610af8e9 100644
--- a/patzilla/util/cql/cheshire3/parser.py
+++ b/patzilla/util/cql/cheshire3/parser.py
@@ -19,8 +19,8 @@
from shlex import shlex
from xml.sax.saxutils import escape
-from StringIO import StringIO
-from __builtin__ import isinstance
+from io import StringIO
+from builtins import isinstance
serverChoiceRelation = "="
serverChoiceIndex = "cql.serverchoice"
@@ -75,7 +75,7 @@ def __init__(self):
def toXCQL(self, depth=0):
space = " " * depth
xml = ['{s}\n']
- for p in self.prefixes.keys():
+ for p in list(self.prefixes.keys()):
xml.extend(["{s} \n",
"{s} {name}\n",
"{s} {ident}\n",
@@ -221,7 +221,7 @@ def toCQL(self):
txt = []
if (self.prefixes):
ptxt = []
- for p in self.prefixes.keys():
+ for p in list(self.prefixes.keys()):
if p != '':
ptxt.append('>%s="%s"' % (p, self.prefixes[p]))
else:
@@ -236,7 +236,7 @@ def toCQL(self):
txt.append("sortBy")
for sk in self.sortKeys:
txt.append(sk.toCQL())
- return u"({0})".format(u" ".join(txt))
+ return "({0})".format(" ".join(txt))
def getResultSetId(self, top=None):
if (
@@ -315,7 +315,7 @@ def toXCQL(self, depth=0):
def toCQL(self):
text = []
- for p in self.prefixes.keys():
+ for p in list(self.prefixes.keys()):
if p != '':
text.append('>%s="%s"' % (p, self.prefixes[p]))
else:
@@ -406,7 +406,7 @@ def toXCQL(self, depth=0):
def toCQL(self):
txt = [self.value]
- txt.extend(map(str, self.modifiers))
+ txt.extend(list(map(str, self.modifiers)))
return '/'.join(txt)
@@ -572,7 +572,6 @@ def __init__(self, thing):
shlex.__init__(self, thing)
self.wordchars += "!@#$%^&*-+{}[];,.?|~`:\\"
# self.wordchars += ''.join(map(chr, range(128,254)))
- self.wordchars = self.wordchars.decode('utf-8')
def read_token(self):
"Read a token from the input stream (no pushback or inclusions)"
@@ -774,7 +773,7 @@ def query(self):
left.sortKeys = self.sortQuery()
else:
break
- for p in prefs.keys():
+ for p in list(prefs.keys()):
left.addPrefix(p, prefs[p])
return left
@@ -812,7 +811,7 @@ def subQuery(self):
prefs = self.prefixes()
if (prefs):
object = self.query()
- for p in prefs.keys():
+ for p in list(prefs.keys()):
object.addPrefix(p, prefs[p])
else:
object = self.clause()
@@ -847,7 +846,7 @@ def clause(self):
elif self.currentToken == ">":
prefs = self.prefixes()
object = self.clause()
- for p in prefs.keys():
+ for p in list(prefs.keys()):
object.addPrefix(p, prefs[p])
return object
@@ -915,12 +914,6 @@ def relation(self):
def parse(query):
"""Return a searchClause/triple object from CQL string"""
- if type(query) == str:
- try:
- query = query.decode("utf-8")
- except Exception, e:
- raise
-
q = StringIO(query)
lexer = CQLshlex(q)
parser = CQLParser(lexer)
diff --git a/patzilla/util/cql/cheshire3/test_cheshire3.py b/patzilla/util/cql/cheshire3/test_cheshire3.py
index daab7bb1..89020576 100644
--- a/patzilla/util/cql/cheshire3/test_cheshire3.py
+++ b/patzilla/util/cql/cheshire3/test_cheshire3.py
@@ -54,13 +54,13 @@ def test_value_shortcut_notation_fail(self):
self.do_parse('ti=(foo and bar baz) and pc=qux')
self.assertEqual(
str(cm.exception),
- "info:srw/diagnostic/1/10 [Malformed Query]: Expected Boolean or closing parenthesis but got: u'baz'")
+ "info:srw/diagnostic/1/10 [Malformed Query]: Expected Boolean or closing parenthesis but got: 'baz'")
def test_boolean_german(self):
self.assertEqual(self.do_parse('bi=foo und bi=bar'), '(bi = "foo" und bi = "bar")')
def test_utf8(self):
- self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), u'(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")')
+ self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), '(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")')
if __name__ == '__main__':
unittest.main()
diff --git a/patzilla/util/cql/pyparsing/__init__.py b/patzilla/util/cql/pyparsing/__init__.py
index f916d5aa..9bf78eda 100644
--- a/patzilla/util/cql/pyparsing/__init__.py
+++ b/patzilla/util/cql/pyparsing/__init__.py
@@ -45,7 +45,7 @@ def parse(self):
>>> tokens = parse_cql('foo=bar')
>>> tokens
- ([(['foo', u'=', 'bar'], {'triple': [((['foo', u'=', 'bar'], {}), 0)]})], {})
+ ParseResults([ParseResults(['foo', '=', 'bar'], {'triple': ['foo', '=', 'bar']})], {})
"""
@@ -60,13 +60,13 @@ def parse(self):
#if self.logging:
# log.info(u'tokens: %s', tokens.pformat())
- except pyparsing.ParseException as ex:
- ex.explanation = u'%s\n%s\n%s' % (ex.pstr, u' ' * ex.loc + u'^\n', ex)
+ except pyparsing.exceptions.ParseException as ex:
+ ex.explanation = '%s\n%s\n%s' % (ex.pstr, ' ' * ex.loc + '^\n', ex)
#if self.logging:
# log.error('\n%s', ex.explanation)
- log.warning(u'Query expression "{query}" is invalid. ' \
- u'Reason: {reason}\n{location}'.format(
- query=self.cql, reason=unicode(ex), location=ex.explanation))
+ log.warning('Query expression "{query}" is invalid. ' \
+ 'Reason: {reason}\n{location}'.format(
+ query=self.cql, reason=str(ex), location=ex.explanation))
raise
return tokens
diff --git a/patzilla/util/cql/pyparsing/demo.py b/patzilla/util/cql/pyparsing/demo.py
index 47573044..157773c6 100644
--- a/patzilla/util/cql/pyparsing/demo.py
+++ b/patzilla/util/cql/pyparsing/demo.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
# (c) 2014 Andreas Motl, Elmyra UG
from . import CQL
-from serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers
+from .serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers
def parse_cql(cql):
c = CQL(cql)
@@ -24,9 +24,9 @@ def enrich_cql(cql):
def dump_results(tokens):
cql = tokens_to_cql(tokens)
- print "=" * 42
- print "tokens:", tokens
- print "cql:", cql
+ print("=" * 42)
+ print("tokens:", tokens)
+ print("cql:", cql)
def rundemo():
@@ -80,11 +80,11 @@ def rundemo():
# B.3. dump all triples
triples = []
get_triples(tokens, triples)
- print "triples:", triples
+ print("triples:", triples)
# B.4. dump all keywords
keywords = get_keywords(triples)
- print "keywords:", keywords
+ print("keywords:", keywords)
if __name__ == '__main__':
diff --git a/patzilla/util/cql/pyparsing/parser.py b/patzilla/util/cql/pyparsing/parser.py
index 12c6cfb2..5ab35363 100644
--- a/patzilla/util/cql/pyparsing/parser.py
+++ b/patzilla/util/cql/pyparsing/parser.py
@@ -21,11 +21,10 @@
Keyword, CaselessKeyword, \
Regex, \
alphas, nums, alphanums, quotedString, \
- oneOf, upcaseTokens, delimitedList, restOfLine, \
+ oneOf, common, delimitedList, restOfLine, \
Forward, Group, Combine, Optional, ZeroOrMore, OneOrMore, \
NotAny, Suppress, FollowedBy, StringEnd, \
ParseResults, ParseException, removeQuotes
-from patzilla.util.cql.pyparsing.util import get_literals
log = logging.getLogger(__name__)
@@ -54,19 +53,19 @@
TODO: maybe extract this to a different place, since ..services is also using it
"""
-wildcards = u'*?#!'
+wildcards = '*?#!'
# - classification terms (IPC, CPC) may contain forward slashes and dashes, e.g. H04L12/433, F17D5-00
# - numeric terms may contain punctuation (,.), e.g. 2.45
# - dates may contain dashes, e.g. M11-2009
-separators = u'/,.-'
+separators = '/,.-'
# limited set of unicode characters
#umlauts = u'äöüÄÖÜß'
# all unicode characters
# http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659
-unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace())
+unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace())
# indexchars
indexchars = alphanums + '{}!'
@@ -92,16 +91,17 @@ def __init__(self):
def preconfigure(self):
# Binary comparison operators
- self.cmp_single = u'= != < > <= >='.split()
- self.cmp_perl = u'eq ne lt gt le ge'.split()
- self.cmp_cql = u'exact within encloses all any any/relevant any/rel.lr'.split()
+ self.cmp_single = '= != < > <= >='.split()
+ self.cmp_perl = 'eq ne lt gt le ge'.split()
+ self.cmp_cql = 'exact within encloses all any any/relevant any/rel.lr'.split()
# Boolean operators
# TODO: Configure german operators with DPMAGrammar only
- self.and_ = CaselessKeyword("and") | CaselessKeyword("UND")
- self.or_ = CaselessKeyword("or") | CaselessKeyword("ODER")
- self.not_ = CaselessKeyword("not") | CaselessKeyword("NICHT")
- self.prox_ = CaselessKeyword("prox") | CaselessKeyword("NAHE")
+ self.booleans = ("and", "UND", "or", "ODER", "not", "NICHT", "prox", "NAHE")
+ self.and_ = CaselessKeyword(self.booleans[0]) | CaselessKeyword(self.booleans[1])
+ self.or_ = CaselessKeyword(self.booleans[2]) | CaselessKeyword(self.booleans[3])
+ self.not_ = CaselessKeyword(self.booleans[4]) | CaselessKeyword(self.booleans[5])
+ self.prox_ = CaselessKeyword(self.booleans[6]) | CaselessKeyword(self.booleans[7])
# Neighbourhood term operators
self.neighbourhood_symbols = '(W) (NOTW) (#W) (A) (#A) (P) (L)'.split()
@@ -112,7 +112,6 @@ def configure(self):
self.binop_symbols = self.cmp_single + self.cmp_perl + self.cmp_cql
# Boolean operators
- self.booleans = get_literals(self.and_, self.or_, self.not_, self.prox_)
self.booleans_or = ( self.and_ | self.or_ | self.not_ | self.prox_ )
# Neighbourhood term operators
@@ -134,7 +133,7 @@ def build(self):
# ------------------------------------------
# C. building blocks
# ------------------------------------------
- self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( upcaseTokens ).setName("termop")
+ self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( common.upcase_tokens ).setName("termop")
termword = Word(self.unicode_printables + self.separators + self.wildcards).setName("term")
termword_termop = (termword + OneOrMore( self.termop + termword ))
diff --git a/patzilla/util/cql/pyparsing/searchparser.py b/patzilla/util/cql/pyparsing/searchparser.py
index 880ebad1..e4b26ae1 100644
--- a/patzilla/util/cql/pyparsing/searchparser.py
+++ b/patzilla/util/cql/pyparsing/searchparser.py
@@ -65,18 +65,27 @@
Set = set
except NameError:
from sets import Set
+
from patzilla.util.cql.pyparsing.parser import separators, wildcards
+import sys
+_IS_PYTHON_3 = (sys.version_info[0] >= 3)
+if _IS_PYTHON_3:
+ Set = set
+else:
+ from sets import Set
+
# define characters comprising a word
#wordchars = alphanums + separators + wildcards
# all unicode characters
# http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659
-unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace())
-more_chars = u'°'
+unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace())
+more_chars = '°'
wordchars = unicode_printables + more_chars + separators + wildcards
+
class SearchQueryParser:
def __init__(self):
@@ -272,14 +281,14 @@ class ParserTest(SearchQueryParser):
}
def GetWord(self, word):
- if (self.index.has_key(word)):
+ if (word in self.index):
return self.index[word]
else:
return Set()
def GetWordWildcard(self, word):
result = Set()
- for item in self.index.keys():
+ for item in list(self.index.keys()):
if word == item[0:len(word)]:
result = result.union(self.index[item])
return result
@@ -292,27 +301,27 @@ def GetQuotes(self, search_string, tmp_result):
return result
def GetNot(self, not_set):
- all = Set(self.docs.keys())
+ all = Set(list(self.docs.keys()))
return all.difference(not_set)
def Test(self):
all_ok = True
- for item in self.tests.keys():
- print item
+ for item in list(self.tests.keys()):
+ print(item)
r = self.Parse(item)
e = self.tests[item]
- print 'Result: %s' % r
- print 'Expect: %s' % e
+ print('Result: %s' % r)
+ print('Expect: %s' % e)
if e == r:
- print 'Test OK'
+ print('Test OK')
else:
all_ok = False
- print '>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<'
- print ''
+ print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<')
+ print('')
return all_ok
if __name__=='__main__':
if ParserTest().Test():
- print 'All tests OK'
+ print('All tests OK')
else:
- print 'One or more tests FAILED'
\ No newline at end of file
+ print('One or more tests FAILED')
diff --git a/patzilla/util/cql/pyparsing/serializer.py b/patzilla/util/cql/pyparsing/serializer.py
index a078b5f8..3abe241b 100644
--- a/patzilla/util/cql/pyparsing/serializer.py
+++ b/patzilla/util/cql/pyparsing/serializer.py
@@ -3,7 +3,7 @@
import re
import types
import logging
-import StringIO
+import io
from pyparsing import ParseResults
from patzilla.util.cql.pyparsing.parser import CQLGrammar
from patzilla.util.cql.pyparsing.util import walk_token_results, token_to_triple
@@ -29,10 +29,10 @@ def tokens_to_cql(tokens):
>>> tokens = parse_cql('foo=bar and baz=(qux or quux)')
>>> tokens_to_cql(tokens)
- u'foo=bar and baz=(qux or quux)'
+ 'foo=bar and baz=(qux or quux)'
"""
- buffer = StringIO.StringIO()
+ buffer = io.StringIO()
tokens_to_cql_buffer(tokens, buffer)
buffer.seek(0)
return buffer.read()
@@ -51,23 +51,23 @@ def tokens_to_cql_buffer(tokens, buffer):
# surround binop with spaces for all operators but equality (=)
if binop != '=':
- triple[1] = u' {0} '.format(binop)
+ triple[1] = ' {0} '.format(binop)
- payload = u''.join(triple)
+ payload = ''.join(triple)
else:
- payload = u''.join(token)
+ payload = ''.join(token)
buffer.write(payload)
elif name.startswith('subquery'):
tokens_to_cql_buffer(token, buffer)
- elif tokentype in types.StringTypes:
+ elif tokentype in (str,):
out = token
# surround all boolean operators with whitespace
if token in grammar.booleans:
- out = u' {0} '.format(token)
+ out = ' {0} '.format(token)
buffer.write(out)
def normalize_patentnumbers(tokens):
@@ -77,7 +77,7 @@ def normalize_patentnumbers(tokens):
>>> tokens = parse_cql('pn=EP666666')
>>> normalize_patentnumbers(tokens)
>>> tokens_to_cql(tokens)
- u'pn=EP0666666'
+ 'pn=EP0666666'
"""
def action(token, index, binop, term):
@@ -99,15 +99,15 @@ def get_keywords(triples, whitelist_indexes=None):
>>> triples = []; get_triples(parse_cql('txt=foo or (bi=bar or bi=baz)'), triples)
>>> get_keywords(triples)
- [u'foo', u'bar', u'baz']
+ ['foo', 'bar', 'baz']
>>> triples = []; get_triples(parse_cql('pa all "central, intelligence, agency"'), triples)
>>> get_keywords(triples)
- [u'central', u'intelligence', u'agency']
+ ['central', 'intelligence', 'agency']
>>> triples = []; get_triples(parse_cql('foo=bar and baz=qux'), triples)
>>> get_keywords(triples, ['baz'])
- [u'qux']
+ ['qux']
"""
keywords = []
@@ -143,11 +143,11 @@ def trim_keywords(keywords):
keywords and a list of keyword elements for multi-term keywords
Example:
- >>> trim_keywords([u'!!!daimler?', u'Misch?(P)?wasser'])
- [u'daimler', [u'Misch', u'wasser']]
+ >>> trim_keywords(['!!!daimler?', 'Misch?(P)?wasser'])
+ ['daimler', ['Misch', 'wasser']]
- >>> trim_keywords([u'"foo"', u'" bar "'])
- [u'foo', u'bar']
+ >>> trim_keywords(['"foo"', '" bar "'])
+ ['foo', 'bar']
"""
keywords_trimmed = []
@@ -164,7 +164,7 @@ def get_triples(tokens, triples):
>>> triples = []; get_triples(parse_cql('foo=bar and baz=(qux or quux)'), triples)
>>> triples
- [['foo', u'=', 'bar'], ['qux'], ['quux']]
+ [['foo', '=', 'bar'], ['qux'], ['quux']]
"""
for token in tokens:
@@ -184,7 +184,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None):
>>> tokens = parse_cql('foo=bar and baz=(qux or quux)')
>>> expand_shortcut_notation(tokens)
>>> tokens_to_cql(tokens)
- u'foo=bar and (baz=qux or baz=quux)'
+ 'foo=bar and (baz=qux or baz=quux)'
"""
for token in tokens:
@@ -200,7 +200,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None):
# If it does, put term inside parenthesis, which got lost while performing shortcut expansion.
if token:
if re.match('.*(?:' + grammar.termop.pattern + ').*', token[0], flags=grammar.termop.flags):
- token[0] = u'({0})'.format(token[0])
+ token[0] = '({0})'.format(token[0])
# Process triple in value shortcut notation (contains only the single term).
# Take action: Insert index and binop from subquery context.
diff --git a/patzilla/util/cql/pyparsing/test/01_spec.rst b/patzilla/util/cql/pyparsing/test/01_spec.rst
index de7571ed..a5395600 100644
--- a/patzilla/util/cql/pyparsing/test/01_spec.rst
+++ b/patzilla/util/cql/pyparsing/test/01_spec.rst
@@ -20,52 +20,52 @@ Simple queries
==============
>>> CQL('dinosaur').dumps()
-u'dinosaur'
+'dinosaur'
>>> CQL('"complete dinosaur"').dumps()
-u'"complete dinosaur"'
+'"complete dinosaur"'
>>> CQL('title = "complete dinosaur"').dumps()
-u'title="complete dinosaur"'
+'title="complete dinosaur"'
>>> CQL('title exact "the complete dinosaur"').dumps()
-u'title exact "the complete dinosaur"'
+'title exact "the complete dinosaur"'
Queries using Boolean logic
===========================
>>> CQL('dinosaur or bird').dumps()
-u'dinosaur or bird'
+'dinosaur or bird'
.. note:: **FIXME: enhance grammar**
>>> #CQL('Palomar assignment and "ice age"').dumps()
>>> CQL('dinosaur not reptile').dumps()
-u'dinosaur not reptile'
+'dinosaur not reptile'
>>> CQL('dinosaur and bird or dinobird').dumps()
-u'dinosaur and bird or dinobird'
+'dinosaur and bird or dinobird'
>>> CQL('(bird or dinosaur) and (feathers or scales)').dumps()
-u'(bird or dinosaur) and (feathers or scales)'
+'(bird or dinosaur) and (feathers or scales)'
>>> CQL('"feathered dinosaur" and (yixian or jehol)').dumps()
-u'"feathered dinosaur" and (yixian or jehol)'
+'"feathered dinosaur" and (yixian or jehol)'
Queries accessing publication indexes
=====================================
>>> CQL('publicationYear < 1980').dumps()
-u'publicationYear < 1980'
+'publicationYear < 1980'
>>> CQL('lengthOfFemur > 2.4').dumps()
-u'lengthOfFemur > 2.4'
+'lengthOfFemur > 2.4'
>>> CQL('bioMass >= 100').dumps()
-u'bioMass >= 100'
+'bioMass >= 100'
Queries based on the proximity of words to each other in a document
@@ -82,17 +82,17 @@ Queries across multiple dimensions
==================================
>>> CQL('date within "2002 2005"').dumps()
-u'date within "2002 2005"'
+'date within "2002 2005"'
>>> CQL('dateRange encloses 2003').dumps()
-u'dateRange encloses 2003'
+'dateRange encloses 2003'
Queries based on relevance
==========================
>>> CQL('subject any/relevant "fish frog"').dumps()
-u'subject any/relevant "fish frog"'
+'subject any/relevant "fish frog"'
>>> CQL('subject any/rel.lr "fish frog"').dumps()
-u'subject any/rel.lr "fish frog"'
+'subject any/rel.lr "fish frog"'
diff --git a/patzilla/util/cql/pyparsing/test/05_misc.rst b/patzilla/util/cql/pyparsing/test/05_misc.rst
index ed175ba6..4ccdce8d 100644
--- a/patzilla/util/cql/pyparsing/test/05_misc.rst
+++ b/patzilla/util/cql/pyparsing/test/05_misc.rst
@@ -15,14 +15,14 @@ Queries with UTF-8 characters
Try parsing a query containing utf-8 characters.
->>> CQL(u'title=molécules').dumps()
-u'title=mol\xe9cules'
+>>> CQL('title=molécules').dumps()
+'title=molécules'
->>> CQL(u'inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps()
-u'inventor="CEGARRA SERRANO JOS\xc9 MARIANO"'
+>>> CQL('inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps()
+'inventor="CEGARRA SERRANO JOSÉ MARIANO"'
->>> CQL(u'ab=radaufstandskraft or ab=radaufstandskräfte?').dumps()
-u'ab=radaufstandskraft or ab=radaufstandskr\xe4fte?'
+>>> CQL('ab=radaufstandskraft or ab=radaufstandskräfte?').dumps()
+'ab=radaufstandskraft or ab=radaufstandskräfte?'
# TODO: use more esoteric utf-8 characters, e.g. special chars et al.
@@ -30,7 +30,7 @@ Queries using wildcards
=======================
>>> CQL('txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re').dumps()
-u'txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re'
+'txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re'
Query with comments
@@ -41,16 +41,16 @@ Query with comments
... (baz or qux)) -- comment 2
...
... """).dumps()
-u'foo=(bar and (baz or qux))'
+'foo=(bar and (baz or qux))'
Weird queries
=============
>>> CQL(' foobar ').dumps()
-u'foobar'
+'foobar'
>>> CQL('(((foobar)))').dumps()
-u'(((foobar)))'
+'(((foobar)))'
Queries with errors
@@ -61,14 +61,14 @@ Nonsense
>>> CQL('foo bar', logging=False).dumps()
Traceback (most recent call last):
...
-ParseException: Expected end of text (at char 4), (line:1, col:5)
+ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5)
Lacking terms
-------------
>>> CQL('foo=', logging=False).dumps()
Traceback (most recent call last):
...
-ParseException: Expected term (at char 4), (line:1, col:5)
+ParseException: Expected term, found end of text (at char 4), (line:1, col:5)
>>> CQL('foo= and bar=', logging=False).dumps()
Traceback (most recent call last):
@@ -92,12 +92,12 @@ Unknown binops
>>> CQL('foo % bar', logging=False).dumps()
Traceback (most recent call last):
...
-ParseException: Expected end of text (at char 4), (line:1, col:5)
+ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5)
Error explanation
-----------------
>>> try:
-... CQL(u'foo bar', logging=False).dumps()
+... CQL('foo bar', logging=False).dumps()
... except Exception as ex:
... ex.explanation
-u'foo bar\n ^\n\nExpected end of text (at char 4), (line:1, col:5)'
+"foo bar\n ^\n\nExpected end of text, found 'bar' (at char 4), (line:1, col:5)"
diff --git a/patzilla/util/cql/pyparsing/test/10_extensions.rst b/patzilla/util/cql/pyparsing/test/10_extensions.rst
index 73c72e1e..02d217bc 100644
--- a/patzilla/util/cql/pyparsing/test/10_extensions.rst
+++ b/patzilla/util/cql/pyparsing/test/10_extensions.rst
@@ -18,13 +18,13 @@ Patent number normalization
First, check parsing and reproducing a query for a publication number without normalization applied:
>>> CQL('pn=EP666666').dumps()
-u'pn=EP666666'
+'pn=EP666666'
Then, check whether normalization works correctly. Here, the EP document number should get zero-padded properly:
>>> CQL('pn=EP666666').normalize_numbers().dumps()
-u'pn=EP0666666'
+'pn=EP0666666'
Keyword extraction
@@ -33,13 +33,13 @@ Keyword extraction
First, make sure the query can actually be parsed:
>>> CQL('bi=greifer and pc=de').dumps()
-u'bi=greifer and pc=de'
+'bi=greifer and pc=de'
Then, check the list of extracted keywords:
>>> CQL('bi=greifer and pc=de').keywords()
-[u'greifer']
+['greifer']
Details
@@ -53,8 +53,8 @@ because index name "pc" is not whitelisted.
We can have a look at the layer below, where raw triples got extracted from the query string,
that's the step just before collecting the keywords:
->>> CQL(u'bi=greifer and pc=de').triples()
-[[u'bi', u'=', u'greifer'], [u'pc', u'=', u'de']]
+>>> CQL('bi=greifer and pc=de').triples()
+[['bi', '=', 'greifer'], ['pc', '=', 'de']]
This shows we also have access to the "pc=de" condition if
there's demand for enhanced query analytics in the future.
@@ -70,13 +70,13 @@ Parse and reproduce a cql query containing a nested expression in value shortcut
Our old token-based parser wasn't capable of doing this.
>>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').dumps()
-u'bi=(socke and (Inlay or Teile)) and pc=de'
+'bi=(socke and (Inlay or Teile)) and pc=de'
Expand the value shortcut notation:
>>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().dumps()
-u'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de'
+'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de'
Special operators
@@ -86,7 +86,7 @@ Boolean operators (binops) in german
------------------------------------
>>> CQL('BI=Socke und PA=onion').dumps()
-u'BI=Socke UND PA=onion'
+'BI=Socke UND PA=onion'
@@ -108,7 +108,7 @@ Verbatim reproduction
The query should be reproduced verbatim when not applying any expansion or normalization:
>>> CQL(query).dumps()
-u'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)'
+'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)'
Polishing
@@ -116,12 +116,12 @@ Polishing
After shortcut expansion and number normalization, we should see zero-padded EP document numbers:
>>> CQL(query).polish().dumps()
-u'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)'
+'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)'
Terms from conditions for classification- or fulltext-indexes should count towards keywords:
>>> CQL(query).polish().keywords()
-[u'H04L12/433', u'communication']
+['H04L12/433', 'communication']
Details
@@ -130,13 +130,13 @@ Even without polishing the query, the keywords should be the same,
since "cpc" and "txt" conditions both are not in value shortcut notation.
>>> CQL(query).keywords()
-[u'H04L12/433', u'communication']
+['H04L12/433', 'communication']
On the other hand, number normalization for numbers in value shortcut notation
obviously does not work unless shortcut expansion has been applied first:
>>> CQL('pn=(EP666666 or EP666667)').normalize_numbers().dumps()
-u'pn=(EP666666 or EP666667)'
+'pn=(EP666666 or EP666667)'
Nesting and keywords
@@ -146,4 +146,4 @@ We especially want to properly extract keywords from nested expressions,
even when they are in value shortcut notation.
>>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().keywords()
-[u'socke', u'Inlay', u'Teile']
+['socke', 'Inlay', 'Teile']
diff --git a/patzilla/util/cql/pyparsing/test/15_ops.rst b/patzilla/util/cql/pyparsing/test/15_ops.rst
index eae17e30..65d244f8 100644
--- a/patzilla/util/cql/pyparsing/test/15_ops.rst
+++ b/patzilla/util/cql/pyparsing/test/15_ops.rst
@@ -20,7 +20,7 @@ Date range
Test date range condition used when extrapolating from vanity url, e.g. /publicationdate/2014W10.
>>> CQL('publicationdate within 2014-03-10,2014-03-16').dumps()
-u'publicationdate within 2014-03-10,2014-03-16'
+'publicationdate within 2014-03-10,2014-03-16'
Examples from OPS reference guide
@@ -37,23 +37,23 @@ CQL examples
Original CQL examples from reference guide.
>>> CQL('ti all "green, energy"').dumps()
-u'ti all "green, energy"'
+'ti all "green, energy"'
.. note:: **FIXME: enhance grammar**
>>> #CQL('ti=green prox/unit=world ti=energy').dumps()
>>> CQL('pd within "20051212 20051214"').dumps()
-u'pd within "20051212 20051214"'
+'pd within "20051212 20051214"'
>>> CQL('pd="20051212 20051214"').dumps()
-u'pd="20051212 20051214"'
+'pd="20051212 20051214"'
>>> CQL('ia any "John, Smith"').dumps()
-u'ia any "John, Smith"'
+'ia any "John, Smith"'
>>> CQL('pn=EP and pr=GB').dumps()
-u'pn=EP and pr=GB'
+'pn=EP and pr=GB'
.. note:: **FIXME: enhance grammar**
@@ -62,19 +62,19 @@ u'pn=EP and pr=GB'
>>> #CQL('(ta=green prox/distance<=3 ta=energy) or (ta=renewable prox/distance<=3 ta=energy)').dumps()
>>> CQL('pa all "central, intelligence, agency" and US').dumps()
-u'pa all "central, intelligence, agency" and US'
+'pa all "central, intelligence, agency" and US'
>>> CQL('pa all "central, intelligence, agency" and US and pd>2000').dumps()
-u'pa all "central, intelligence, agency" and US and pd > 2000'
+'pa all "central, intelligence, agency" and US and pd > 2000'
>>> CQL('pd < 18000101').dumps()
-u'pd < 18000101'
+'pd < 18000101'
>>> CQL('ta=synchroni#ed').dumps()
-u'ta=synchroni#ed'
+'ta=synchroni#ed'
>>> CQL('EP and 2009 and Smith').dumps()
-u'EP and 2009 and Smith'
+'EP and 2009 and Smith'
.. note:: **FIXME: enhance grammar**
@@ -91,23 +91,23 @@ Shortcut notation expansion
All these should not be affected by any query manipulation. Prove that.
>>> CQL('pa all "central, intelligence, agency" and US').polish().dumps()
-u'pa all "central, intelligence, agency" and US'
+'pa all "central, intelligence, agency" and US'
>>> CQL('pa all "central, intelligence, agency" and US and pd>2000').polish().dumps()
-u'pa all "central, intelligence, agency" and US and pd > 2000'
+'pa all "central, intelligence, agency" and US and pd > 2000'
>>> CQL('EP and 2009 and Smith').polish().dumps()
-u'EP and 2009 and Smith'
+'EP and 2009 and Smith'
Keyword extraction
------------------
>>> CQL('pa all "central, intelligence, agency" and US').polish().keywords()
-[u'central', u'intelligence', u'agency']
+['central', 'intelligence', 'agency']
>>> CQL('pa all "central intelligence agency" and US').polish().keywords()
-[u'central', u'intelligence', u'agency']
+['central', 'intelligence', 'agency']
.. note:: **FIXME: enhance parser smartness: follow rules outlined on p. 148, section 4.2. CQL index catalogue**
diff --git a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst
index 3f22af00..176137dd 100644
--- a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst
+++ b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst
@@ -27,19 +27,19 @@ Test some logic operators localized to german.
Getting started
---------------
>>> CQL('bi=(greifer oder bagger)').dumps()
-u'bi=(greifer ODER bagger)'
+'bi=(greifer ODER bagger)'
Made up
-------
Try to understand the query.
->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps()
-u'bi=((wasser UND Getr\xe4nk) NICHT (?hahn ODER ?zapf ODER (k\xfchl? ODER ?k\xfchl)))'
+>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps()
+'bi=((wasser UND Getränk) NICHT (?hahn ODER ?zapf ODER (kühl? ODER ?kühl)))'
Extract keywords from query.
->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').polish().keywords()
-[u'wasser', u'Getr\xe4nk', u'hahn', u'zapf', u'k\xfchl', u'k\xfchl']
+>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').polish().keywords()
+['wasser', 'Getränk', 'hahn', 'zapf', 'kühl', 'kühl']
Neighbourhood operators
@@ -50,18 +50,18 @@ Getting started
Try a bareword query string containing a neighbourhood term operator:
->>> CQL(u'L(W)Serine').dumps()
-u'L(W)Serine'
+>>> CQL('L(W)Serine').dumps()
+'L(W)Serine'
Try the same in the context of a real condition (triple):
->>> CQL(u'ab=(L(W)Serine)').dumps()
-u'ab=(L(W)Serine)'
+>>> CQL('ab=(L(W)Serine)').dumps()
+'ab=(L(W)Serine)'
Check this works caseless as well:
->>> CQL(u'L(w)Serine').dumps()
-u'L(W)Serine'
+>>> CQL('L(w)Serine').dumps()
+'L(W)Serine'
Made up
@@ -69,14 +69,14 @@ Made up
Try some more complex queries containing neighbourhood term operators and wildcards.
->>> CQL(u'bi=(Cry1?(L)resist?)').dumps()
-u'bi=(Cry1?(L)resist?)'
+>>> CQL('bi=(Cry1?(L)resist?)').dumps()
+'bi=(Cry1?(L)resist?)'
->>> CQL(u'bi=(Cry1?(5A)tox?)').dumps()
-u'bi=(Cry1?(5A)tox?)'
+>>> CQL('bi=(Cry1?(5A)tox?)').dumps()
+'bi=(Cry1?(5A)tox?)'
->>> CQL(u'bi=(Misch?(P)?wasser)').dumps()
-u'bi=(Misch?(P)?wasser)'
+>>> CQL('bi=(Misch?(P)?wasser)').dumps()
+'bi=(Misch?(P)?wasser)'
@@ -93,91 +93,91 @@ Search examples
---------------
>>> CQL('PA= siemens').dumps()
-u'PA=siemens'
+'PA=siemens'
>>> CQL('PUB= 01.03.2010 UND PA= siemens').dumps()
-u'PUB=01.03.2010 UND PA=siemens'
+'PUB=01.03.2010 UND PA=siemens'
>>> CQL('PA= siemens UND IN= Braun UND PUB>= 01.03.2010').dumps()
-u'PA=siemens UND IN=Braun UND PUB >= 01.03.2010'
+'PA=siemens UND IN=Braun UND PUB >= 01.03.2010'
>>> CQL('PUB= M11-2009 UND PA= daimler?').dumps()
-u'PUB=M11-2009 UND PA=daimler?'
+'PUB=M11-2009 UND PA=daimler?'
->>> CQL(u'AB = !!!lösung').dumps()
-u'AB=!!!l\xf6sung'
+>>> CQL('AB = !!!lösung').dumps()
+'AB=!!!lösung'
>>> CQL('TI = ###heizung').dumps()
-u'TI=###heizung'
+'TI=###heizung'
>>> CQL('CL = ?fahrzeug').dumps()
-u'CL=?fahrzeug'
+'CL=?fahrzeug'
>>> CQL('BI= (programmabschnitt# UND administra?)').dumps()
-u'BI=(programmabschnitt# UND administra?)'
+'BI=(programmabschnitt# UND administra?)'
>>> CQL('ICB=F17D5/00').dumps()
-u'ICB=F17D5/00'
+'ICB=F17D5/00'
>>> CQL('ICB=F17D5-00').dumps()
-u'ICB=F17D5-00'
+'ICB=F17D5-00'
>>> CQL("ICB='F17D 5/00'").dumps()
-u"ICB='F17D 5/00'"
+"ICB='F17D 5/00'"
>>> CQL('ICB=F17D0005000000').dumps()
-u'ICB=F17D0005000000'
+'ICB=F17D0005000000'
>>> CQL('ICP=F17D5/00M').dumps()
-u'ICP=F17D5/00M'
+'ICP=F17D5/00M'
>>> CQL('ICP=F17D5-00M').dumps()
-u'ICP=F17D5-00M'
+'ICP=F17D5-00M'
>>> CQL("ICP='F17D 5/00 M'").dumps()
-u"ICP='F17D 5/00 M'"
+"ICP='F17D 5/00 M'"
>>> CQL('ICP=F17D000500000M').dumps()
-u'ICP=F17D000500000M'
+'ICP=F17D000500000M'
>>> CQL('ICB=F04D13/?').dumps()
-u'ICB=F04D13/?'
+'ICB=F04D13/?'
>>> CQL('ICB=F04D13-?').dumps()
-u'ICB=F04D13-?'
+'ICB=F04D13-?'
>>> CQL("ICB='F04D 13/?'").dumps()
-u"ICB='F04D 13/?'"
+"ICB='F04D 13/?'"
>>> CQL('ICB=F04D0013?').dumps()
-u'ICB=F04D0013?'
+'ICB=F04D0013?'
Search examples for the proximity operator (NOTW)
-------------------------------------------------
>>> CQL('Bi= (Regler und (mechanische(NOTW)Regler))').dumps()
-u'Bi=(Regler UND (mechanische(NOTW)Regler))'
+'Bi=(Regler UND (mechanische(NOTW)Regler))'
>>> CQL('Bi= (Regler und (mechanische (NOTW) Regler))').dumps()
-u'Bi=(Regler UND (mechanische (NOTW) Regler))'
+'Bi=(Regler UND (mechanische (NOTW) Regler))'
Searches in the text fields "Title", "Abstract", "Description", "Claims", "Full text data"
------------------------------------------------------------------------------------------
>>> CQL('TI = ( DVB(W)T )').dumps()
-u'TI=(DVB(W)T)'
+'TI=(DVB(W)T)'
>>> CQL('Bi= (personalcomputer oder (personal(W)computer))').dumps()
-u'Bi=(personalcomputer ODER (personal(W)computer))'
+'Bi=(personalcomputer ODER (personal(W)computer))'
Searches in the fields "Applicant/owner", "Inventor"
----------------------------------------------------
>>> CQL('PA = ( Anna(L)Huber )').dumps()
-u'PA=(Anna(L)Huber)'
+'PA=(Anna(L)Huber)'
Keywords
@@ -185,7 +185,7 @@ Keywords
Try some more complex queries containing *value shortcut notations*, *neighbourhood term operators* and *wildcards*.
->>> largequery = u"""
+>>> largequery = """
... (PA= siemens UND IN= Braun UND PUB>= 01.03.2010) or
... (PUB=M11-2009 UND PA=daimler?) or
... (AB = (!!!lösung or ###heizung or ?fahrzeug)) or
@@ -195,10 +195,10 @@ Try some more complex queries containing *value shortcut notations*, *neighbourh
... """
>>> CQL(largequery).dumps()
-u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)"
+"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)"
>>> CQL(largequery).keywords()
-[u'siemens', u'Braun', u'daimler', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler']]
+['siemens', 'Braun', 'daimler', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler']]
Polishing
@@ -207,20 +207,20 @@ Polishing
Polishing a query, especially the shortcut notation expansion, should not corrupt query syntax.
>>> CQL('TI = ( DVB(W)T )').polish().dumps()
-u'TI=(DVB(W)T)'
+'TI=(DVB(W)T)'
>>> CQL('Bi= (personalcomputer oder (personal(W)computer))').polish().dumps()
-u'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))'
+'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))'
>>> CQL('bi=(Cry1?(L)resist?)').polish().dumps()
-u'bi=(Cry1?(L)resist?)'
+'bi=(Cry1?(L)resist?)'
>>> CQL(largequery).polish().dumps()
-u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) or bi=(Misch?(P)?wasser))"
+"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) or bi=(Misch?(P)?wasser))"
>>> CQL(largequery).polish().keywords()
-[u'siemens', u'Braun', u'daimler', u'l\xf6sung', u'heizung', u'fahrzeug', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler'], [u'Cry1', u'resist'], [u'Cry1', u'tox'], [u'Misch', u'wasser']]
+['siemens', 'Braun', 'daimler', 'lösung', 'heizung', 'fahrzeug', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler'], ['Cry1', 'resist'], ['Cry1', 'tox'], ['Misch', 'wasser']]
From the wild
@@ -233,18 +233,18 @@ Query 1
Reproduce verbatim:
->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps())
+>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps())
(ab=radaufstandskraft or ab=radaufstandskräfte?)
Reproduce with polishing:
->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps())
+>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps())
(ab=radaufstandskraft or ab=radaufstandskräfte?)
Extract keywords after polishing:
->>> CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords()
-[u'radaufstandskraft', u'radaufstandskr\xe4fte']
+>>> CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords()
+['radaufstandskraft', 'radaufstandskräfte']
Query 2
@@ -252,18 +252,18 @@ Query 2
Reproduce verbatim:
->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps())
+>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps())
bi=((warm(P)walzen) and (band(P)mitte and messung)) ODER bi=((warm and walzen) and (band and säbel and messung)) ODER bi=((warm and walzen) and (mitten UND messung)) ODER BI=((reversiergerüst) UND (breitenmessung))
Reproduce with polishing:
->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps())
+>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps())
((bi=(warm(P)walzen)) and (bi=(band(P)mitte) and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=band and bi=säbel and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=mitten UND bi=messung)) ODER ((BI=reversiergerüst) UND (BI=breitenmessung))
Extract keywords after polishing:
->>> CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords()
-[[u'warm', u'walzen'], [u'band', u'mitte'], u'messung', u'warm', u'walzen', u'band', u's\xe4bel', u'messung', u'warm', u'walzen', u'mitten', u'messung', u'reversierger\xfcst', u'breitenmessung']
+>>> CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords()
+[['warm', 'walzen'], ['band', 'mitte'], 'messung', 'warm', 'walzen', 'band', 'säbel', 'messung', 'warm', 'walzen', 'mitten', 'messung', 'reversiergerüst', 'breitenmessung']
Query 3
@@ -271,18 +271,18 @@ Query 3
Reproduce verbatim:
->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps())
+>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps())
bi=((hot(P)rolling) and (strip(P)center and measurement) ODER (hot and rolling) and (strip and camber and measurement)) ODER bi=((reversing and mill) and (camber)) ODER bi=((hot and steel) and (center and measurement)) ODER BI=((hot(P)slab) UND (position(P)measurement)) ODER BI=((hot(P)strip) UND (position(P)measurement))
Reproduce with polishing:
->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps())
+>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps())
((bi=(hot(P)rolling)) and (bi=(strip(P)center) and bi=measurement) ODER (bi=hot and bi=rolling) and (bi=strip and bi=camber and bi=measurement)) ODER ((bi=reversing and bi=mill) and (bi=camber)) ODER ((bi=hot and bi=steel) and (bi=center and bi=measurement)) ODER ((BI=(hot(P)slab)) UND (BI=(position(P)measurement))) ODER ((BI=(hot(P)strip)) UND (BI=(position(P)measurement)))
Extract keywords after polishing:
->>> CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords()
-[[u'hot', u'rolling'], [u'strip', u'center'], u'measurement', u'hot', u'rolling', u'strip', u'camber', u'measurement', u'reversing', u'mill', u'camber', u'hot', u'steel', u'center', u'measurement', [u'hot', u'slab'], [u'position', u'measurement'], [u'hot', u'strip'], [u'position', u'measurement']]
+>>> CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords()
+[['hot', 'rolling'], ['strip', 'center'], 'measurement', 'hot', 'rolling', 'strip', 'camber', 'measurement', 'reversing', 'mill', 'camber', 'hot', 'steel', 'center', 'measurement', ['hot', 'slab'], ['position', 'measurement'], ['hot', 'strip'], ['position', 'measurement']]
Query 4
@@ -290,15 +290,15 @@ Query 4
Reproduce verbatim:
->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps())
+>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps())
BI=((finne? or (flying(1A)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(W)joint) or quadrilaterale or quatre))
Reproduce with polishing:
->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps())
+>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps())
((BI=finne? or (BI=(flying(1A)buttress?)) or BI=fins or BI=effillee?) and (BI=viergelenk? or BI=mehrgelenk? or BI=quadrilateral? or BI=quadruple? or (BI=(four(W)joint)) or BI=quadrilaterale or BI=quatre))
Extract keywords after polishing:
->>> CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords()
-[u'finne', [u'flying', u'buttress'], u'fins', u'effillee', u'viergelenk', u'mehrgelenk', u'quadrilateral', u'quadruple', [u'four', u'joint'], u'quadrilaterale', u'quatre']
+>>> CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords()
+['finne', ['flying', 'buttress'], 'fins', 'effillee', 'viergelenk', 'mehrgelenk', 'quadrilateral', 'quadruple', ['four', 'joint'], 'quadrilaterale', 'quatre']
diff --git a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst
index 3659f21e..0385cfc7 100644
--- a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst
+++ b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst
@@ -29,16 +29,16 @@ Test some logic operators localized to german.
Getting started
---------------
>>> CQL('pnctry:EP AND text:vibrat*').dumps()
-u'pnctry : EP and text : vibrat*'
+'pnctry : EP and text : vibrat*'
Made up
-------
Try to understand the query.
->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps()
-u'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))'
+>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps()
+'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))'
Extract keywords from query.
->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords()
-[u'vibrat', u'G01F000184', u'G01F000184']
+>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords()
+['vibrat', 'G01F000184', 'G01F000184']
diff --git a/patzilla/util/cql/pyparsing/util.py b/patzilla/util/cql/pyparsing/util.py
index a202a99b..199a804c 100644
--- a/patzilla/util/cql/pyparsing/util.py
+++ b/patzilla/util/cql/pyparsing/util.py
@@ -2,14 +2,6 @@
# (c) 2014-2016 Andreas Motl, Elmyra UG
from pyparsing import ParseResults
-def get_literals(*elements):
- literals = []
- for element in elements:
- for literal in element:
- literal = unicode(literal).strip('"').strip("'")
- literals.append(literal)
- return literals
-
def walk_token_results(tokens, *args, **kwargs):
for token in tokens:
diff --git a/patzilla/util/cql/util.py b/patzilla/util/cql/util.py
index 07787f47..d117bf5f 100644
--- a/patzilla/util/cql/util.py
+++ b/patzilla/util/cql/util.py
@@ -15,7 +15,7 @@ def pair_to_cql(datasource, key, value):
return
cql_part = None
- format = u'{0}=({1})'
+ format = '{0}=({1})'
# Special processing rules for depatisnet
if datasource == 'depatisnet':
@@ -94,7 +94,7 @@ def pair_to_cql(datasource, key, value):
if key == 'inventor' or key == 'applicant':
if not has_booleans(value) and should_be_quoted(value):
- value = u'"{0}"'.format(value)
+ value = '"{0}"'.format(value)
if key == 'pubdate':
diff --git a/patzilla/util/crypto/jwt.py b/patzilla/util/crypto/jwt.py
index 204727ba..194728f4 100644
--- a/patzilla/util/crypto/jwt.py
+++ b/patzilla/util/crypto/jwt.py
@@ -1,13 +1,15 @@
# -*- coding: utf-8 -*-
-# (c) 2014-2022 Andreas Motl
-from __future__ import absolute_import
+
+# (c) 2014 Andreas Motl, Elmyra UG
+
import logging
from datetime import datetime, timedelta
import python_jwt
from jwcrypto import jwk
from zope.interface.interface import Interface
-from zope.interface.declarations import implements
+#from zope.interface.declarations import implements
+from zope.interface import implementer
log = logging.getLogger(__name__)
@@ -16,6 +18,7 @@ class ISigner(Interface):
pass
+@implementer(ISigner)
class JwtSigner(object):
"""
Generate and verify JSON Web Tokens.
@@ -26,7 +29,7 @@ class JwtSigner(object):
- https://jwcrypto.readthedocs.io/
"""
- implements(ISigner)
+# py27 implements(ISigner)
def __init__(self, key=None, ttl=None):
self.key = key
@@ -86,7 +89,7 @@ def unsign(self, token):
iat_skew=timedelta(minutes=5),
)
- if not payload.has_key('data'):
+ if 'data' not in payload:
error_payload = {
'location': 'JSON Web Token',
'name': self.__class__.__name__,
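For reference, the zope.interface migration applied above follows a general pattern: the class-advice call implements(...) only exists on Python 2, while the @implementer decorator works on Python 3 as well. A minimal sketch with made-up names:

    from zope.interface import Interface, implementer

    class IGreeter(Interface):
        """Hypothetical example interface."""

    @implementer(IGreeter)   # replaces the Python 2 class-body call implements(IGreeter)
    class Greeter(object):
        def hello(self):
            return "hello"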
diff --git a/patzilla/util/data/container.py b/patzilla/util/data/container.py
index d9c06532..22b3b05e 100644
--- a/patzilla/util/data/container.py
+++ b/patzilla/util/data/container.py
@@ -2,11 +2,11 @@
# (c) 2016 Andreas Motl, Elmyra UG
import json
import types
-from bunch import Bunch
+from munch import Munch
from jsonpointer import JsonPointer
-class SmartBunch(Bunch):
+class SmartMunch(Munch):
def dump(self):
return self.toJSON()
@@ -18,15 +18,15 @@ def prettify(self):
return self.pretty()
@classmethod
- def bunchify(cls, x):
+ def munchify(cls, x):
"""
- Recursively transforms a dictionary into a SmartBunch via copy.
- Generic "bunchify", also works with descendants of Bunch.
+ Recursively transforms a dictionary into a SmartMunch via copy.
+ Generic "munchify", also works with descendants of Munch.
"""
if isinstance(x, dict):
- return cls( (k, cls.bunchify(v)) for k,v in x.iteritems() )
+ return cls( (k, cls.munchify(v)) for k,v in x.items() )
elif isinstance(x, (list, tuple)):
- return type(x)( cls.bunchify(v) for v in x )
+ return type(x)( cls.munchify(v) for v in x )
else:
return x
@@ -35,7 +35,7 @@ def unique_sequence(seq):
# https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-python-whilst-preserving-order/480227#480227
seen = set()
seen_add = seen.add
- unhashable_types = (types.ListType, types.DictionaryType)
+ unhashable_types = (list, dict)
return [x for x in seq if type(x) in unhashable_types or not (x in seen or seen_add(x))]
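A rough usage sketch for the renamed container helper (sample data is made up): munchify recursively converts plain dictionaries, so nested values gain attribute access as well, and dump() serializes back to JSON.

    from patzilla.util.data.container import SmartMunch

    data = SmartMunch.munchify({"vendor": {"name": "acme", "ids": [1, 2]}})
    print(data.vendor.name)   # attribute access provided by Munch -> "acme"
    print(data.dump())        # JSON string via toJSON()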
diff --git a/patzilla/util/data/orderedset.py b/patzilla/util/data/orderedset.py
index 5ba05be5..43e45da1 100644
--- a/patzilla/util/data/orderedset.py
+++ b/patzilla/util/data/orderedset.py
@@ -2,7 +2,7 @@
# Set that remembers original insertion order.
import collections
-class OrderedSet(collections.MutableSet):
+class OrderedSet(collections.abc.MutableSet):
def __init__(self, iterable=None):
self.end = end = []
@@ -64,6 +64,6 @@ def __eq__(self, other):
if __name__ == '__main__':
s = OrderedSet('abracadaba')
t = OrderedSet('simsalabim')
- print(s | t)
- print(s & t)
- print(s - t)
+ print((s | t))
+ print((s & t))
+ print((s - t))
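The import above uses the Python 3 location of the abstract base classes; collections.MutableSet was removed entirely in Python 3.10. If both interpreters still had to be supported, the usual guarded import would look like this (sketch only):

    try:
        from collections.abc import MutableSet   # Python 3.3+
    except ImportError:                           # Python 2 fallback
        from collections import MutableSet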
diff --git a/patzilla/util/data/zip.py b/patzilla/util/data/zip.py
index 9c9b472b..9879aa9c 100644
--- a/patzilla/util/data/zip.py
+++ b/patzilla/util/data/zip.py
@@ -17,7 +17,7 @@ def zip_multi(multi):
now = time.localtime(time.time())[:6]
# http://stackoverflow.com/questions/434641/how-do-i-set-permissions-attributes-on-a-file-in-a-zip-file-using-pythons-zip/434689#434689
- unix_permissions = 0644 << 16L
+ unix_permissions = 0o644 << 16
# add index file for drawings
"""
diff --git a/patzilla/util/database/beaker_mongodb.py b/patzilla/util/database/beaker_mongodb.py
index 4ccab80c..21938c99 100644
--- a/patzilla/util/database/beaker_mongodb.py
+++ b/patzilla/util/database/beaker_mongodb.py
@@ -184,235 +184,194 @@
before upgrading to 0.5+ and be aware that it will generate new caches.
-
+Parts of this code were copied from https://raw.githubusercontent.com/bbangert/beaker/master/beaker/ext/mongodb.py (as of 2023-03-22).
"""
-import logging
-from beaker.container import NamespaceManager, Container
-from beaker.exceptions import InvalidCacheBackendError, MissingCacheParameter
-from beaker.synchronization import null_synchronizer
-from beaker.util import verify_directory, SyncDict
-
-from StringIO import StringIO
-try:
- import cPickle as pickle
-except ImportError:
- import pickle
+import datetime
+import os
+import threading
+import time
+import pickle
try:
- from pymongo.connection import Connection
+ import pymongo
+ import pymongo.errors
import bson
- import bson.errors
except ImportError:
- raise InvalidCacheBackendError("Unable to load the pymongo driver.")
-
-log = logging.getLogger(__name__)
-#log.setLevel(logging.DEBUG)
-
-class MongoDBNamespaceManager(NamespaceManager):
- clients = SyncDict()
- _pickle = True
- _sparse = False
+ pymongo = None
+ bson = None
- # TODO _- support write concern / safe
- def __init__(self, namespace, url=None, data_dir=None, skip_pickle=False,
- sparse_collection=False, **params):
- NamespaceManager.__init__(self, namespace)
+from beaker.container import NamespaceManager
+from beaker.synchronization import SynchronizerImpl
+from beaker.util import SyncDict, machine_identifier
+from beaker.crypto.util import sha1
+from beaker._compat import string_type, PY2
- if not url:
- raise MissingCacheParameter("MongoDB url is required")
- if skip_pickle:
- log.info("Disabling pickling for namespace: %s" % self.namespace)
- self._pickle = False
+class MongoNamespaceManager(NamespaceManager):
+ """Provides the :class:`.NamespaceManager` API over MongoDB.
- if sparse_collection:
- log.info("Separating data to one row per key (sparse collection) for ns %s ." % self.namespace)
- self._sparse = True
+ Provided ``url`` can be both a mongodb connection string or
+ an already existing MongoClient instance.
- # Temporarily uses a local copy of the functions until pymongo upgrades to new parser code
- (host_list, database, username, password, collection, options) = _parse_uri(url)
-
- if database and host_list:
- data_key = "mongodb:%s" % (database)
- else:
- raise MissingCacheParameter("Invalid Cache URL. Cannot parse.")
-
- def _create_mongo_conn():
- host_uri = 'mongodb://'
- for x in host_list:
- host_uri += '%s:%s' % x
- log.info("Host URI: %s" % host_uri)
- conn = Connection(host_uri, slave_okay=options.get('slaveok', False))
+ The data will be stored into ``beaker_cache`` collection of the
+ *default database*, so make sure your connection string or
+ MongoClient point to a default database.
+ """
+ MAX_KEY_LENGTH = 1024
- db = conn[database]
+ clients = SyncDict()
- if username:
- log.info("Attempting to authenticate %s/%s " % (username, password))
- if not db.authenticate(username, password):
- raise InvalidCacheBackendError('Cannot authenticate to '
- ' MongoDB.')
- return db[collection]
+ def __init__(self, namespace, url, **kw):
+ super(MongoNamespaceManager, self).__init__(namespace)
+ self.lock_dir = None # MongoDB uses mongo itself for locking.
- self.mongo = MongoDBNamespaceManager.clients.get(data_key,
- _create_mongo_conn)
+ if pymongo is None:
+ raise RuntimeError('pymongo3 is not available')
- def get_creation_lock(self, key):
- """@TODO - stop hitting filesystem for this...
- I think mongo can properly avoid dog piling for us.
- """
- return null_synchronizer()
-
- def do_remove(self):
- """Clears the entire filesystem (drops the collection)"""
- log.debug("[MongoDB] Remove namespace: %s" % self.namespace)
- q = {}
- if self._sparse:
- q = {'_id.namespace': self.namespace}
+ if isinstance(url, string_type):
+ self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url)
else:
- q = {'_id': self.namespace}
-
- log.debug("[MongoDB] Remove Query: %s" % q)
- self.mongo.remove(q)
+ self.client = url
+ self.db = self.client.get_default_database()
+
+ def _format_key(self, key):
+ if not isinstance(key, str):
+ key = key.decode('ascii')
+ if len(key) > (self.MAX_KEY_LENGTH - len(self.namespace) - 1):
+ if not PY2:
+ key = key.encode('utf-8')
+ key = sha1(key).hexdigest()
+ return '%s:%s' % (self.namespace, key)
+ def get_creation_lock(self, key):
+ return MongoSynchronizer(self._format_key(key), self.client)
def __getitem__(self, key):
- log.debug("[MongoDB %s] Get Key: %s" % (self.mongo,
- key))
-
- _id = {}
- fields = {}
- if self._sparse:
- _id = {
- 'namespace': self.namespace,
- 'key': key
- }
- fields['data'] = True
- else:
- _id = self.namespace
- fields['data.' + key] = True
-
- log.debug("[MongoDB] Get Query: id == %s Fields: %s", _id, fields)
- result = self.mongo.find_one({'_id': _id}, fields=fields)
- log.debug("[MongoDB] Get Result: %s", result)
-
- if result:
- """Running into instances in which mongo is returning
- -1, which causes an error as __len__ should return 0
- or positive integers, hence the check of size explicit"""
- log.debug("Result: %s", result)
- data = result.get('data', None)
- log.debug("Data: %s", data)
- if self._sparse:
- value = data
- else:
- value = data.get(key, None)
-
- if not value:
- return None
-
- if self._pickle or key == 'session':
- value = _depickle(value)
- else:
- if value['pickled']:
- value = (value['stored'], value['expires'], _depickle(value['value']))
- else:
- value = (value['stored'], value['expires'], value['value'])
-
- log.debug("[key: %s] Value: %s" % (key, value))
-
- return value
- else:
- return None
-
+ self._clear_expired()
+ entry = self.db.backer_cache.find_one({'_id': self._format_key(key)})
+ if entry is None:
+ raise KeyError(key)
+ return pickle.loads(entry['value'])
def __contains__(self, key):
- def _has():
- result = self.__getitem__(key)
- if result:
- log.debug("[MongoDB] %s == %s" % (key, result))
- return result is not None
- else:
- return False
-
- log.debug("[MongoDB] Has '%s'? " % key)
- ret = _has()
-
-
- return ret
+ self._clear_expired()
+ entry = self.db.backer_cache.find_one({'_id': self._format_key(key)})
+ return entry is not None
def has_key(self, key):
return key in self
def set_value(self, key, value, expiretime=None):
- log.debug("[MongoDB %s] Set Key: %s (Expiry: %s) ... " %
- (self.mongo, key, expiretime))
+ self._clear_expired()
- _id = {}
- doc = {}
+ expiration = None
+ if expiretime is not None:
+ expiration = time.time() + expiretime
- if self._pickle or key == 'session':
- try:
- value = pickle.dumps(value)
- except:
- log.exception("Failed to pickle value.")
- else:
- value = {
- 'stored': value[0],
- 'expires': value[1],
- 'value': value[2],
- 'pickled': False
- }
- try:
- bson.BSON.encode(value)
- except:
- log.warning("Value is not bson serializable, pickling inner value.")
- value['value'] = pickle.dumps(value['value'])
- value['pickled'] = True
+ value = pickle.dumps(value)
+ self.db.backer_cache.update_one({'_id': self._format_key(key)},
+ {'$set': {'value': bson.Binary(value),
+ 'expiration': expiration}},
+ upsert=True)
+
+ def __setitem__(self, key, value):
+ self.set_value(key, value)
+
+ def __delitem__(self, key):
+ self._clear_expired()
+ self.db.backer_cache.delete_many({'_id': self._format_key(key)})
+ def do_remove(self):
+ self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace}})
+ def keys(self):
+ return [e['key'].split(':', 1)[-1] for e in self.db.backer_cache.find_all(
+ {'_id': {'$regex': '^%s' % self.namespace}}
+ )]
- if self._sparse:
- _id = {
- 'namespace': self.namespace,
- 'key': key
- }
+ def _clear_expired(self):
+ now = time.time()
+ self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace},
+ 'expiration': {'$ne': None, '$lte': now}})
- doc['data'] = bson.Binary(value)
- doc['_id'] = _id
- if expiretime:
- # TODO - What is the datatype of this? it should be instantiated as a datetime instance
- doc['valid_until'] = expiretime
- else:
- _id = self.namespace
- doc['$set'] = {'data.' + key: bson.Binary(value)}
- if expiretime:
- # TODO - What is the datatype of this? it should be instantiated as a datetime instance
- doc['$set']['valid_until'] = expiretime
- log.debug("Upserting Doc '%s' to _id '%s'" % (doc, _id))
- self.mongo.update({"_id": _id}, doc, upsert=True, safe=True)
+class MongoSynchronizer(SynchronizerImpl):
+ """Provides a Writer/Reader lock based on MongoDB.
- def __setitem__(self, key, value):
- self.set_value(key, value)
+ Provided ``url`` can be both a mongodb connection string or
+ an already existing MongoClient instance.
- def __delitem__(self, key):
- """Delete JUST the key, by setting it to None."""
- if self._sparse:
- self.mongo.remove({'_id.namespace': self.namespace})
- else:
- self.mongo.update({'_id': self.namespace},
- {'$unset': {'data.' + key: True}}, upsert=False)
+ The data will be stored into ``beaker_locks`` collection of the
+ *default database*, so make sure your connection string or
+ MongoClient point to a default database.
- def keys(self):
- if self._sparse:
- return [row['_id']['field'] for row in self.mongo.find({'_id.namespace': self.namespace}, {'_id': True})]
+ Locks are identified by local machine, PID and threadid, so
+ are suitable for use in both local and distributed environments.
+ """
+ # If a cache entry generation function can take a lot,
+ # but 15 minutes is more than a reasonable time.
+ LOCK_EXPIRATION = 900
+ MACHINE_ID = machine_identifier()
+
+ def __init__(self, identifier, url):
+ super(MongoSynchronizer, self).__init__()
+ self.identifier = identifier
+ if isinstance(url, string_type):
+ self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url)
else:
- return self.mongo.find_one({'_id': self.namespace}, {'data': True}).get('data', {})
+ self.client = url
+ self.db = self.client.get_default_database()
+
+ def _clear_expired_locks(self):
+ now = datetime.datetime.utcnow()
+ expired = now - datetime.timedelta(seconds=self.LOCK_EXPIRATION)
+ self.db.beaker_locks.delete_many({'_id': self.identifier, 'timestamp': {'$lte': expired}})
+ return now
+
+ def _get_owner_id(self):
+ return '%s-%s-%s' % (self.MACHINE_ID, os.getpid(), threading.current_thread().ident)
+
+ def do_release_read_lock(self):
+ owner_id = self._get_owner_id()
+ self.db.beaker_locks.update_one({'_id': self.identifier, 'readers': owner_id},
+ {'$pull': {'readers': owner_id}})
+
+ def do_acquire_read_lock(self, wait):
+ now = self._clear_expired_locks()
+ owner_id = self._get_owner_id()
+ while True:
+ try:
+ self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None},
+ {'$set': {'timestamp': now},
+ '$push': {'readers': owner_id}},
+ upsert=True)
+ return True
+ except pymongo.errors.DuplicateKeyError:
+ if not wait:
+ return False
+ time.sleep(0.2)
+
+ def do_release_write_lock(self):
+ self.db.beaker_locks.delete_one({'_id': self.identifier, 'owner': self._get_owner_id()})
+
+ def do_acquire_write_lock(self, wait):
+ now = self._clear_expired_locks()
+ owner_id = self._get_owner_id()
+ while True:
+ try:
+ self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None,
+ 'readers': []},
+ {'$set': {'owner': owner_id,
+ 'timestamp': now}},
+ upsert=True)
+ return True
+ except pymongo.errors.DuplicateKeyError:
+ if not wait:
+ return False
+ time.sleep(0.2)
-class MongoDBContainer(Container):
- namespace_class = MongoDBNamespaceManager
def _partition(source, sub):
"""Our own string partitioning method.
@@ -499,6 +458,6 @@ def _parse_uri(uri, default_port=27017):
def _depickle(value):
try:
return pickle.loads(value)
- except Exception, e:
+ except Exception as e:
log.exception("Failed to unpickle value '{0}'.".format(e))
return None
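A rough usage sketch for the vendored namespace manager above; the connection string and namespace are illustrative, and the MongoDB URL must carry a default database, as the docstring notes:

    from patzilla.util.database.beaker_mongodb import MongoNamespaceManager

    ns = MongoNamespaceManager("opaquelinks", "mongodb://localhost:27017/patzilla")
    ns.set_value("token", {"user": "test"}, expiretime=300)
    print(ns["token"])   # -> {'user': 'test'}, unpickled from the "backer_cache" collection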
diff --git a/patzilla/util/database/beaker_mongodb_gridfs.py b/patzilla/util/database/beaker_mongodb_gridfs.py
index 605a0c70..e9aff35f 100644
--- a/patzilla/util/database/beaker_mongodb_gridfs.py
+++ b/patzilla/util/database/beaker_mongodb_gridfs.py
@@ -1,5 +1,8 @@
+import pickle
+import logging as log
from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle
+
def includeme(config):
# Monkey patch 3rd party class to fix runtime error
diff --git a/patzilla/util/date/__init__.py b/patzilla/util/date/__init__.py
index a683524f..4be8b63a 100644
--- a/patzilla/util/date/__init__.py
+++ b/patzilla/util/date/__init__.py
@@ -111,7 +111,7 @@ def parse_date_within(value):
"""
value = value.replace('within', '').strip().strip('"')
parts = value.split(',')
- parts = map(unicode.strip, parts)
+ parts = list(map(str.strip, parts))
result = {
'startdate': parts[0],
'enddate': parts[1],
@@ -123,12 +123,12 @@ def year_range_to_within(value):
Parse year ranges like "1990-2014" or "1990 - 2014"
and convert into "within 1990,2014" expression
"""
- if value.count(u'-') == 1:
- parts = value.split(u'-')
+ if value.count('-') == 1:
+ parts = value.split('-')
parts = [part.strip() for part in parts]
year_from, year_to = parts
if len(year_from) == 4 and len(year_to) == 4:
- value = u'within {year_from},{year_to}'.format(**locals())
+ value = 'within {year_from},{year_to}'.format(**locals())
return value
def week_range(date):
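Behaviour sketch for the two helpers touched above, after the switch to str.strip (doctest-style, values illustrative):

    >>> year_range_to_within('1990 - 2014')
    'within 1990,2014'
    >>> parse_date_within('within 2014-03-10,2014-03-16')['startdate']
    '2014-03-10'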
diff --git a/patzilla/util/email/core.py b/patzilla/util/email/core.py
index 899b3c72..8e14d2cd 100644
--- a/patzilla/util/email/core.py
+++ b/patzilla/util/email/core.py
@@ -17,7 +17,7 @@
log = logging.getLogger(__name__)
-def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', reply_to=None, attachments=None, mime_headers=None):
+def build_email(mail_to, subject, body_text, mail_from='test@example.org', reply_to=None, attachments=None, mime_headers=None):
"""
Flexible Multipart MIME message builder.
@@ -53,11 +53,11 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl
}
# Subject header
- mime_headers.update({u'Subject': Header(s=subject, charset='utf-8')})
+ mime_headers.update({'Subject': Header(s=subject, charset='utf-8')})
# Add address headers
- for key, item in address_headers.iteritems():
+ for key, item in address_headers.items():
if isinstance(item, AddressList):
# v1
@@ -70,7 +70,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl
message[key] = value
# Add more headers
- for key, value in mime_headers.iteritems():
+ for key, value in mime_headers.items():
#message.add_header(key, value)
if value:
message[key] = value
@@ -97,7 +97,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl
# multipart attachments
# ------------------------------------------
# from https://docs.python.org/2/library/email-examples.html
- for filename, payload in attachments.iteritems():
+ for filename, payload in attachments.items():
# Guess the content type based on the file's extension. Encoding
# will be ignored, although we should check for simple things like
@@ -149,10 +149,10 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl
return payload
-def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.org'):
+def send_email(mail_to, message, smtp_settings=None, mail_from='test@example.org'):
smtp_settings = smtp_settings or {}
- smtp_settings.setdefault('hostname', u'localhost')
+ smtp_settings.setdefault('hostname', 'localhost')
smtp_settings.setdefault('port', 25)
# sanity checks
@@ -191,7 +191,7 @@ def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.or
def format_addresslist(addresslist):
#print 'addresslist:', addresslist.addresslist
- return map(formataddr, addresslist.addresslist)
+ return list(map(formataddr, addresslist.addresslist))
def fix_addresslist(addresslist):
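Hypothetical call shapes for the two helpers above; addresses and settings are made up, only the keyword names come from the signatures shown:

    message = build_email(
        mail_to="alice@example.org",
        subject="Test report",
        body_text="Hello",
        attachments={"report.txt": "plain text payload"},
    )
    send_email("alice@example.org", message, smtp_settings={"hostname": "localhost", "port": 25})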
diff --git a/patzilla/util/email/message.py b/patzilla/util/email/message.py
index 7baca1ee..37f7ea62 100644
--- a/patzilla/util/email/message.py
+++ b/patzilla/util/email/message.py
@@ -6,7 +6,7 @@
import logging
import textwrap
from copy import deepcopy
-from core import build_email, send_email
+from .core import build_email, send_email
from patzilla.util.config import read_config, to_list
log = logging.getLogger(__name__)
@@ -38,25 +38,25 @@ def add_reply(self, address):
def send(self, subject='', message='', files=None):
- recipients = u', '.join(self.recipients)
- reply_to = u', '.join(self.reply_to)
+ recipients = ', '.join(self.recipients)
+ reply_to = ', '.join(self.reply_to)
files = files or {}
# get smtp addressing information from settings
- smtp_host = self.smtp_settings.get('hostname', u'localhost')
- mail_from = self.email_settings['addressbook'].get('from', u'test@example.org')
+ smtp_host = self.smtp_settings.get('hostname', 'localhost')
+ mail_from = self.email_settings['addressbook'].get('from', 'test@example.org')
# log smtp settings
smtp_settings_log = deepcopy(self.smtp_settings)
if 'password' in smtp_settings_log:
del smtp_settings_log['password']
- log.info(u'Sending email to "{recipients}". smtp settings: {smtp_settings}'.format(
+ log.info('Sending email to "{recipients}". smtp settings: {smtp_settings}'.format(
recipients=recipients, smtp_settings=smtp_settings_log))
# build subject
event_date = time.strftime('%Y-%m-%d')
event_time = time.strftime('%H:%M:%S')
- subject_real = u''
+ subject_real = ''
if 'subject_prefix' in self.email_settings['content']:
prefix = self.email_settings['content'].get('subject_prefix')
if not prefix.endswith(' '):
@@ -64,14 +64,14 @@ def send(self, subject='', message='', files=None):
subject_real += prefix
#subject_real += u'{subject} on {event_date} at {event_time}'.format(**locals())
- subject_real += u'{}'.format(subject)
+ subject_real += '{}'.format(subject)
- filenames = u'\n'.join([u'- ' + entry for entry in files.keys()])
+ filenames = '\n'.join(['- ' + entry for entry in list(files.keys())])
body_template = textwrap.dedent(self.email_settings['content'].get('body', '')).strip()
if 'signature' in self.email_settings['content']:
- body_template += u'\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip()
+ body_template += '\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip()
body_template = body_template.replace('\\n', '\r')
@@ -96,11 +96,11 @@ def send(self, subject='', message='', files=None):
# smtplib.SMTPServerDisconnected: Connection unexpectedly closed
#
send_email(recipients, message, smtp_settings=self.smtp_settings, mail_from=mail_from)
- log.info(u'Email to recipients "{recipients}" sent successfully'.format(recipients=recipients))
+ log.info('Email to recipients "{recipients}" sent successfully'.format(recipients=recipients))
except Exception as ex:
# TODO: catch traceback when running in commandline mode
- log.error(u'Error sending email: {failure}'.format(failure=ex))
+ log.error('Error sending email: {failure}'.format(failure=ex))
raise
@@ -123,10 +123,10 @@ def send(self, subject='', message='', files=None):
message = EmailMessage(settings['smtp'], settings['email'], {'subject_prefix': 'acme-product'})
message.add_recipient('test@example.org')
message.send(
- subject = u'Self-test email from Räuber Hotzenplotz',
- message = u'Self-test email from Räuber Hotzenplotz',
+ subject = 'Self-test email from Räuber Hotzenplotz',
+ message = 'Self-test email from Räuber Hotzenplotz',
files = {
- u'test.txt': u'☠☠☠ SKULL AND CROSSBONES ☠☠☠',
- u'test.json': json.dumps(u'☠☠☠ SKULL AND CROSSBONES ☠☠☠'),
+ 'test.txt': '☠☠☠ SKULL AND CROSSBONES ☠☠☠',
+ 'test.json': json.dumps('☠☠☠ SKULL AND CROSSBONES ☠☠☠'),
}
)
diff --git a/patzilla/util/expression/__init__.py b/patzilla/util/expression/__init__.py
index e96ed902..43dbceac 100644
--- a/patzilla/util/expression/__init__.py
+++ b/patzilla/util/expression/__init__.py
@@ -28,8 +28,8 @@ class SearchExpression(object):
def parse_expression(self, query):
- logger.info(u'Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format(
- query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or u'default'))
+ logger.info('Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format(
+ query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or 'default'))
if self.syntax == 'cql':
self.parse_expression_cql(query)
@@ -40,8 +40,8 @@ def parse_expression(self, query):
def parse_expression_cql(self, expression):
# Fixup query: Wrap into quotes if CQL expression is a) unspecific, b) contains spaces and c) is still unquoted
- if should_be_quoted(expression) and u'within' not in expression:
- expression = u'"%s"' % expression
+ if should_be_quoted(expression) and 'within' not in expression:
+ expression = '"%s"' % expression
# Parse and recompile CQL query string to apply number normalization
query_object = None
@@ -59,11 +59,11 @@ def parse_expression_cql(self, expression):
expression = query_recompiled
if query_recompiled != expression:
- logger.info(u'Recompiled search expression to "{query}"'.format(query=expression))
+ logger.info('Recompiled search expression to "{query}"'.format(query=expression))
except Exception as ex:
# TODO: Can we get more details from diagnostic information to just stop here w/o propagating obviously wrong query to OPS?
- logger.warn(u'CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback()))
+ logger.warn('CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback()))
self.cql_parser = query_object
self.expression = expression
diff --git a/patzilla/util/expression/keywords.py b/patzilla/util/expression/keywords.py
index e5bd7b3b..bfb3f422 100644
--- a/patzilla/util/expression/keywords.py
+++ b/patzilla/util/expression/keywords.py
@@ -64,7 +64,7 @@ def scan_keywords(op, keywords):
#print "op.index:", op.index
#print "op.term:", op.term
if str(op.index) in keyword_fields:
- keyword = clean_keyword(unicode(op.term))
+ keyword = clean_keyword(str(op.term))
keywords.append(keyword)
hasattr(op, 'leftOperand') and scan_keywords(op.leftOperand, keywords)
@@ -76,7 +76,7 @@ def keywords_to_response(request, search):
Propagate keywords to client for highlighting
"""
- logger.info(u'Propagating keywords from "{origin}": {keywords}'.format(
+ logger.info('Propagating keywords from "{origin}": {keywords}'.format(
origin=search.keywords_origin, keywords=search.keywords))
request.response.headers['X-PatZilla-Query-Keywords'] = json.dumps(search.keywords)
diff --git a/patzilla/util/image/convert.py b/patzilla/util/image/convert.py
index 63e53dee..2c6ccbee 100644
--- a/patzilla/util/image/convert.py
+++ b/patzilla/util/image/convert.py
@@ -3,13 +3,13 @@
import os
import shutil
import tempfile
-from pathlib2 import Path
+from pathlib import Path
import requests
import where
import logging
import datetime
-import StringIO
+import io
import subprocess
from six import BytesIO
from tempfile import NamedTemporaryFile
@@ -195,7 +195,7 @@ def run_imagemagick(command, input=None):
def png_resize(png_payload, width):
- image = Image.open(StringIO.StringIO(png_payload)).convert('RGB')
+ image = Image.open(io.BytesIO(png_payload)).convert('RGB')
image_width = image.size[0]
image_height = image.size[1]
@@ -209,13 +209,13 @@ def png_resize(png_payload, width):
#size = (int(width), int(image_height * aspect))
size = (int(width), int(image_height / scale_factor))
#print "size:", size
- print "Resizing image from %s to %s" % (image.size, size)
+ print("Resizing image from %s to %s" % (image.size, size))
image.thumbnail(size, Image.ANTIALIAS)
#image.resize(size, Image.ANTIALIAS)
#print "thumbnail done"
- png = StringIO.StringIO()
+ png = io.BytesIO()
image.save(png, 'PNG')
#print "image saved to memory"
diff --git a/patzilla/util/ipc/parser.py b/patzilla/util/ipc/parser.py
index e8701e54..e94d54a9 100644
--- a/patzilla/util/ipc/parser.py
+++ b/patzilla/util/ipc/parser.py
@@ -10,7 +10,7 @@ def decodeMatchToDict(match, key_suffix):
if match:
# transfer data from match groups to instance variable,
# making all values uppercase
- for key, value in match.groupdict().iteritems():
+ for key, value in match.groupdict().items():
if key.endswith(key_suffix):
key = key.replace(key_suffix, '')
if value:
@@ -56,7 +56,7 @@ def decode(self):
m = self.r.match(self.raw)
self.ipc = decodeMatchToDict(m, '__1')
if not self.ipc:
- raise ValueError, "IPCR class '%s' could not be decoded" % self.raw
+ raise ValueError("IPCR class '%s' could not be decoded" % self.raw)
def fix(self):
@@ -82,7 +82,7 @@ def asDict(self):
def formatFlexible(self, class_padding='', group_subgroup_delimiter='', group_padding='', subgroup_padding=''):
if not self.ipc['section']:
- raise ValueError, "IPCR class '%s' could not be formatted" % self.raw
+ raise ValueError("IPCR class '%s' could not be formatted" % self.raw)
ipc_serialized = self.ipc['section']
diff --git a/patzilla/util/network/browser.py b/patzilla/util/network/browser.py
index 51f61cc1..3545dbff 100644
--- a/patzilla/util/network/browser.py
+++ b/patzilla/util/network/browser.py
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# (c) 2017-2019 Andreas Motl
-regular_user_agent = u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0'
+regular_user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0'
diff --git a/patzilla/util/network/requests_xmlrpclib.py b/patzilla/util/network/requests_xmlrpclib.py
index 18dbbc2b..e61eaf61 100644
--- a/patzilla/util/network/requests_xmlrpclib.py
+++ b/patzilla/util/network/requests_xmlrpclib.py
@@ -6,17 +6,14 @@
Usage:
- >>> import xmlrpclib
+ >>> import xmlrpc.client
>>> #from transport import RequestsTransport
- >>> s = xmlrpclib.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport())
+ >>> s = xmlrpc.client.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport())
>>> #s.demo.sayHello()
Hello!
"""
-try:
- import xmlrpc.client as xmlrpc
-except ImportError:
- import xmlrpclib as xmlrpc
+import xmlrpc.client as xmlrpc
import requests
class RequestsTransport(xmlrpc.Transport):
diff --git a/patzilla/util/numbers/common.py b/patzilla/util/numbers/common.py
index ac5af45a..f44f1e25 100644
--- a/patzilla/util/numbers/common.py
+++ b/patzilla/util/numbers/common.py
@@ -3,7 +3,7 @@
import re
import types
import logging
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.numbers.helper import strip_spaces
"""
@@ -13,7 +13,7 @@
log = logging.getLogger(__name__)
-class DocumentIdentifierBunch(SmartBunch):
+class DocumentIdentifierBunch(SmartMunch):
def __str__(self):
return self.dump()
@@ -29,12 +29,12 @@ def join_patent(patent):
return number
def decode_patent_number(patent):
- if isinstance(patent, types.StringTypes):
+ if isinstance(patent, (str,)):
decoded = split_patent_number(patent)
- elif isinstance(patent, types.DictionaryType):
+ elif isinstance(patent, dict):
decoded = patent
else:
- raise TypeError(u'Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent)))
+ raise TypeError('Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent)))
return decoded
def split_patent_number(patent_number):
@@ -154,7 +154,7 @@ def split_patent_number(patent_number):
return dib
else:
- log.error(u'Unable to parse patent number "{0}"'.format(patent_number))
+ log.error('Unable to parse patent number "{0}"'.format(patent_number))
def split_patent_number_more(patent):
diff --git a/patzilla/util/numbers/denormalize.py b/patzilla/util/numbers/denormalize.py
index 6395e2a6..9b4deee2 100644
--- a/patzilla/util/numbers/denormalize.py
+++ b/patzilla/util/numbers/denormalize.py
@@ -121,16 +121,16 @@ def test_denormalization():
WO1990004917
"""
- print "-" * 30
- print "original\tdenormalized"
- print "-" * 30
+ print("-" * 30)
+ print("original\tdenormalized")
+ print("-" * 30)
for number in payload.split("\n"):
if not number or number == "\n": continue
if number.startswith('---'):
- print number
+ print(number)
continue
number_denormalized = join_patent(denormalize_patent(split_patent_number(number)))
- print "%s\t%s" % (number, number_denormalized)
+ print("%s\t%s" % (number, number_denormalized))
if __name__ == "__main__":
diff --git a/patzilla/util/numbers/helper.py b/patzilla/util/numbers/helper.py
index 044ba815..520c49fc 100644
--- a/patzilla/util/numbers/helper.py
+++ b/patzilla/util/numbers/helper.py
@@ -22,11 +22,11 @@ def strip_spaces(number):
number = r_invalid.sub('', number)
return number
-def read_numbersfile(file):
- fh = open(file, 'r')
+def read_numbersfile(_file):
+ fh = open(_file, 'r')
numbers_raw = fh.readlines()
fh.close()
- numbers = map(lambda number: number.strip(" ;\"'\t\n\r"), numbers_raw)
+ numbers = [number.strip(" ;\"'\t\n\r") for number in numbers_raw]
numbers = [number for number in numbers if number and not number.startswith('#')]
return numbers
diff --git a/patzilla/util/numbers/normalize.py b/patzilla/util/numbers/normalize.py
index 1dd5da49..0b60f9d9 100644
--- a/patzilla/util/numbers/normalize.py
+++ b/patzilla/util/numbers/normalize.py
@@ -194,7 +194,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False,
provider = 'ops'
# 1. handle patent dicts or convert (split) from string
- if isinstance(number, types.DictionaryType):
+ if isinstance(number, dict):
patent = number
else:
patent = split_patent_number(number)
@@ -209,7 +209,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False,
# 3. result handling
# 3.a) default mechanism: return what we've got
- if isinstance(number, types.DictionaryType):
+ if isinstance(number, dict):
result = patent_normalized
else:
result = join_patent(patent_normalized)
@@ -622,7 +622,7 @@ def normalize_patent_it(patent):
# filter: special document handling (with alphanumeric prefixes)
# trim and pad sequential number with zeros to get total length of 7 characters for patent number
- if patched.has_key('number-type') and patched.has_key('number-real'):
+ if 'number-type' in patched and 'number-real' in patched:
subtype = patched['number-type']
seqnumber = patched['number-real']
patched['number'] = subtype + seqnumber.lstrip('0')
@@ -671,16 +671,16 @@ def normalization_example(): # pragma: nocover
'JP3657641B2',
]
- print "-" * 30
- print '{0}{1}'.format("original".ljust(20), "normalized")
- print "-" * 30
+ print("-" * 30)
+ print('{0}{1}'.format("original".ljust(20), "normalized"))
+ print("-" * 30)
for number in numbers:
if number.find('---') != -1:
- print number
+ print(number)
continue
result = normalize_patent(number)
#result = join_patent(patch_patent_old_archive(patent))
- print "{0}{1}".format(number.ljust(20), result)
+ print("{0}{1}".format(number.ljust(20), result))
if __name__ == "__main__": # pragma: nocover
diff --git a/patzilla/util/numbers/numberlists.py b/patzilla/util/numbers/numberlists.py
index d6341e32..22ceb2ad 100644
--- a/patzilla/util/numbers/numberlists.py
+++ b/patzilla/util/numbers/numberlists.py
@@ -4,13 +4,13 @@
from patzilla.util.numbers.normalize import normalize_patent
def parse_numberlist(rawdata):
- pattern = re.compile(u'[,\n]')
+ pattern = re.compile('[,\n]')
entries = pattern.split(rawdata)
- entries = map(unicode.strip, entries)
+ entries = list(map(str.strip, entries))
return entries
def normalize_numbers(entries):
- entries = map(lambda s: s.replace(u' ', u''), entries)
+ entries = [s.replace(' ', '') for s in entries]
response = {'valid': [], 'invalid': [], 'all': []}
for entry in entries:
entry_normalized = normalize_patent(entry, fix_kindcode=True)
diff --git a/patzilla/util/python/__init__.py b/patzilla/util/python/__init__.py
index 4974efcc..5a69667c 100644
--- a/patzilla/util/python/__init__.py
+++ b/patzilla/util/python/__init__.py
@@ -2,7 +2,7 @@
# (c) 2014 Andreas Motl, Elmyra UG
import sys
import traceback
-from StringIO import StringIO
+from io import StringIO
def exception_traceback(exc_info=None):
"""
diff --git a/patzilla/util/text/format.py b/patzilla/util/text/format.py
index ae59c647..9093becf 100644
--- a/patzilla/util/text/format.py
+++ b/patzilla/util/text/format.py
@@ -2,9 +2,9 @@
# (c) 2014-2016 Andreas Motl, Elmyra UG
import re
-_slugify_strip_re = re.compile(r'[^\w\s-]')
-_slugify_strip_wo_equals_re = re.compile(r'[^\w\s=-]')
-_slugify_hyphenate_re = re.compile(r'[-\s]+')
+_slugify_strip_re = re.compile(rb'[^\w\s-]')
+_slugify_strip_wo_equals_re = re.compile(rb'[^\w\s=-]')
+_slugify_hyphenate_re = re.compile(rb'[-\s]+')
def slugify(value, strip_equals=True, lowercase=True):
"""
Normalizes string, converts to lowercase, removes non-alpha characters,
@@ -15,19 +15,23 @@ def slugify(value, strip_equals=True, lowercase=True):
Via http://code.activestate.com/recipes/577257-slugify-make-a-string-usable-in-a-url-or-filename/
"""
import unicodedata
- if not isinstance(value, unicode):
- value = unicode(value)
+ if not isinstance(value, str):
+ value = str(value)
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
_strip_re = _slugify_strip_re
if not strip_equals:
_strip_re = _slugify_strip_wo_equals_re
- value = unicode(_strip_re.sub('', value).strip())
+
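+ # After the ASCII encoding above, "value" is a bytes object, so the bytes branch below is the one that normally applies.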
+ if isinstance(value, str):
+ value = _strip_re.sub('', value).strip()
+ else:
+ value = _strip_re.sub(b'', value).strip()
if lowercase:
value = value.lower()
- value = _slugify_hyphenate_re.sub('-', value)
+ value = _slugify_hyphenate_re.sub(b'-', value)
return value
def text_indent(text, amount=4, ch=' '):
diff --git a/patzilla/util/web/email/submit.py b/patzilla/util/web/email/submit.py
index 2504401e..e8353e2e 100644
--- a/patzilla/util/web/email/submit.py
+++ b/patzilla/util/web/email/submit.py
@@ -5,7 +5,7 @@
from validate_email import validate_email
from pyramid.threadlocal import get_current_request
from patzilla.util.config import read_config, read_list, to_list
-from patzilla.util.data.container import SmartBunch
+from patzilla.util.data.container import SmartMunch
from patzilla.util.email.message import EmailMessage
log = logging.getLogger(__name__)
@@ -68,23 +68,23 @@ def email_issue_report(report, recipients):
recipients = to_list(recipients)
identifier = None
- if isinstance(report, SmartBunch):
+ if isinstance(report, SmartMunch):
identifier = report.meta.id
# Build reasonable subject
- subject = u'Product issue'
+ subject = 'Product issue'
if 'dialog' in report and 'what' in report.dialog:
- subject = u'[{}] '.format(report.dialog.what) + subject
+ subject = '[{}] '.format(report.dialog.what) + subject
if identifier:
- subject += u' #' + identifier
+ subject += ' #' + identifier
# Build reasonable message
- message = u''
+ message = ''
if 'dialog' in report and 'remark' in report.dialog:
message = report.dialog.remark
# Add JSON report as attachment
- files = {u'report.json': report.pretty()}
+ files = {'report.json': report.pretty()}
email = message_factory(recipients=recipients)
email.send(
diff --git a/patzilla/util/web/identity/store.py b/patzilla/util/web/identity/store.py
index 92a69d45..982538e0 100644
--- a/patzilla/util/web/identity/store.py
+++ b/patzilla/util/web/identity/store.py
@@ -12,8 +12,8 @@
from mongoengine.fields import StringField, ListField, DateTimeField, DictField
from mongoengine.errors import NotUniqueError
from pyramid.threadlocal import get_current_request
-from zope.interface.declarations import implements
from zope.interface.interface import Interface
+from zope.interface import implementer
log = logging.getLogger(__name__)
@@ -133,9 +133,10 @@ class UserMetrics(Document):
class IUserMetricsManager(Interface):
pass
+@implementer(IUserMetricsManager)
class UserMetricsManager(object):
- implements(IUserMetricsManager)
+# py27 implements(IUserMetricsManager)
def measure_upstream(self, upstream, volume):
diff --git a/patzilla/util/web/pyramid/cornice.py b/patzilla/util/web/pyramid/cornice.py
index b7dadcae..89a6ee9b 100644
--- a/patzilla/util/web/pyramid/cornice.py
+++ b/patzilla/util/web/pyramid/cornice.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# (c) 2017 Andreas Motl, Elmyra UG
-from __future__ import absolute_import
+
from cornice.errors import Errors
def add_location_whitelisted(self, location, name=None, description=None, **kw):
diff --git a/patzilla/util/web/pyramid/renderer.py b/patzilla/util/web/pyramid/renderer.py
index 78a06af4..d941add6 100644
--- a/patzilla/util/web/pyramid/renderer.py
+++ b/patzilla/util/web/pyramid/renderer.py
@@ -18,7 +18,7 @@ def __call__(self, data, context):
content_type = (context['request'].accept.best_match(acceptable)
or acceptable[0])
response.content_type = content_type
- print "data:", data
+ print("data:", data)
return 'hello'
#return json.dumps(data, use_decimal=True)
diff --git a/patzilla/util/web/util/xmlrpclib.py b/patzilla/util/web/util/xmlrpclib.py
index 50c5f6de..df3353b6 100644
--- a/patzilla/util/web/util/xmlrpclib.py
+++ b/patzilla/util/web/util/xmlrpclib.py
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
# (c) 2014-2015 Andreas Motl, Elmyra UG
-from __future__ import absolute_import
+
import sys
import socket
-import xmlrpclib
+import xmlrpc.client
import ssl
# https://stackoverflow.com/questions/372365/set-timeout-for-xmlrpclib-serverproxy/14397619#14397619
@@ -24,7 +24,7 @@ def __enter__(self):
if self.__timeout:
self.__prevDefaultTimeout = socket.getdefaulttimeout()
socket.setdefaulttimeout(self.__timeout)
- proxy = xmlrpclib.Server(self.__url, allow_none=True)
+ proxy = xmlrpc.client.Server(self.__url, allow_none=True)
except Exception as ex:
raise Exception("Unable create XMLRPC-proxy for url '%s': %s" % (self.__url, ex))
diff --git a/patzilla/util/web/uwsgi/uwsgidecorators.py b/patzilla/util/web/uwsgi/uwsgidecorators.py
index 79c08ea1..29b20c36 100644
--- a/patzilla/util/web/uwsgi/uwsgidecorators.py
+++ b/patzilla/util/web/uwsgi/uwsgidecorators.py
@@ -4,7 +4,7 @@
from threading import Thread
try:
- import cPickle as pickle
+ import pickle as pickle
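+ # "cPickle" was merged into the standard "pickle" module in Python 3, so the fallback below is effectively a no-op now.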
except:
import pickle
diff --git a/patzilla/util/xml/format.py b/patzilla/util/xml/format.py
index 49d32120..1ee0b738 100644
--- a/patzilla/util/xml/format.py
+++ b/patzilla/util/xml/format.py
@@ -69,5 +69,5 @@ def data(self, root):
return super(BadgerFishNoNamespace, self).data(root)
def clean_tag(self, node):
- if isinstance(node.tag, basestring):
+ if isinstance(node.tag, str):
node.tag = re.sub('{.*}', '', node.tag)
diff --git a/pserve.py b/pserve.py
new file mode 100644
index 00000000..4ddeca3f
--- /dev/null
+++ b/pserve.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import re
+import sys
+
+from pyramid.scripts.pserve import main
+
+if __name__ == '__main__':
+ sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
+ sys.exit(main())
diff --git a/setup.cfg b/setup.cfg
index 28b62b06..5622110a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,6 +13,8 @@ addopts = -rA -vvv
--app-cache-backend=filesystem
patzilla tests -k 'not uspto'
+doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL
+
log_level = DEBUG
log_cli_level = DEBUG
diff --git a/setup.py b/setup.py
index e75bff7d..763fadec 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
# Environment
# ----------------------------------------------
'six>=1.10.0',
- 'mock>=3,<4', # 4.0.3
+ 'mock',
# ----------------------------------------------
# Backend
@@ -41,9 +41,9 @@
# Database and storage
# Can't upgrade to pymongo-3.5.1 due to "from pymongo.connection import Connection"
# usage in "mongodb_gridfs_beaker" module.
- 'pymongo<3', # 3.13.0, 4.3.3
- 'mongodb_gridfs_beaker==0.5.4',
- 'mongoengine==0.13.0', # 0.24.1
+ 'pymongo', # 3.13.0, 4.3.3
+ 'mongodb_gridfs_beaker@https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker',
+ 'mongoengine==0.20.0', # 0.27.0
'python-magic<1',
# Web services
@@ -51,7 +51,7 @@
# Authorization
'pycryptodome>=3,<4',
- 'python-jwt>=3.3.4,<4',
+ 'python-jwt',
'pbkdf2==1.3',
@@ -73,8 +73,8 @@
'ndg-httpsclient<1',
# HTML
- 'BeautifulSoup<4',
- 'html2text==2016.9.19', # 2020.1.16
+ 'beautifulsoup4',
+ 'html2text',
# XML
# Remark: Both lxml 3.8.0 and 4.0.0 will segfault on Debian Wheezy (7.11)
@@ -92,19 +92,19 @@
# Data handling
'attrs',
- 'Bunch==1.0.1', # Maybe switch to "Munch"
- 'pyparsing==2.0.2', # 2.2.2, 2.3.1, 2.4.7, 3.0.8
+ 'Munch',
+ 'pyparsing<4', # 3.0.9
'python-dateutil<3',
'ago==0.0.9', # 0.0.93
'arrow==0.10.0', # 0.12.1
'validate_email<2',
- 'numpy==1.16.6', # 1.22.3
- 'pandas==0.18.1', # 0.22.0, 0.25.3, 1.4.2
- 'pathlib2<3',
+ 'numpy>=1.16.6', # 1.22.3
+ 'pandas', # 0.22.0, 0.25.3, 1.4.2
+ 'pathlib',
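+ # Note: "pathlib" has been part of the standard library since Python 3.4; the PyPI backport is probably no longer needed.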
# Data formatting
- 'openpyxl>=2.4.2,<3',
- 'xlrd==0.9.3', # 0.9.4, 1.2.0, 2.0.1
+ 'openpyxl',
+ 'xlrd3',
'XlsxWriter==0.9.3', # 1.4.5, 2.0.0, 3.0.3
# Data conversion
@@ -215,8 +215,6 @@
extras_require={
'test': test_requires,
},
- dependency_links=[
- ],
entry_points={
'paste.app_factory': [
diff --git a/tests/__init__.py b/tests/__init__.py
index b06494c4..30067b0a 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -11,7 +11,7 @@ def suppress_warnings():
"""
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
- import pandas.util.nosetester
+ import numpy.testing
suppress_warnings()
diff --git a/tests/access/test_dpma_register.py b/tests/access/test_dpma_register.py
index ba2aa470..1180615e 100644
--- a/tests/access/test_dpma_register.py
+++ b/tests/access/test_dpma_register.py
@@ -37,10 +37,10 @@ def test_dpmaregister_url_de():
def test_dpmaregister_xml():
with F5WafWrapper():
xml = access_register("WO2008034638", output_format="xml")
- assert '' in xml
- assert "" in xml
- assert "" in xml
+ assert b'' in xml
+ assert b"" in xml
+ assert b"" in xml
def test_dpmaregister_json():
@@ -72,12 +72,12 @@ def test_dpmaregister_html_compact_de():
def test_dpmaregister_pdf_compact_en():
with F5WafWrapper():
pdf = access_register("EP666666", output_format="pdf")
- assert "File number 695 34 171.5" in pdf
- assert "Most recent update in DPMAregister on Jan 7, 2017" in pdf
+ assert b"File number 695 34 171.5" in pdf
+ assert b"Most recent update in DPMAregister on Jan 7, 2017" in pdf
def test_dpmaregister_pdf_compact_de():
with F5WafWrapper():
pdf = access_register("EP666666", output_format="pdf", language="de")
- assert "Aktenzeichen 695 34 171.5" in pdf
- assert "letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf
+ assert b"Aktenzeichen 695 34 171.5" in pdf
+ assert b"letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf
diff --git a/tests/access/test_epo_ops.py b/tests/access/test_epo_ops.py
index b30051e2..fead9461 100644
--- a/tests/access/test_epo_ops.py
+++ b/tests/access/test_epo_ops.py
@@ -50,7 +50,7 @@ def test_baseurl(app_request):
response = client._make_request(
OPS_BASE_URI, data={}, extra_headers={"Accept": "*"}, use_get=True,
)
- assert "EPO - Open Patent Services (OPS)" in response.content
+ assert b"EPO - Open Patent Services (OPS)" in response.content
def test_search_full_success(app_request):
@@ -74,15 +74,15 @@ def test_search_biblio_compact_success(app_request):
assert jpath('/0/pubdate', compact) == "1995-08-09"
assert jpath('/1/pubnumber', compact) == "EP0666667"
assert jpath('/1/pubdate', compact) == "1995-08-09"
- assert compact[0].keys() == compact[1].keys() == [
+ assert sorted(compact[0].keys()) == sorted(compact[1].keys()) == [
+ 'abstract',
'appdate',
'applicant',
- 'pubdate',
'appnumber',
- 'title',
- 'abstract',
- 'pubnumber',
'inventor',
+ 'pubdate',
+ 'pubnumber',
+ 'title',
]
@@ -140,7 +140,7 @@ def test_search_swap_family(app_request):
total_result_count = int(jpath('/ops:world-patent-data/ops:biblio-search/@total-result-count', results.data))
assert total_result_count == 2
- assert results.selected_numbers == [u'DE69534171T2', u'EP0666667A2']
+ assert results.selected_numbers == ['DE69534171T2', 'EP0666667A2']
def test_crawl(app_request):
@@ -188,13 +188,13 @@ def test_biblio_data_json_success(app_request):
assert len(documents) == 3
assert kindcodes == ["A2", "A3", "B1"]
assert attributes == [
- u'@country',
- u'@doc-number',
- u'@family-id',
- u'@kind',
- u'@system',
- u'abstract',
- u'bibliographic-data',
+ '@country',
+ '@doc-number',
+ '@family-id',
+ '@kind',
+ '@system',
+ 'abstract',
+ 'bibliographic-data',
]
@@ -218,7 +218,7 @@ def test_biblio_data_xml_success(app_request):
Proof getting bibliographic for a specific document in XML format works.
"""
results = get_ops_biblio_data("publication", "EP0666666", xml=True)
- assert results.startswith('')
+ assert results.startswith(b'')
def test_document_kindcodes_success(app_request):
@@ -275,31 +275,31 @@ def test_family_members(app_request):
pubnumbers = sorted([item["publication"]["number-docdb"] for item in members.items])
assert appnumbers == [
- u'CA2142029A',
- u'CA2142029A',
- u'DE69534171T',
- u'DE69534171T',
- u'EP95480005A',
- u'EP95480005A',
- u'EP95480005A',
- u'JP29020894A',
- u'JP29020894A',
- u'US19288494A',
- u'US47157195A',
+ 'CA2142029A',
+ 'CA2142029A',
+ 'DE69534171T',
+ 'DE69534171T',
+ 'EP95480005A',
+ 'EP95480005A',
+ 'EP95480005A',
+ 'JP29020894A',
+ 'JP29020894A',
+ 'US19288494A',
+ 'US47157195A',
]
assert pubnumbers == [
- u'CA2142029A1',
- u'CA2142029C',
- u'DE69534171D1',
- u'DE69534171T2',
- u'EP0666666A2',
- u'EP0666666A3',
- u'EP0666666B1',
- u'JP2613027B2',
- u'JPH07231328A',
- u'US5467352A',
- u'US5572526A',
+ 'CA2142029A1',
+ 'CA2142029C',
+ 'DE69534171D1',
+ 'DE69534171T2',
+ 'EP0666666A2',
+ 'EP0666666A3',
+ 'EP0666666B1',
+ 'JP2613027B2',
+ 'JPH07231328A',
+ 'US5467352A',
+ 'US5572526A',
]
@@ -435,8 +435,8 @@ def test_description_xml_success(app_request):
Acquire full text "description" in XML format.
"""
data = ops_description("EP666666A2", xml=True)
- assert data.startswith('')
- assert "The present invention generally relates to multi-node communication systems with shared resources." in data
+ assert data.startswith(b'')
+ assert b"The present invention generally relates to multi-node communication systems with shared resources." in data
def test_description_failure(app_request):
@@ -485,8 +485,8 @@ def test_claims_xml_success(app_request):
Acquire full text "claims" in XML format.
"""
data = ops_claims("EP666666A2", xml=True)
- assert data.startswith('')
- assert "1. In a communication system having a plurality of nodes" in data
+ assert data.startswith(b'')
+ assert b"1. In a communication system having a plurality of nodes" in data
def test_claims_failure(app_request):
@@ -531,7 +531,7 @@ def test_family_docdb_xml_success(app_request):
document_number="EP0666666A2",
constituents="biblio",
)
- assert response.startswith('')
+ assert response.startswith(b'')
def test_family_docdb_xml_not_found_failure(app_request):
@@ -558,7 +558,7 @@ def test_register_json_success(app_request):
def test_register_xml_success(app_request):
response = ops_register(reference_type="publication", document_number="EP0666666A2", xml=True)
- assert response.startswith('')
+ assert response.startswith(b'')
def test_register_not_found_failure(app_request):
@@ -573,4 +573,4 @@ def test_register_not_found_failure(app_request):
def test_service_usage(app_request):
response = ops_service_usage("01/01/2022", "02/01/2022")
- assert response.keys() == ["response-size", "time-range", "message-count"]
+ assert sorted(response.keys()) == ["message-count", "response-size", "time-range"]
diff --git a/tests/access/test_uspto.py b/tests/access/test_uspto.py
index f3503a3b..8852600d 100644
--- a/tests/access/test_uspto.py
+++ b/tests/access/test_uspto.py
@@ -6,7 +6,7 @@
import re
import pytest
-from bunch import Bunch
+from munch import Munch
from pyramid.httpexceptions import HTTPNotFound
from patzilla.access.uspto.image import fetch_first_drawing
@@ -161,9 +161,9 @@ def test_fetch_url_failure():
def test_get_reference_type_valid():
- assert get_reference_type(Bunch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION
- assert get_reference_type(Bunch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION
- assert get_reference_type(Bunch(number=1)) == UsptoPdfReferenceType.PUBLICATION
+ assert get_reference_type(Munch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION
+ assert get_reference_type(Munch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION
+ assert get_reference_type(Munch(number=1)) == UsptoPdfReferenceType.PUBLICATION
def test_get_reference_type_invalid():
@@ -172,9 +172,9 @@ def test_get_reference_type_invalid():
assert ex.match(re.escape("Unknown document reference type: None"))
with pytest.raises(ValueError) as ex:
- get_reference_type(Bunch())
+ get_reference_type(Munch())
assert ex.match(re.escape("Unknown document reference type:"))
with pytest.raises(ValueError) as ex:
- get_reference_type(Bunch(number=None))
+ get_reference_type(Munch(number=None))
assert ex.match(re.escape("Unknown document reference type:"))
diff --git a/tests/commands/test_commands_ops.py b/tests/commands/test_commands_ops.py
index c2830056..a6b63f4b 100644
--- a/tests/commands/test_commands_ops.py
+++ b/tests/commands/test_commands_ops.py
@@ -76,8 +76,8 @@ def test_command_ops_image_fulldocument_pdf_success():
result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1", catch_exceptions=False)
assert result.exit_code == 0
- assert result.stdout.startswith("%PDF-1.4")
- assert 30000 < len(result.stdout) < 50000
+ assert result.stdout_bytes.startswith(b"%PDF-1.4")
+ assert 30_000 < len(result.stdout_bytes) < 150_000
def test_command_ops_image_fulldocument_tiff_success():
@@ -89,7 +89,7 @@ def test_command_ops_image_fulldocument_tiff_success():
result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1 --format=tiff", catch_exceptions=False)
assert result.exit_code == 0
- assert result.stdout.startswith(b"\x4d\x4d\x00\x2a")
+ assert result.stdout_bytes.startswith(b"\x4d\x4d\x00\x2a")
def test_command_ops_image_drawing_pdf_success():
@@ -101,8 +101,8 @@ def test_command_ops_image_drawing_pdf_success():
result = runner.invoke(cli, "ops image --document=EP0666666B1 --kind=FullDocumentDrawing --page=1", catch_exceptions=False)
assert result.exit_code == 0
- assert result.stdout.startswith("%PDF-1.4")
- assert 10000 < len(result.stdout) < 20000
+ assert result.stdout_bytes.startswith(b"%PDF-1.4")
+ assert 10_000 < len(result.stdout_bytes) < 20_000
def test_command_ops_image_failure():
diff --git a/tests/test_numberlists.py b/tests/test_numberlists.py
index 860eb35a..63096e49 100644
--- a/tests/test_numberlists.py
+++ b/tests/test_numberlists.py
@@ -5,26 +5,26 @@ def test_parse_numberlist():
"""
Proof that conveniently parsing a list of items works.
"""
- assert parse_numberlist(u"foo , bar") == [u'foo', u'bar']
- assert parse_numberlist(u"foo \n bar") == [u'foo', u'bar']
+ assert parse_numberlist("foo , bar") == ['foo', 'bar']
+ assert parse_numberlist("foo \n bar") == ['foo', 'bar']
def test_normalize_numbers_valid():
"""
Normalize a list of valid patent numbers.
"""
- assert normalize_numbers([u'EP666666B1', u'EP1000000']) == {'all': [u'EP0666666B1', u'EP1000000'], 'invalid': [], 'valid': [u'EP0666666B1', u'EP1000000']}
+ assert normalize_numbers(['EP666666B1', 'EP1000000']) == {'all': ['EP0666666B1', 'EP1000000'], 'invalid': [], 'valid': ['EP0666666B1', 'EP1000000']}
def test_normalize_numbers_invalid():
"""
Normalize a list of invalid patent numbers.
"""
- assert normalize_numbers([u'foo', u'bar']) == {'all': [u'foo', u'bar'], 'invalid': [u'foo', u'bar'], 'valid': []}
+ assert normalize_numbers(['foo', 'bar']) == {'all': ['foo', 'bar'], 'invalid': ['foo', 'bar'], 'valid': []}
def test_normalize_numbers_mixed():
"""
Normalize a list of both valid and invalid patent numbers.
"""
- assert normalize_numbers([u'EP666666B1', u'foobar']) == {'all': [u'EP0666666B1', u'foobar'], 'invalid': [u'foobar'], 'valid': [u'EP0666666B1']}
+ assert normalize_numbers(['EP666666B1', 'foobar']) == {'all': ['EP0666666B1', 'foobar'], 'invalid': ['foobar'], 'valid': ['EP0666666B1']}
diff --git a/tests/util/test_jwt.py b/tests/util/test_jwt.py
index 9fb7f71e..c7204585 100644
--- a/tests/util/test_jwt.py
+++ b/tests/util/test_jwt.py
@@ -59,7 +59,7 @@ def test_signer_sign_invalid_expiration(jwt_signer):
"""
with pytest.raises(ValueError) as ex:
jwt_signer.sign("foo", ttl="bar")
- assert ex.match("value=bar, type= is an invalid JWT expiration date")
+ assert ex.match("value=bar, type= is an invalid JWT expiration date, use `datetime.datetime` or `datetime.timedelta")
def test_signer_unsign_expired_token():
@@ -77,7 +77,7 @@ def test_signer_unsign_expired_token():
'location': 'JSON Web Token',
'name': '_JWTError',
'jwt_expiry': 1640995200,
- 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'},
+ 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'},
}
@@ -117,8 +117,8 @@ def test_signer_unsign_invalid_payload(jwt_signer):
assert value == {
'location': 'JSON Web Token',
- 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'},
+ 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'},
'description': 'No "data" attribute in payload/claims',
'name': 'JwtSigner',
- 'jwt_payload': {u'foo': u'bar', u'exp': 2145916800},
+ 'jwt_payload': {'foo': 'bar', 'exp': 2145916800},
}
diff --git a/tests/util/test_numbers_common.py b/tests/util/test_numbers_common.py
index a6ebb516..1233a8e7 100644
--- a/tests/util/test_numbers_common.py
+++ b/tests/util/test_numbers_common.py
@@ -27,11 +27,11 @@ def generate(data):
class TestNumberDecoding:
- @pytest.mark.parametrize("number,expected,computed", generate(good), ids=good.keys())
+ @pytest.mark.parametrize("number,expected,computed", generate(good), ids=list(good.keys()))
def testDecodeOK(self, number, expected, computed):
self.check_ok(number, expected, computed)
- @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=bad.keys())
+ @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=list(bad.keys()))
def testDecodeBAD(self, number, expected, computed):
self.check_ok(number, expected, computed)
diff --git a/tests/util/test_numbers_helper.py b/tests/util/test_numbers_helper.py
index 989ccaf7..c3ecb799 100644
--- a/tests/util/test_numbers_helper.py
+++ b/tests/util/test_numbers_helper.py
@@ -18,6 +18,6 @@ def test_read_numbersfile():
"""
# TODO: Need to adjust for Python 3, see https://stackoverflow.com/a/34677735.
- with patch("__builtin__.open", mock_open(read_data=data)) as mock_file:
+ with patch("builtins.open", mock_open(read_data=data)) as mock_file:
numbers = read_numbersfile(None)
assert numbers == ['EP666666', 'EP666667', 'EP666668', 'EP666669']
diff --git a/tests/util/test_numbers_normalize.py b/tests/util/test_numbers_normalize.py
index 6930587e..2fe9e69b 100644
--- a/tests/util/test_numbers_normalize.py
+++ b/tests/util/test_numbers_normalize.py
@@ -595,11 +595,11 @@ def normalize_patent_us_smart(input):
class TestNumberNormalization:
- @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=t.keys())
+ @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=list(t.keys()))
def testDecodeOK(self, number, expected, computed):
self.check_ok(number, expected, computed)
- @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=depatisconnect_cases.keys())
+ @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=list(depatisconnect_cases.keys()))
def test_depatisconnect_alternatives(self, number, expected, computed):
self.check_ok(number, expected, computed)
diff --git a/tests/util/test_python.py b/tests/util/test_python.py
index ad8638a2..d3ce8955 100644
--- a/tests/util/test_python.py
+++ b/tests/util/test_python.py
@@ -8,11 +8,11 @@
def test_run_command_success_basic():
- assert run_command(["echo", "foo"]).read().strip() == "foo"
+ assert run_command(["echo", "foo"]).read().strip() == b"foo"
def test_run_command_success_input():
- assert run_command(["cat"], input="foo").read().strip() == "foo"
+ assert run_command(["cat"], input=b"foo").read().strip() == b"foo"
def test_run_command_failure_not_found():
@@ -29,8 +29,8 @@ def test_run_command_failure_program_error():
def test_run_command_failure_input_error():
with pytest.raises(RuntimeError) as ex:
- run_command(["true"], input={"abc": "def"})
- assert ex.match('Command "true" failed, returncode=None, exception=unhashable type, stderr=')
+ run_command(["true"], input={b"abc": b"def"})
+ assert ex.match('Command "true" failed, returncode=None, exception=memoryview: a bytes-like object is required, not \'dict\', stderr=')
def test_memoize():
@@ -49,4 +49,4 @@ def test_exception_traceback(capsys):
output = exception_traceback()
assert "Traceback (most recent call last)" in output
- assert "NameError: global name 'foobar' is not defined" in output
+ assert "NameError: name \'foobar\' is not defined" in output
diff --git a/tests/util/test_text_format.py b/tests/util/test_text_format.py
index e0174517..a9680957 100644
--- a/tests/util/test_text_format.py
+++ b/tests/util/test_text_format.py
@@ -4,14 +4,14 @@
def test_slugify():
- assert slugify("Franz jagt Trueffel.") == "franz-jagt-trueffel"
- assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋") == "franz-jagt-truffel-im-wald"
- assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == "franz-jagt-truffel-=-im-wald"
- assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == "Franz-jagt-Truffel-im-Wald"
+ assert slugify("Franz jagt Trueffel.") == b"franz-jagt-trueffel"
+ assert slugify("Franz jagt Trüffel -=- im Wald. 👋") == b"franz-jagt-truffel-im-wald"
+ assert slugify("Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == b"franz-jagt-truffel-=-im-wald"
+ assert slugify("Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == b"Franz-jagt-Truffel-im-Wald"
def test_text_indent():
- assert text_indent(u"Franz jagt Trüffel.\nIm Wald.\n\n👋") == u"""
+ assert text_indent("Franz jagt Trüffel.\nIm Wald.\n\n👋") == """
Franz jagt Trüffel.
Im Wald.