diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 6afe2da0..b6d9824a 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -26,7 +26,7 @@ jobs: # Don't use macOS for now, it is currently unstable, otherwise slow. -- 2022-04-19, amo # 'macos-latest', ] - python-version: ['2.7'] + python-version: ['3.8'] defaults: run: @@ -103,7 +103,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: '2.7' + python-version: '3.8' architecture: 'x64' cache: 'pip' cache-dependency-path: 'requirements-docs.txt' diff --git a/CHANGES.rst b/CHANGES.rst index 349304fb..06c14c37 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -142,6 +142,8 @@ Development - [mw] Improve settings for having per-vendor OPS credentials - [ui] More flexbox for header layout - [ui] Improve comment editing usability +- [mw] No need to manually encode form fields with "mechanize" anymore. + Thanks, `Kovid `_! 2019-05-08 0.165.0 diff --git a/Makefile b/Makefile index 7950e4e1..821b7e77 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ #VERSION := $(shell cat patzilla/version.py | awk '{ print $$3 }' | tr -d "'") #$(error VERSION=$(VERSION)) -$(eval venvpath := .venv2) +$(eval venvpath := .venv3) $(eval pip := $(venvpath)/bin/pip) $(eval twine := $(venvpath)/bin/twine) $(eval python := $(venvpath)/bin/python) @@ -9,10 +9,8 @@ $(eval pserve := $(venvpath)/bin/pserve) $(eval pytest := $(venvpath)/bin/pytest) $(eval bumpversion := $(venvpath)/bin/bumpversion) $(eval fab := $(venvpath)/bin/fab) - -$(eval venv3path := .venv) -$(eval yarn := $(venv3path)/bin/yarn) -$(eval npx := $(venv3path)/bin/npx) +$(eval yarn := $(venvpath)/bin/yarn) +$(eval npx := $(venvpath)/bin/npx) setup: setup-py @@ -65,7 +63,7 @@ upload-pypi: # Setup Python virtualenv. setup-virtualenv: - @test -e $(python) || virtualenv --python=python2 $(venvpath) + @test -e $(python) || virtualenv --python=python3 $(venvpath) setup-py: setup-virtualenv $(pip) install --editable=.[test] diff --git a/docs/conf.py b/docs/conf.py index 3a19a2d2..5569cd62 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -49,18 +49,18 @@ master_doc = 'index' # General information about the project. -project = u'PatZilla' -copyright = u'2013-2022, The PatZilla authors' -author = u'The PatZilla authors' +project = 'PatZilla' +copyright = '2013-2023, The PatZilla authors' +author = 'The PatZilla authors' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = u'0.169.3' +version = '0.169.3' # The full version, including alpha/beta/rc tags. -release = u'0.169.3' +release = '0.169.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -145,8 +145,8 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'PatZilla.tex', u'PatZilla Documentation', - u'The PatZilla authors', 'manual'), + (master_doc, 'PatZilla.tex', 'PatZilla Documentation', + 'The PatZilla authors', 'manual'), ] @@ -155,7 +155,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - (master_doc, 'patzilla', u'PatZilla Documentation', + (master_doc, 'patzilla', 'PatZilla Documentation', [author], 1) ] @@ -166,7 +166,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'PatZilla', u'PatZilla Documentation', + (master_doc, 'PatZilla', 'PatZilla Documentation', author, 'PatZilla', 'One line description of project.', 'Miscellaneous'), ] diff --git a/fabfile.py b/fabfile.py index d0c5d1f5..377b1400 100644 --- a/fabfile.py +++ b/fabfile.py @@ -34,7 +34,7 @@ def install(version, target): if not version: version = pkg_version - print 'Installing package {0}, version {1} to target {2}.'.format(*map(yellow, [pkg_name, version, target])) + print('Installing package {0}, version {1} to target {2}.'.format(*list(map(yellow, [pkg_name, version, target])))) if env.confirm: response = ask('Proceed (y/n)? ', ('y', 'n')) else: @@ -72,7 +72,7 @@ def install(version, target): restart_service(target) else: - print yellow('Skipped package install due to user request.') + print(yellow('Skipped package install due to user request.')) def setup_package(package, virtualenv, options=''): #--index-url=http://c.pypi.python.org/simple @@ -100,7 +100,7 @@ def restart_service(target): if uwsgi_name: run('service uwsgi reload %s' % uwsgi_name) else: - print(red('WARNING: Could not restart service "%s"' % target)) + print((red('WARNING: Could not restart service "%s"' % target))) @task @hosts(INSTALLATION_HOST) diff --git a/patzilla/access/cipo/drawing.py b/patzilla/access/cipo/drawing.py index f15d59c4..3eb22f75 100644 --- a/patzilla/access/cipo/drawing.py +++ b/patzilla/access/cipo/drawing.py @@ -3,7 +3,7 @@ import re import logging import requests -from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup from patzilla.util.numbers.common import split_patent_number log = logging.getLogger(__name__) @@ -70,7 +70,7 @@ def get_first_drawing_url(patent): images_index_html = fetch_images_index(images_index_url) soup = BeautifulSoup(images_index_html) # Canadian Patent Document 141597. Drawings page. 
Image 1 of 3 - first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(ur'/opic-cipo/cpd/page'))['src'] + first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(r'/opic-cipo/cpd/page'))['src'] return first_drawing_url @@ -83,6 +83,6 @@ def get_first_drawing_url(patent): payload = fetch_first_drawing(split_patent_number(number)) if payload: #print "payload length:", len(payload) - print payload + print(payload) else: - print "not found" + print("not found") diff --git a/patzilla/access/depatech/client.py b/patzilla/access/depatech/client.py index 652bd6e1..011edff6 100644 --- a/patzilla/access/depatech/client.py +++ b/patzilla/access/depatech/client.py @@ -13,7 +13,7 @@ from patzilla.access.depatech import get_depatech_client from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent log = logging.getLogger(__name__) @@ -55,7 +55,7 @@ def search(self, query, options=None): return self.search_real(query, options=options) def search_real(self, query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 0) options.setdefault('limit', self.pagesize) @@ -73,7 +73,7 @@ def search_real(self, query, options=None): transport = 'json' query.expression = self.translate_deparom_query(query.expression) - log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format( + log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format( query.expression, offset, limit, **self.__dict__)) starttime = timeit.default_timer() @@ -92,7 +92,7 @@ def search_real(self, query, options=None): 'from': offset, 'size': limit, } - log.info(u'{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format( + log.info('{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format( query=query, uri=uri, params=params, options=options.dump(), backend_name=self.backend_name)) # Perform search request @@ -164,10 +164,10 @@ def search_real(self, query, options=None): if 'reason' not in upstream_error: upstream_error['reason'] = 'Reason unknown' - message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error) raise self.search_failed( - user_info=u'Error searching depa.tech.', + user_info='Error searching depa.tech.', message=message, response=response) @@ -180,7 +180,7 @@ def translate_deparom_query(self, expression): expression = expression.replace(upstream_prefix, '').replace('deparom:', '') - log.info(u'{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format( + log.info('{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format( expression=expression, uri=uri, backend_name=self.backend_name)) expression = upstream_prefix + expression @@ -212,7 +212,7 @@ def translate_deparom_query(self, expression): elif response.status_code >= 400: - message = u'Reason unknown' + message = 'Reason unknown' if response.headers.get('Content-Type', '').startswith('application/json'): @@ -224,15 +224,15 @@ def translate_deparom_query(self, expression): upstream_error['code'] = 
response_data['status'] if 'reason' not in upstream_error: - upstream_error['reason'] = u'Reason unknown' + upstream_error['reason'] = 'Reason unknown' - message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error) else: message = response.content raise self.search_failed( - user_info=u'Translating DEPAROM query expression failed', + user_info='Translating DEPAROM query expression failed', message=message, response=response) @@ -298,8 +298,8 @@ def read(self): 'name': 'depatech', 'time': self.input['took'], 'status': 'success', - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.input['hits']['total']) @@ -307,14 +307,14 @@ def read(self): self.meta.navigator.offset = int(self.options.offset) self.meta.navigator.limit = int(self.options.limit) self.meta.navigator.max_hits = int(self.options.max_hits) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content self.documents = self.input['hits']['hits'] self.read_documents() def document_to_number(self, document): - _id = document[u'_id'] + _id = document['_id'] cc, docno, kindcode = _id.split('.') publication_number = cc + docno + kindcode number = normalize_patent(publication_number) @@ -326,7 +326,7 @@ def document_to_family_id(self, document): def depatech_search(query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() client = get_depatech_client() try: diff --git a/patzilla/access/depatech/clientpool.py b/patzilla/access/depatech/clientpool.py index 223d094d..62599608 100644 --- a/patzilla/access/depatech/clientpool.py +++ b/patzilla/access/depatech/clientpool.py @@ -3,8 +3,8 @@ import logging import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements from zope.interface.interface import Interface +from zope.interface import implementer from patzilla.access.depatech.client import DepaTechClient from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager @@ -43,6 +43,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["DEPATECH_API_USERNAME"] or not os.environ["DEPATECH_API_PASSWORD"]: + raise KeyError("DEPATECH_API_USERNAME or DEPATECH_API_PASSWORD is empty") return { "api_username": os.environ["DEPATECH_API_USERNAME"], "api_password": os.environ["DEPATECH_API_PASSWORD"], @@ -78,13 +80,12 @@ class IDepaTechClientPool(Interface): pass +@implementer(IDepaTechClientPool) class DepaTechClientPool(object): """ depa.tech client pool as Pyramid utility implementation. 
""" - implements(IDepaTechClientPool) - def __init__(self, api_uri): logger.info("Creating upstream client pool for depa.tech") self.api_uri = api_uri diff --git a/patzilla/access/depatech/expression.py b/patzilla/access/depatech/expression.py index ef6cebde..c6cd8491 100644 --- a/patzilla/access/depatech/expression.py +++ b/patzilla/access/depatech/expression.py @@ -21,7 +21,7 @@ class DepaTechGrammar(CQLGrammar): def preconfigure(self): CQLGrammar.preconfigure(self) - self.cmp_single = u':'.split() + self.cmp_single = ':'.split() class DepaTechParser(object): @@ -161,7 +161,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): return expression = None - format = u'{0}:{1}' + format = '{0}:{1}' # ------------------------------------------ @@ -184,20 +184,20 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): patent = patent_normalized if patent: - subexpression = u'PC:{country} AND DE:{number}'.format(**patent) + subexpression = 'PC:{country} AND DE:{number}'.format(**patent) if patent['kind']: - subexpression += u' AND KI:{kind}'.format(**patent) - expression_parts.append(u'({})'.format(subexpression)) + subexpression += ' AND KI:{kind}'.format(**patent) + expression_parts.append('({})'.format(subexpression)) # Application number - subexpression = u'AN:{}'.format(value) + subexpression = 'AN:{}'.format(value) expression_parts.append(subexpression) - expression = u' OR '.join(expression_parts) + expression = ' OR '.join(expression_parts) # Priority number - subexpression = u'NP:{}'.format(value) + subexpression = 'NP:{}'.format(value) expression_parts.append(subexpression) - expression = u' OR '.join(expression_parts) + expression = ' OR '.join(expression_parts) elif key == 'pubdate': @@ -212,7 +212,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # e.g. 1991 if len(value) == 4 and value.isdigit(): - value = u'within {}0101,{}1231'.format(value, value) + value = 'within {}0101,{}1231'.format(value, value) # e.g. 1990-2014, 1990 - 2014 value = year_range_to_within(value) @@ -249,12 +249,12 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): except Exception as ex: message = 'depatech query: Invalid date or range expression "{0}". 
Reason: {1}.'.format(value, ex) - logger.warn(message + ' Exception was: {0}'.format(_exception_traceback())) + logger.warning(message + ' Exception was: {0}'.format(_exception_traceback())) return {'error': True, 'message': message} elif key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) elif key == 'class': @@ -268,7 +268,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # Put value into parenthesis, to properly capture expressions if value: - value = u'({value})'.format(value=value) + value = '({value})'.format(value=value) # Parse value as simple query expression query_object = CQL(cql=value) @@ -290,7 +290,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # ------------------------------------------ if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']: if has_booleans(value) and not should_be_quoted(value): - value = u'({0})'.format(value) + value = '({0})'.format(value) # ------------------------------------------ # expression formatter @@ -358,15 +358,15 @@ def triple_callback(token, index, binop, term): def format_expression(format, fieldname, value): expression = None - if type(fieldname) in types.StringTypes: + if type(fieldname) in (str,): expression = format.format(fieldname, value) - elif type(fieldname) is types.ListType: + elif type(fieldname) is list: subexpressions = [] for fieldname in fieldname: subexpressions.append(format.format(fieldname, value)) expression = ' or '.join(subexpressions) # surround with parentheses - expression = u'({0})'.format(expression) + expression = '({0})'.format(expression) return expression def lucene_convert_class(value): @@ -395,4 +395,4 @@ def should_be_quoted(value): if __name__ == '__main__': - print DepaTechParser('IC:G01F000184').keywords + print(DepaTechParser('IC:G01F000184').keywords) diff --git a/patzilla/access/depatech/expression.rst b/patzilla/access/depatech/expression.rst index eed14b55..78adcac3 100644 --- a/patzilla/access/depatech/expression.rst +++ b/patzilla/access/depatech/expression.rst @@ -20,30 +20,30 @@ Empty query IPC/CPC ======= >>> DepaTechParser('H01F7/00').dumps() -u'H01F7/00' +'H01F7/00' # Rewrite all patent classifications from depa.tech format to OPS format >>> DepaTechParser('IC:G01F000184').parse().rewrite_classes_ops().dumps() -u'IC : G01F1/84' +'IC : G01F1/84' >>> DepaTechParser('IC:G01F000184').keywords -[u'G01F1/84'] +['G01F1/84'] >>> DepaTechExpression.pair_to_elasticsearch('class', 'H04L12/433 or H04L12/24') -{'query': u'((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'} +{'query': '((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'} >>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)') -{'query': u'((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'} +{'query': '((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'} >>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)') -{'query': u'((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'} +{'query': '((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'} Publication date ================ >>> 
DepaTechExpression.pair_to_elasticsearch('pubdate', 'foobar') -{'message': 'depatech query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True} +{'error': True, 'message': 'depatech query: Invalid date or range expression "foobar". Reason: foobar.'} ********* @@ -54,39 +54,39 @@ Simple expressions ================== >>> DepaTechParser('GT:bildschirm').keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm') -{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} +{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} >>> DepaTechParser('GT:bildschirm or AB:fahrzeug').keywords -[u'bildschirm', u'fahrzeug'] +['bildschirm', 'fahrzeug'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm or fahrzeug') -{'query': u'(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'} +{'query': '(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'} >>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw)') -{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'} +{'query': '(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'} >>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw not lkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw', u'lkw'] +['bildschirm', 'fahrzeug', 'pkw', 'lkw'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)') -{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'} +{'query': '(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'} >>> DepaTechParser('AB:fahrzeug or AB:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] >>> DepaTechParser('AB:fahrzeug not GT:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] @@ -97,17 +97,17 @@ Queries without proper fieldnames like AB:, GT:, AB:, etc. 
on the left side of t >>> DepaTechParser('bildschirm').dumps() -u'bildschirm' +'bildschirm' >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm') -{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} +{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} >>> DepaTechParser('bildschirm and fahrzeug').dumps() -u'bildschirm and fahrzeug' +'bildschirm and fahrzeug' >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and fahrzeug') -{'query': u'(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'} +{'query': '(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'} @@ -115,22 +115,22 @@ Expressions containing quoted words =================================== >>> DepaTechParser('"bildschirm"').dumps() -u'"bildschirm"' +'"bildschirm"' >>> DepaTechParser('"bildschirm"').keywords [] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"') -{'query': u'(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'} +{'query': '(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'} >>> DepaTechParser('AB:"bildschirm"').dumps() -u'AB : "bildschirm"' +'AB : "bildschirm"' >>> DepaTechParser('AB:"bildschirm"').keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser('AB:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] @@ -138,19 +138,19 @@ Keyword extraction ================== >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00')['query']).keywords -[u'H01F7/00'] +['H01F7/00'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords -[u'H01F7/00', u'H01F7/02', u'H02K7/1876'] +['H01F7/00', 'H01F7/02', 'H02K7/1876'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'GT:bildschirm OR AB:(fahrzeug OR pkw)')['query']).keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] @@ -160,18 +160,18 @@ From the wild Umlauts ------- ->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() -u'((AB : *messschieber* or AB : *me\xdfschieber*) and AB : *digital*)' +>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() +'((AB : *messschieber* or AB : *me\xdfschieber*) and AB : *digital*)' ->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords -[u'messschieber', u'me\xdfschieber', u'digital'] +>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords +['messschieber', 'me\xdfschieber', 'digital'] More ---- ->>> DepaTechParser(u'ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords -[u'energy', u'water', u'waves', u'Tide', u'90\xb0'] +>>> DepaTechParser('ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords +['energy', 'water', 'waves', 'Tide', '90\xb0'] ->>> DepaTechParser(u'AB:(((bremsgefühl* or pedalgefühl) and 
(*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords -[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom'] +>>> DepaTechParser('AB:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords +['bremsgef\xfchl', 'pedalgef\xfchl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenst\xe4ndig', 'independent', 'autonom'] diff --git a/patzilla/access/dpma/depatisconnect.py b/patzilla/access/dpma/depatisconnect.py index 0065bb25..0d42d769 100644 --- a/patzilla/access/dpma/depatisconnect.py +++ b/patzilla/access/dpma/depatisconnect.py @@ -4,9 +4,9 @@ import json import logging import requests -import xmlrpclib -from StringIO import StringIO -from ConfigParser import NoOptionError +import xmlrpc.client +from io import StringIO +from configparser import NoOptionError from lxml import etree as ET from lxml.builder import E from cornice.util import to_list @@ -72,7 +72,7 @@ def run_acquisition(document_number, doctypes=None): url = archive_service_baseurl + '/RPC2' transport = RequestsTransport(session=get_client(), timeout=(2, 17)) transport.use_https = use_https - server = xmlrpclib.ServerProxy(url, transport=transport) + server = xmlrpc.client.ServerProxy(url, transport=transport) return server.runAcquisition(numbers, doctypes) def fetch_xml(number): @@ -313,4 +313,4 @@ def depatisconnect_abstracts(document_number, language=None, invalidate=False): # Failed on 2018-04-23 #response = depatisconnect_claims('USD813591S') - print json.dumps(response) + print(json.dumps(response)) diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index 1b000fa3..4b0f42cc 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -1,15 +1,15 @@ # -*- coding: utf-8 -*- # (c) 2014-2015 Andreas Motl, Elmyra UG -import re import sys import json import types import logging -import urllib2 +import urllib.request, urllib.error, urllib.parse import mechanize -import cookielib -from BeautifulSoup import BeautifulSoup -from xlrd import open_workbook +import re +import http.cookiejar +from bs4 import BeautifulSoup +from xlrd3 import open_workbook from patzilla.access.generic.search import GenericSearchResponse from patzilla.util.date import from_german, date_iso from patzilla.util.network.browser import regular_user_agent @@ -44,7 +44,7 @@ class DpmaDepatisnetAccess: ] def __init__(self): - print 'DpmaDepatisnetAccess.__init__' + print('DpmaDepatisnetAccess.__init__') self.baseurl = 'https://depatisnet.dpma.de/DepatisNet' self.searchurl_cql = self.baseurl + '/depatisnet?action=experte&switchToLang=en' self.searchurl_ikofax = self.baseurl + '/depatisnet?action=ikofax&switchToLang=en' @@ -65,7 +65,7 @@ def setup_browser(self): # http://wwwsearch.sourceforge.net/mechanize/ # https://github.com/python-mechanize/mechanize self.browser = mechanize.Browser() - self.browser.set_cookiejar(cookielib.LWPCookieJar()) + self.browser.set_cookiejar(http.cookiejar.LWPCookieJar()) self.browser.addheaders = [('User-Agent', regular_user_agent)] # ignore robots.txt self.browser.set_handle_robots(False) @@ -85,7 +85,7 @@ def search_patents(self, query, options=None): limit = options.get('limit') max_hits = 
options.get('max_hits') - logger.info(u'Searching documents. query="%s", options=%s' % (query, options)) + logger.info('Searching documents. query="%s", options=%s' % (query, options)) # 0. create browser instance if not self.browser: @@ -97,7 +97,7 @@ def search_patents(self, query, options=None): search_url = self.searchurl_ikofax try: self.browser.open(search_url) - except urllib2.HTTPError as ex: + except urllib.error.HTTPError as ex: logger.critical('Hard error with DEPATISnet: {}'.format(ex)) self.logout() raise @@ -107,7 +107,7 @@ def search_patents(self, query, options=None): self.browser.select_form(nr=0) #self.browser.select_form(name='form') - self.browser['query'] = query.encode('iso-8859-1') + self.browser['query'] = query self.browser['hitsPerPage'] = [str(limit)] self.browser['maxHitsUser'] = [str(max_hits)] @@ -127,7 +127,7 @@ def search_patents(self, query, options=None): #self.browser['so'] = ['desc'] # sort by user selection - if 'sorting' in options and type(options['sorting']) is types.DictionaryType: + if 'sorting' in options and type(options['sorting']) is dict: self.browser['sf'] = [options['sorting']['field']] self.browser['so'] = [options['sorting']['order']] @@ -197,7 +197,7 @@ def search_patents(self, query, options=None): results = self.read_xls_response(xls_response) except Exception as ex: logger.error('Problem downloading results in XLS format: {}'.format(ex)) - ex.http_response = ex.read() + #ex.http_response = ex.read() raise # debugging @@ -225,22 +225,24 @@ def find_errors(self, body): 'otherwise don\'t hesitate to report this problem to us.') # Check for error messages - soup = BeautifulSoup(body) + soup = BeautifulSoup(body, 'lxml') error_message = soup.find('div', {'id': 'errormsg'}) if error_message: parts = [] [s.extract() for s in error_message('a')] [parts.append(s.extract()) for s in error_message('p', {'class': 'headline'})] reason = ', '.join([part.getText() for part in parts]) - error_message = u'{}\n{}'.format(reason, str(error_message)) + error_message = '{}\n{}'.format(reason, str(error_message)) else: error_message = '' - if u'An error has occurred' in body: - error_message = error_message.replace('\t', '').replace('\r\n', '\n').strip() + # Compute error message. + prefix = 'Upstream service: ' + if 'An error has occurred' in body: + error_message = prefix + error_message.replace('\t', '').replace('\r\n', '\n').strip() raise SyntaxError(error_message) - return error_message + return prefix + error_message def read_xls_response(self, xls_response): data = excel_to_dict(xls_response.read()) @@ -307,8 +309,8 @@ def read(self): # TODO: Reference from IFI CLAIMS, fill up/unify. #'time': self.input['time'], #'status': self.input['status'], - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.input['hits']) @@ -317,7 +319,7 @@ def read(self): # TODO: Fill up? 
#self.meta.navigator.offset = int(self.meta.upstream.Offset) #self.meta.navigator.limit = int(self.meta.upstream.Limit) - #self.meta.navigator.postprocess = SmartBunch() + #self.meta.navigator.postprocess = SmartMunch() # Propagate user message @@ -355,17 +357,17 @@ def excel_to_dict(payload): start_row = 0 # upstream added new status line to first row, e.g. "Search query: pn=(EP666666) Status: 25.09.2015" - if u'Search query' in sheet.cell(0, 0).value: + if 'Search query' in sheet.cell(0, 0).value: start_row = 1 # read header values - keys = [sheet.cell(start_row, col_index).value for col_index in xrange(sheet.ncols)] + keys = [sheet.cell(start_row, col_index).value for col_index in range(sheet.ncols)] # read sheet content dict_list = [] - for row_index in xrange(start_row + 1, sheet.nrows): + for row_index in range(start_row + 1, sheet.nrows): d = {keys[col_index]: sheet.cell(row_index, col_index).value - for col_index in xrange(sheet.ncols)} + for col_index in range(sheet.ncols)} dict_list.append(d) return dict_list @@ -390,4 +392,4 @@ def excel_to_dict(payload): else: data = depatisnet.search_patents('BI=bagger and PC=DE') - print json.dumps(data) + print(json.dumps(data)) diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py index a662ef49..cd12451f 100644 --- a/patzilla/access/dpma/dpmaregister.py +++ b/patzilla/access/dpma/dpmaregister.py @@ -11,12 +11,12 @@ import operator import mechanicalsoup from beaker.cache import cache_region -from bunch import bunchify +from munch import munchify from docopt import docopt from pprint import pformat from jsonpointer import JsonPointer, JsonPointerException from xml.etree.ElementTree import fromstring -from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup from collections import namedtuple, OrderedDict from patzilla.access.dpma.util import dpma_file_number from patzilla.boot.cache import configure_cache_backend @@ -247,7 +247,7 @@ def search_patent(self, patent): # has to be adjusted. 
time.sleep(1.0) - if "/TSPD" in self.response.content: + if b"/TSPD" in self.response.content: raise ValueError("Site is protected by F5 Advanced WAF") # Debugging @@ -283,7 +283,7 @@ def search_patent(self, patent): return [entry] # Sanity checks - if "0 result/s" in response.content: + if b"0 result/s" in response.content: msg = 'No search results for "{}"'.format(patent) logger.warning(msg) raise NoResults(msg) @@ -311,7 +311,7 @@ def parse_reference_link(self, link, patent): msg = "Could not parse document reference from link '%s' (patent='%s')" % (link, patent) logger.error(msg) raise Exception(msg) - label = link.find(text=True) + label = link.find(string=True) return reference, label def fetch_reference(self, result, language): @@ -369,7 +369,7 @@ def html_compact(self): PDF-Download """ - soup = BeautifulSoup(self.html) + soup = BeautifulSoup(self.html, "lxml") soup_content = soup.find('table', {'id': 'verfahrensdaten_tabelle'}) @@ -528,13 +528,13 @@ def decode(self): self.decode_badgerfish() # Document numbers - self.application_reference = map( + self.application_reference = list(map( operator.itemgetter('document_id'), - self.convert_list(self.query_data(self.pointer_application_reference))) + self.convert_list(self.query_data(self.pointer_application_reference)))) - self.publication_reference = map( + self.publication_reference = list(map( operator.itemgetter('document_id'), - self.convert_list(self.query_data(self.pointer_publication_reference))) + self.convert_list(self.query_data(self.pointer_publication_reference)))) # Classifications self.classifications['ipcr'] = self.convert_list(self.query_data(self.pointer_classifications_ipcr)) @@ -565,9 +565,9 @@ def decode(self): self.designated_states = self.convert_list(self.query_data(self.pointer_designated_states)) # Citations - self.references_cited = map( + self.references_cited = list(map( operator.attrgetter('document_id.doc_number'), - bunchify(self.convert_list(self.query_data(self.pointer_references_cited)))) + munchify(self.convert_list(self.query_data(self.pointer_references_cited))))) # office-specific-bib-data self.office_specific_bibdata = self.convert_dict(self.query_data(self.pointer_office_specific_bibdata)) @@ -590,7 +590,7 @@ def convert_list(cls, things_raw, nested_element='$'): things = [] for thing in to_list(things_raw): if not thing: continue - if nested_element in thing and len(thing.keys()) == 1: + if nested_element in thing and len(list(thing.keys())) == 1: thing = thing[nested_element] if isinstance(thing, dict): thing = cls.convert_dict(thing) @@ -606,7 +606,7 @@ def convert_dict(cls, data): return {} newdata = OrderedDict() - for key, value in data.items(): + for key, value in list(data.items()): # Decode nested text or recurse if '$' in value: diff --git a/patzilla/access/epo/espacenet/client_html.py b/patzilla/access/epo/espacenet/client_html.py index d5202db4..caa83a7f 100644 --- a/patzilla/access/epo/espacenet/client_html.py +++ b/patzilla/access/epo/espacenet/client_html.py @@ -97,7 +97,7 @@ def espacenet_fetch_html(document_number, section, element_id=None, element_clas else: - if 'Entity not found' in response.content: + if b'Entity not found' in response.content: raise KeyError(message_404) else: raise ValueError(message_fail) diff --git a/patzilla/access/epo/espacenet/pyramid.py b/patzilla/access/epo/espacenet/pyramid.py index db3038bb..b36da609 100644 --- a/patzilla/access/epo/espacenet/pyramid.py +++ b/patzilla/access/epo/espacenet/pyramid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # (c) 
2015-2018 Andreas Motl, Elmyra UG -from __future__ import absolute_import + import logging from cornice.service import Service from pyramid.httpexceptions import HTTPBadRequest, HTTPNotFound diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py index 5a11e3cf..e6da53c2 100644 --- a/patzilla/access/epo/ops/api.py +++ b/patzilla/access/epo/ops/api.py @@ -167,7 +167,7 @@ def match_filter(item, filter): original_publication_numbers += representation_pubrefs_docdb # Debugging - #print 'representation_pubref_epodoc:', representation_pubref_epodoc + #print( 'representation_pubref_epodoc:', representation_pubref_epodoc) #print 'representation_pubrefs_docdb:', representation_pubrefs_docdb # Fetch family members. When failing, use first cycle as representation. @@ -310,7 +310,7 @@ def ops_published_data_search_real(constituents, query, range): ops = get_ops_client() # Send request to OPS. - range_begin, range_end = map(int, range.split('-')) + range_begin, range_end = list(map(int, range.split('-'))) response = ops.published_data_search( query, range_begin=range_begin, range_end=range_end, constituents=to_list(constituents)) @@ -461,7 +461,7 @@ def image_representative_from_family(patent, countries, func_filter=None): # Compute alternative family members sorted by given countries alternatives = family.publications_by_country(exclude=[document], countries=countries) if func_filter: - alternatives = filter(func_filter, alternatives) + alternatives = list(filter(func_filter, alternatives)) if alternatives: # TODO: Currently using first item as representative. This might change. @@ -583,7 +583,7 @@ def inquire_images(document): def is_fulldocument(node): - return '@desc' in node and node['@desc'] == u'FullDocument' + return '@desc' in node and node['@desc'] == 'FullDocument' def is_amendment_only(node): @@ -602,7 +602,7 @@ def is_amendment_only(node): """ if is_fulldocument(node): sections = to_list(node.get('ops:document-section', [])) - if len(sections) == 1 and sections[0]['@name'] == u'AMENDMENT': + if len(sections) == 1 and sections[0]['@name'] == 'AMENDMENT': return True return False @@ -659,7 +659,7 @@ def get_ops_image(document, page, kind, format=None): # 1. 
Inquire images to compute url to image resource image_info = inquire_images(document) if image_info: - if image_info.has_key(kind): + if kind in image_info: drawing_node = image_info.get(kind) link = drawing_node['@link'] @@ -670,7 +670,7 @@ def get_ops_image(document, page, kind, format=None): page = page + start_page - 1 # fallback chain, if no drawings are available - elif image_info.has_key('JapaneseAbstract'): + elif 'JapaneseAbstract' in image_info: drawing_node = image_info.get('JapaneseAbstract') link = drawing_node['@link'] page = 1 @@ -885,7 +885,7 @@ def handle_error(response, location): # Compute name name = 'http-response' - body = response_dict['content'] + body = str(response_dict['content'],'UTF-8') if 'CLIENT.CQL' in body: name = 'expression' @@ -901,44 +901,44 @@ def handle_error(response, location): response_json.status = response.status_code # countermeasure against "_JSONError: " or the like - response_json.detail = str(response.status_code) + ' ' + response.reason + ': ' + response.content + response_json.detail = str(response.status_code) + ' ' + str(response.reason) + ': ' + str(response.content) #print "response:", response if len(request.errors) == 1: error_info = request.errors[0].get('description') if error_info.get('status_code') == 404: - error_content = error_info.get('content', '') + error_content = error_info.get('content', b'') url = error_info.get('url') status = str(error_info.get('status_code', '')) + ' ' + error_info.get('reason', '') - if 'CLIENT.InvalidCountryCode' in error_content: + if b'CLIENT.InvalidCountryCode' in error_content: ops_code = 'CLIENT.InvalidCountryCode' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) return response_json - if 'SERVER.EntityNotFound' in error_content: + if b'SERVER.EntityNotFound' in error_content: ops_code = 'SERVER.EntityNotFound' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.warning(message) return response_json - if 'OPS - 404' in error_content or 'Page not found' in error_content: + if b'OPS - 404' in error_content or b'Page not found' in error_content: ops_code = '404 OPS Page not found' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) - log.error(u'OPS API errors:\n{}'.format(pformat(request.errors))) + log.error('OPS API errors:\n{}'.format(pformat(request.errors))) response_json.status_code = 502 return response_json - if 'This API version is not supported' in error_content: + if b'This API version is not supported' in error_content: ops_code = '404 API version not supported' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). 
url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) response_json.status_code = 502 return response_json - log.error(u'OPS API errors:\n{}'.format(pformat(request.errors))) + log.error('OPS API errors:\n{}'.format(pformat(request.errors))) return response_json @@ -972,7 +972,7 @@ def pdf_document_build(patent): # 3. add pdf metadata page_sections = None - if resource_info.has_key('ops:document-section'): + if 'ops:document-section' in resource_info: page_sections = resource_info['ops:document-section'] #pprint(page_sections) @@ -1028,7 +1028,7 @@ def ops_document_kindcodes(patent): for document in documents: # TODO: check whether a single occurrance of "not found" should really raise this exception - if document.has_key('@status') and document['@status'] == 'not found': + if '@status' in document and document['@status'] == 'not found': error = HTTPNotFound(error_msg_access) raise error @@ -1080,7 +1080,7 @@ def analytics_family(query): # B. Enrich all family representatives # http://ops.epo.org/3.1/rest-services/family/application/docdb/US19288494.xml - for family_id, document_number in family_representatives.iteritems(): + for family_id, document_number in family_representatives.items(): payload.setdefault(family_id, {}) @@ -1246,7 +1246,7 @@ def __init__(self): self.items = [] def __repr__(self): - return u'<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items)) + return '<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items)) def publications_by_country(self, exclude=None, countries=None): exclude = exclude or [] @@ -1290,13 +1290,13 @@ def _find_publication_number_by_prio_number(): def _format_title(title): - return u'[{0}] {1}'.format(title.get(u'@lang', u'').upper() or u'', title[u'$'] or u'') + return '[{0}] {1}'.format(title.get('@lang', '').upper() or '', title['$'] or '') def _format_abstract(abstract): if not abstract: return lines = to_list(abstract['p']) - lines = map(lambda line: line['$'], lines) - return u'[{0}] {1}'.format(abstract.get(u'@lang', u'').upper() or u'', '\n'.join(lines)) + lines = [line['$'] for line in lines] + return '[{0}] {1}'.format(abstract.get('@lang', '').upper() or '', '\n'.join(lines)) def _mogrify_parties(partylist, name): results = [] @@ -1307,9 +1307,9 @@ def _mogrify_parties(partylist, name): parties[key][party['@data-format']] = party[name]['name']['$'] for key in sorted(parties.keys()): - name_epodoc = parties[key]['epodoc'].replace(u'\u2002', u' ') + name_epodoc = parties[key]['epodoc'].replace('\u2002', ' ') name_original = parties[key]['original'] - entry = u'{0}; {1}'.format(name_epodoc, name_original) + entry = '{0}; {1}'.format(name_epodoc, name_original) results.append(entry) return results @@ -1338,13 +1338,13 @@ def _result_list_compact(response): try: titles = to_list(pointer_invention_title.resolve(result)) - titles = map(_format_title, titles) + titles = list(map(_format_title, titles)) except JsonPointerException: titles = None try: abstracts = to_list(pointer_abstract.resolve(result)) - abstracts = map(_format_abstract, abstracts) + abstracts = list(map(_format_abstract, abstracts)) except JsonPointerException: abstracts = None @@ -1382,10 +1382,10 @@ def _summarize_metrics(payload, kind): except KeyError: return 'error while computing value' - total_response_size_entries = filter(lambda item: item['name'] == kind, metrics)[0]['values'] + total_response_size_entries = [item 
for item in metrics if item['name'] == kind][0]['values'] #print total_response_size_entries - total_response_sizes = map(lambda item: float(item['value']), total_response_size_entries) + total_response_sizes = [float(item['value']) for item in total_response_size_entries] #print total_response_sizes total = sum(total_response_sizes) @@ -1421,6 +1421,6 @@ def ops_service_usage(date_begin, date_end): if __name__ == '__main__': # pragma: nocover data = ops_service_usage('06/11/2014', '09/12/2014') - print 'Time range: {0}'.format(data['time-range']) - print 'Response size: {0}G'.format(data['response-size'] / float(10**9)) - print 'Message count: {0}'.format(data['message-count']) + print('Time range: {0}'.format(data['time-range'])) + print('Response size: {0}G'.format(data['response-size'] / float(10**9))) + print('Message count: {0}'.format(data['message-count'])) diff --git a/patzilla/access/epo/ops/client.py b/patzilla/access/epo/ops/client.py index a0037443..75070fd4 100644 --- a/patzilla/access/epo/ops/client.py +++ b/patzilla/access/epo/ops/client.py @@ -7,7 +7,7 @@ from mock import mock from pyramid.httpexceptions import HTTPUnauthorized from pyramid.threadlocal import get_current_registry -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface from zope.interface.interfaces import ComponentLookupError @@ -38,6 +38,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["OPS_API_CONSUMER_KEY"] or not os.environ["OPS_API_CONSUMER_SECRET"]: + raise KeyError("OPS_API_CONSUMER_KEY or OPS_API_CONSUMER_SECRET is empty") return { "consumer_key": os.environ["OPS_API_CONSUMER_KEY"], "consumer_secret": os.environ["OPS_API_CONSUMER_SECRET"], @@ -72,14 +74,12 @@ def attach_ops_client(event): class IOpsClientPool(Interface): pass - +@implementer(IOpsClientPool) class OpsClientPool(object): """ EPO/OPS client pool as Pyramid utility implementation. """ - implements(IOpsClientPool) - def __init__(self): logger.info("Creating upstream client pool for EPO/OPS") self.clients = {} diff --git a/patzilla/access/epo/ops/commands.py b/patzilla/access/epo/ops/commands.py index cd94aa02..faab8cb8 100644 --- a/patzilla/access/epo/ops/commands.py +++ b/patzilla/access/epo/ops/commands.py @@ -13,6 +13,7 @@ export OPS_API_CONSUMER_SECRET=rrXdr5WA7x9tudmP patzilla ops search "txt=(wind or solar) and energy" + Use configuration file:: export PATZILLA_CONFIG=patzilla/config/development-local.ini @@ -20,6 +21,7 @@ """ import json import logging +import sys from datetime import date, timedelta import click @@ -132,7 +134,7 @@ def image(ctx, document, page, kind, format): Access the OPS image acquisition API, see OPS handbook section 3.1.3. 
""" payload = get_ops_image(document, page, kind, format) - print(payload) + sys.stdout.buffer.write(payload) ops_cli.add_command(cmd=usage) diff --git a/patzilla/access/epo/publicationserver/client.py b/patzilla/access/epo/publicationserver/client.py index 5e9a38bd..5777e48a 100644 --- a/patzilla/access/epo/publicationserver/client.py +++ b/patzilla/access/epo/publicationserver/client.py @@ -25,7 +25,7 @@ def fetch_pdf(document_number): patent = normalize_patent(document_number, as_dict=True, provider='espacenet') - url_tpl = u'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}' + url_tpl = 'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}' url = url_tpl.format(**patent) @@ -63,4 +63,4 @@ def fetch_pdf(document_number): if __name__ == '__main__': - print fetch_pdf('EP666666A2') + print(fetch_pdf('EP666666A2')) diff --git a/patzilla/access/generic/exceptions.py b/patzilla/access/generic/exceptions.py index 7e9c4224..6b188cef 100644 --- a/patzilla/access/generic/exceptions.py +++ b/patzilla/access/generic/exceptions.py @@ -14,11 +14,11 @@ class GenericAdapterException(Exception): def __init__(self, *args, **kwargs): self.data = None - if kwargs.has_key('data'): + if 'data' in kwargs: self.data = kwargs['data'] self.user_info = '' - if kwargs.has_key('user_info'): + if 'user_info' in kwargs: self.user_info = kwargs['user_info'] super(GenericAdapterException, self).__init__(*args) @@ -30,11 +30,11 @@ def get_message(self): #message_parts.append(ex.user_info) message['user'] = cgi.escape(self.user_info) if hasattr(self, 'message'): - message_parts.append(self.__class__.__name__ + u': ' + u'
<pre>{message}</pre>'.format(message=cgi.escape(self.message))) + message_parts.append(self.__class__.__name__ + ': ' + '<pre>{message}</pre>'.format(message=cgi.escape(self.message))) if hasattr(self, 'details'): - message_parts.append(u'<pre>{message}</pre>'.format(message=cgi.escape(self.details))) + message_parts.append('<pre>{message}</pre>'.format(message=cgi.escape(self.details))) - message['details'] = u'<br/>'.join(message_parts) + message['details'] = '<br/>
'.join(message_parts) return message diff --git a/patzilla/access/generic/pdf.py b/patzilla/access/generic/pdf.py index c46ba6e5..8516b01f 100644 --- a/patzilla/access/generic/pdf.py +++ b/patzilla/access/generic/pdf.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # (c) 2013-2022 The PatZilla Developers import logging -from StringIO import StringIO +from io import StringIO from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED import attr @@ -56,7 +56,7 @@ def pdf_universal_real(patent, response): if document is None: log.error('Locating a document at the domestic office requires ' 'a decoded document number for "{}"'.format(patent)) - raise ValueError(u'Unable to decode document number {}'.format(patent)) + raise ValueError('Unable to decode document number {}'.format(patent)) # 1. If it's an EP document, try European publication server first. if response.pdf is None and document.country == 'EP': @@ -92,7 +92,7 @@ def pdf_universal_real(patent, response): try: # Skip requests for documents w/o kindcode if not document.kind: - raise ValueError(u'No kindcode for patent: {}'.format(patent)) + raise ValueError('No kindcode for patent: {}'.format(patent)) response.pdf = depatisconnect_fetch_pdf(number_normalized) response.datasource = 'dpma' diff --git a/patzilla/access/generic/search.py b/patzilla/access/generic/search.py index 3077c8be..1c819e45 100644 --- a/patzilla/access/generic/search.py +++ b/patzilla/access/generic/search.py @@ -4,7 +4,7 @@ import logging from pprint import pprint from collections import defaultdict -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent from patzilla.access.generic.exceptions import SearchException @@ -13,27 +13,27 @@ class GenericSearchClient(object): def lm(self, message): - message = u'{backend_name}: {message}'.format(message=message, **self.__dict__) + message = '{backend_name}: {message}'.format(message=message, **self.__dict__) return message def search_failed(self, message=None, response=None, user_info=None, ex=None, meta=None): # Compute user info - user_info = user_info or u'Search failed with unknown reason, please report this error to us.' + user_info = user_info or 'Search failed with unknown reason, please report this error to us.' meta = meta or {} # Compute reason and status - message = message or u'unknown' + message = message or 'unknown' if ex: - message = u'{}: {}'.format(ex.__class__.__name__, ex.message) + message = '{}: {}'.format(ex.__class__.__name__, ex.message) # Compute and emit log message - log_message = u'{backend_name}: Search failed. message={message}'.format(message=message, **self.__dict__) + log_message = '{backend_name}: Search failed. 
message={message}'.format(message=message, **self.__dict__) if meta: - log_message += u', meta=' + unicode(meta) + log_message += ', meta=' + str(meta) if response: - status = unicode(response.status_code) + u' ' + response.reason - log_message += u', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8')) + status = str(response.status_code) + ' ' + response.reason + log_message += ', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8')) log.error(log_message) # Return exception object @@ -50,7 +50,7 @@ def crawl(self, constituents, expression, chunksize): # fetch first chunk (1-chunksize) from upstream #first_chunk = self.search(expression, 0, chunksize) - first_chunk = self.search_method(expression, SmartBunch({'offset': 0, 'limit': chunksize})) + first_chunk = self.search_method(expression, SmartMunch({'offset': 0, 'limit': chunksize})) #print first_chunk #total_count = int(first_chunk['meta'].get('pager', {}).get('totalEntries', 0)) @@ -82,7 +82,7 @@ def crawl(self, constituents, expression, chunksize): time.sleep(1) log.info(self.lm('Crawling from offset {offset}'.format(offset=offset))) - chunk = self.search_method(expression, SmartBunch({'offset': offset, 'limit': chunksize})) + chunk = self.search_method(expression, SmartMunch({'offset': offset, 'limit': chunksize})) chunks.append(chunk) @@ -128,7 +128,7 @@ def __init__(self, input, options=None): # Input data and options self.input = input - self.options = options and SmartBunch.bunchify(options) or SmartBunch() + self.options = options and SmartMunch.munchify(options) or SmartMunch() # Setup data structures self.setup() @@ -146,13 +146,13 @@ def setup(self): self.documents = [] # Metadata information, upstream (raw) and downstream (unified) - self.meta = SmartBunch.bunchify({ + self.meta = SmartMunch.munchify({ 'navigator': {}, 'upstream': {}, }) # Output information, upstream (raw) and downstream (unified) - self.output = SmartBunch.bunchify({ + self.output = SmartMunch.munchify({ 'meta': {}, 'numbers': [], 'details': [], @@ -177,8 +177,8 @@ def read_documents(self): if number_normalized: number = number_normalized - document[u'publication_number'] = number - document[u'upstream_provider'] = self.meta.upstream.name + document['publication_number'] = number + document['upstream_provider'] = self.meta.upstream.name def render(self): @@ -209,14 +209,14 @@ def remove_family_members(self): seen = {} removed = [] removed_map = defaultdict(list) - stats = SmartBunch(removed = 0) + stats = SmartMunch(removed = 0) def family_remover(item): fam = self.document_to_family_id(item) # Sanity checks on family id # Do not remove documents without valid family id - if not fam or fam in [u'0', u'-1']: + if not fam or fam in ['0', '-1']: return True # "Seen" filtering logic @@ -233,7 +233,7 @@ def family_remover(item): # Update metadata and content # 1. Apply family cleansing filter to main documents response - self.documents = filter(family_remover, self.documents) + self.documents = list(filter(family_remover, self.documents)) #print 'removed_map:'; pprint(removed_map) # 2. 
Add list of removed family members to output diff --git a/patzilla/access/google/search.py b/patzilla/access/google/search.py index a55b81c5..b8888e0c 100644 --- a/patzilla/access/google/search.py +++ b/patzilla/access/google/search.py @@ -2,11 +2,13 @@ # (c) 2014 Andreas Motl, Elmyra UG import json from pyramid.encode import urlencode -import re +# py27 import re import sys import logging import requests -from BeautifulSoup import BeautifulSoup +# py27 from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup + from patzilla.util.expression.keywords import keywords_from_boolean_expression from patzilla.util.numbers.normalize import normalize_patent @@ -88,7 +90,7 @@ def tweak_captcha_response(self, body): captcha_form['action'] = baseurl + '/' + captcha_form['action'] newbody = str(soup) - print newbody + print(newbody) return newbody def parse_response(self, body): @@ -163,7 +165,7 @@ def parse_response(self, body): 'message': message, } - print payload + print(payload) return payload @@ -226,7 +228,7 @@ def pair_to_term(cls, key, value): value_normalized = normalize_patent(value) if value_normalized: value = value_normalized - term = u'{0}:{1}'.format(fieldname, value) + term = '{0}:{1}'.format(fieldname, value) else: term = value @@ -243,7 +245,7 @@ def serialize(self): """ query_params = [] tbs_params = [] - for key, value in self.criteria.iteritems(): + for key, value in self.criteria.items(): term = self.pair_to_term(key, value) if term['parameter'] == 'q': query_params.append(term['term']) @@ -265,7 +267,7 @@ def serialize(self): def get_keywords(self): keywords = [] - for key, value in self.criteria.iteritems(): + for key, value in self.criteria.items(): keywords += keywords_from_boolean_expression(key, value) return keywords @@ -282,4 +284,4 @@ def get_keywords(self): #data = google.search('matrix', 19900) data = google.search('intitle:matrix', 19900) - print data + print(data) diff --git a/patzilla/access/ificlaims/api.py b/patzilla/access/ificlaims/api.py index 7680403f..2c30de9e 100644 --- a/patzilla/access/ificlaims/api.py +++ b/patzilla/access/ificlaims/api.py @@ -157,7 +157,7 @@ def ificlaims_download_multi(numberlist, formats): for format in formats: - format_parts = format.split(u':') + format_parts = format.split(':') # decode modifiers if len(format_parts) == 1: @@ -235,7 +235,7 @@ def ificlaims_download_single(number, format, options=None): try: response = ificlaims_download(number, format, options) - except IFIClaimsException, ex: + except IFIClaimsException as ex: logger.warn('IFI: IFIClaimsException for number={number}, format={format}, options={options}: {ex}'.format(**locals())) if response.payload: diff --git a/patzilla/access/ificlaims/client.py b/patzilla/access/ificlaims/client.py index db9a7f2a..0febf090 100644 --- a/patzilla/access/ificlaims/client.py +++ b/patzilla/access/ificlaims/client.py @@ -16,7 +16,7 @@ from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient from patzilla.access.ificlaims import get_ificlaims_client -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent log = logging.getLogger(__name__) @@ -73,7 +73,7 @@ def search_real(self, query, options=None): query.setdefault('filter', '') - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 
0) options.setdefault('limit', self.pagesize) @@ -141,36 +141,36 @@ def search_real(self, query, options=None): if 'msg' not in upstream_error: upstream_error['msg'] = 'Reason unknown' - message = u'Response status code: {code}\n\n{msg}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{msg}'.format(**upstream_error) # Enrich "maxClauseCount" message, e.g. raised by {!complexphrase}text:"auto* AND leucht*"~5 - if upstream_error["code"] == 500 and u'maxClauseCount is set to' in upstream_error["msg"]: + if upstream_error["code"] == 500 and 'maxClauseCount is set to' in upstream_error["msg"]: raise self.search_failed( - user_info=u'Too many terms in phrase expression, wildcard term prefixes might by too short.', + user_info='Too many terms in phrase expression, wildcard term prefixes might by too short.', message=message, response=response) # Enrich "no servers hosting shard" message elif upstream_error["code"] == 503 and \ ( - u'no servers hosting shard' in upstream_error["msg"] or \ - u'No server is available' in upstream_error["msg"] + 'no servers hosting shard' in upstream_error["msg"] or \ + 'No server is available' in upstream_error["msg"] ): raise self.search_failed( - user_info=u'Error while connecting to upstream database. Database might be offline.', + user_info='Error while connecting to upstream database. Database might be offline.', message=message, response=response) # Regular traceback elif upstream_error["code"] == 500 and 'trace' in upstream_error: - message = u'Response status code: {code}\n\n{trace}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{trace}'.format(**upstream_error) raise self.search_failed( - user_info=u'Unknown exception at search backend', + user_info='Unknown exception at search backend', message=message, response=response) # Enrich "SyntaxError" exception - elif upstream_error["code"] == 400 and u'ParseException' in upstream_error["msg"]: + elif upstream_error["code"] == 400 and 'ParseException' in upstream_error["msg"]: user_info = re.sub( r'.*(Encountered.*at line.*?\.).*', r'SyntaxError, can not parse query expression: \1', @@ -207,7 +207,7 @@ def search_real(self, query, options=None): user_info = None if response_data['message'] == 'JSON error: failed to read response object': - user_info = u'Error while connecting to upstream database. Database might be offline.' + user_info = 'Error while connecting to upstream database. Database might be offline.' raise self.search_failed( user_info=user_info, @@ -237,7 +237,7 @@ def search_real(self, query, options=None): message = json.dumps(upstream_error) raise self.search_failed( - user_info=u'Error while connecting to upstream database. Database might be offline.', + user_info='Error while connecting to upstream database. 
Database might be offline.', message=message, response=response) @@ -252,9 +252,10 @@ def text_fetch(self, ucid, format='xml'): EP666666A2 => EP0666666A2 (EP0666666A3, EP0666666B1) """ - log.info(u"{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format( + log.info("{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format( ucid=ucid, format=format, **self.__dict__)) + starttime = timeit.default_timer() if not self.token or self.stale: @@ -288,7 +289,7 @@ def text_fetch(self, ucid, format='xml'): @cache_region('longer') def attachment_list(self, ucid): - log.info(u"{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) + log.info("{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) if not self.token or self.stale: self.login() @@ -310,14 +311,14 @@ def attachment_list(self, ucid): data = json.loads(response.content) return data else: - log.error(u"{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format( + log.error("{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format( ucid=ucid, status=response.status_code, response=response.content , **self.__dict__)) @cache_region('longer') def attachment_fetch(self, path): - log.info(u"{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__)) + log.info("{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__)) if not self.token or self.stale: self.login() @@ -341,18 +342,19 @@ def attachment_fetch(self, path): return response.content else: - log.error(u"{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format( + log.error("{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format( path=path, status=response.status_code, response=response.content , **self.__dict__)) def pdf_fetch(self, ucid): - log.info(u"{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) + log.info("{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) attachments_response = self.attachment_list(ucid) if not attachments_response: return + #print 'attachments_response:' #pprint(attachments_response) @@ -435,7 +437,7 @@ def tif_attachments(self, ucid): """ # filter tif references only - tif_attachments = filter(lambda attachment: attachment['media'] in ['image/tiff', 'image/jpeg'], attachments) + tif_attachments = [attachment for attachment in attachments if attachment['media'] in ['image/tiff', 'image/jpeg']] #print 'tif_attachments:' #pprint(tif_attachments) return tif_attachments @@ -443,7 +445,7 @@ def tif_attachments(self, ucid): def tif_fetch(self, ucid, seq=1): - log.info(u"{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) + log.info("{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) tif_attachments = self.tif_attachments(ucid) @@ -464,7 +466,7 @@ def tif_fetch(self, ucid, seq=1): @cache_region('longer') def png_fetch(self, ucid, seq=1): - log.info(u"{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) + log.info("{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) tif = self.tif_fetch(ucid, seq) 
if tif: png = to_png(tif) @@ -520,22 +522,22 @@ def read(self): 'name': 'ifi', 'time': self.input['time'], 'status': self.input['status'], - 'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - 'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + 'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + 'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.meta.upstream.pager.totalEntries) self.meta.navigator.count_page = int(self.meta.upstream.pager.entriesOnThisPage) self.meta.navigator.offset = int(self.meta.upstream.params.start) self.meta.navigator.limit = int(self.meta.upstream.params.rows) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content self.documents = self.input['content']['response']['docs'] self.read_documents() def document_to_number(self, document): - ucid = document[u'ucid'] + ucid = document['ucid'] cc, docno, kindcode = ucid.split('-') number = cc + docno + kindcode number_normalized = normalize_patent(number) @@ -548,7 +550,7 @@ def document_to_family_id(self, document): def ificlaims_client(options=None): - options = options or SmartBunch() + options = options or SmartMunch() if 'vendor' in options and options.vendor == 'serviva': client = get_serviva_client() else: @@ -576,7 +578,7 @@ def ificlaims_fetch(resource, format, options=None): @cache_region('search') def ificlaims_search(query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() client = ificlaims_client(options=options) try: diff --git a/patzilla/access/ificlaims/clientpool.py b/patzilla/access/ificlaims/clientpool.py index 1e0fc64e..a926ad16 100644 --- a/patzilla/access/ificlaims/clientpool.py +++ b/patzilla/access/ificlaims/clientpool.py @@ -4,7 +4,7 @@ import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager @@ -46,6 +46,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["IFICLAIMS_API_USERNAME"] or not os.environ["IFICLAIMS_API_PASSWORD"]: + raise KeyError("IFICLAIMS_API_USERNAME or IFICLAIMS_API_PASSWORD is empty") return { "api_username": os.environ["IFICLAIMS_API_USERNAME"], "api_password": os.environ["IFICLAIMS_API_PASSWORD"], @@ -81,12 +83,12 @@ class IIFIClaimsClientPool(Interface): pass +@implementer(IIFIClaimsClientPool) class IFIClaimsClientPool(object): """ IFI CLAIMS client pool as Pyramid utility implementation. 
""" - implements(IIFIClaimsClientPool) def __init__(self, api_uri, api_uri_json): logger.info("Creating upstream client pool for IFI CLAIMS") diff --git a/patzilla/access/ificlaims/commands.py b/patzilla/access/ificlaims/commands.py index d44f2d56..1fe7f7fe 100644 --- a/patzilla/access/ificlaims/commands.py +++ b/patzilla/access/ificlaims/commands.py @@ -33,7 +33,7 @@ from patzilla.boot.cache import configure_cache_backend from patzilla.boot.config import BootConfiguration from patzilla.util.config import get_configfile_from_commandline -from patzilla.util.data.container import SmartBunch, jd +from patzilla.util.data.container import SmartMunch, jd from patzilla.boot.framework import pyramid_setup @@ -79,7 +79,7 @@ def search(ctx, expression, request_json): # Invoke API and output result. logger.warning("Only the first 100 hits will be displayed. The CLI currently does not employ paging.") - results = client.search(SmartBunch({'expression': expression}), SmartBunch({'offset': 0, 'limit': 100})) + results = client.search(SmartMunch({'expression': expression}), SmartMunch({'offset': 0, 'limit': 100})) print(jd(results)) @@ -95,11 +95,12 @@ def make_request(client): #results = client.search('pa:siemens OR pa:bosch', 0, 10) #results = client.search('pa:(siemens OR bosch)', 0, 10) #results = client.search('text:"solar energy"', 0, 10) - results = client.search(SmartBunch({'expression': 'text:solar energy'}), SmartBunch({'offset': 0, 'limit': 10})) - #results = client.search(SmartBunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartBunch({'offset': 0, 'limit': 10})) + results = client.search(SmartMunch({'expression': 'text:solar energy'}), SmartMunch({'offset': 0, 'limit': 10})) + #results = client.search(SmartMunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartMunch({'offset': 0, 'limit': 10})) #results = client.search(u'text:抑血管生成素的药物用途', 0, 10) #results = client.search(u'text:放射線を照射する放射線源と', 0, 10) - #results = client.search(SmartBunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartBunch({'offset': 0, 'limit': 50})) + #results = client.search(SmartMunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartMunch({'offset': 0, 'limit': 50})) + #results = client.text_fetch('US-20100077592-A1') #results = client.text_fetch('CN-1055497-A') diff --git a/patzilla/access/ificlaims/expression.py b/patzilla/access/ificlaims/expression.py index 9496b698..64a8a704 100644 --- a/patzilla/access/ificlaims/expression.py +++ b/patzilla/access/ificlaims/expression.py @@ -22,7 +22,7 @@ class IFIClaimsGrammar(CQLGrammar): def preconfigure(self): CQLGrammar.preconfigure(self) - self.cmp_single = u':'.split() + self.cmp_single = ':'.split() class IFIClaimsParser(object): @@ -60,8 +60,8 @@ def trim_complexphrase(self): after: text:((parallel* AND schalt*) AND (antrieb* AND stufe*)) """ #print >>sys.stderr, 'expression-before:', self.expression - self.expression = re.sub(u'"(.+?)"~\d+', u'(\\1)', self.expression) - self.expression = self.expression.replace(u'{!complexphrase}', '') + self.expression = re.sub('"(.+?)"~\d+', '(\\1)', self.expression) + self.expression = self.expression.replace('{!complexphrase}', '') #print >>sys.stderr, 'expression-after :', self.expression @property @@ -192,7 +192,7 @@ def pair_to_solr(cls, key, value, modifiers=None): return expression = None - format = u'{0}:{1}' + format = '{0}:{1}' # 
------------------------------------------ @@ -230,7 +230,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # within 2009-08-20,2011-03-03 if 'within' in value: within_dates = parse_date_within(value) - elements_are_years = all([len(value) == 4 and value.isdigit() for value in within_dates.values()]) + elements_are_years = all([len(value) == 4 and value.isdigit() for value in list(within_dates.values())]) if elements_are_years: fieldname = 'pdyear' @@ -258,12 +258,12 @@ def pair_to_solr(cls, key, value, modifiers=None): except Exception as ex: message = 'IFI CLAIMS query: Invalid date or range expression "{0}". Reason: {1}.'.format(value, ex) - logger.warn(message + '\nException was:\n{0}'.format(_exception_traceback())) + logger.warning(message + '\nException was:\n{0}'.format(_exception_traceback())) return {'error': True, 'message': message} elif key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) elif key == 'class': @@ -277,7 +277,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # Put value into parenthesis, to properly capture expressions if value: - value = u'({value})'.format(value=value) + value = '({value})'.format(value=value) # Parse value as simple query expression query_object = CQL(cql=value) @@ -297,7 +297,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # ------------------------------------------ if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']: if has_booleans(value) and not should_be_quoted(value) and not '{!complexphrase' in value: - value = u'({0})'.format(value) + value = '({0})'.format(value) # ------------------------------------------ # expression formatter @@ -368,15 +368,15 @@ def triple_callback(token, index, binop, term): def format_expression(format, fieldname, value): expression = None - if type(fieldname) in types.StringTypes: + if type(fieldname) in (str,): expression = format.format(fieldname, value) - elif type(fieldname) is types.ListType: + elif type(fieldname) is list: subexpressions = [] for fieldname in fieldname: subexpressions.append(format.format(fieldname, value)) expression = ' or '.join(subexpressions) # surround with parentheses - expression = u'({0})'.format(expression) + expression = '({0})'.format(expression) return expression def ifi_convert_class(value): @@ -406,5 +406,5 @@ def should_be_quoted(value): if __name__ == '__main__': - print IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords - print IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords + print(IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords) + print(IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords) diff --git a/patzilla/access/ificlaims/expression.rst b/patzilla/access/ificlaims/expression.rst index 2162ee87..61c78a07 100644 --- a/patzilla/access/ificlaims/expression.rst +++ b/patzilla/access/ificlaims/expression.rst @@ -20,30 +20,30 @@ Empty query IPC/CPC ======= >>> IFIClaimsParser('H01F7/00').dumps() -u'H01F7/00' +'H01F7/00' # Rewrite all patent classifications from IFI format to OPS format >>> IFIClaimsParser('ic:G01F000184').parse().rewrite_classes_ops().dumps() -u'ic : G01F1/84' +'ic : G01F1/84' >>> IFIClaimsParser('ic:G01F000184').keywords -[u'G01F1/84'] 
+['G01F1/84'] >>> IFIClaimsExpression.pair_to_solr('class', 'H04L12/433 or H04L12/24') -{'query': u'((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'} +{'query': '((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'} >>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)') -{'query': u'((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'} +{'query': '((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'} >>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)') -{'query': u'((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'} +{'query': '((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'} Publication date ================ >>> IFIClaimsExpression.pair_to_solr('pubdate', 'foobar') -{'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True} +{'error': True, 'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". Reason: foobar.'} ********* @@ -54,39 +54,39 @@ Simple expressions ================== >>> IFIClaimsParser('ttl:bildschirm').keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm') -{'query': u'text:bildschirm'} +{'query': 'text:bildschirm'} >>> IFIClaimsParser('ttl:bildschirm or ab:fahrzeug').keywords -[u'bildschirm', u'fahrzeug'] +['bildschirm', 'fahrzeug'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm or fahrzeug') -{'query': u'text:(bildschirm OR fahrzeug)'} +{'query': 'text:(bildschirm OR fahrzeug)'} >>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw)') -{'query': u'text:(bildschirm AND (fahrzeug OR pkw))'} +{'query': 'text:(bildschirm AND (fahrzeug OR pkw))'} >>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw not lkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw', u'lkw'] +['bildschirm', 'fahrzeug', 'pkw', 'lkw'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)') -{'query': u'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'} +{'query': 'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'} >>> IFIClaimsParser('ab:fahrzeug or ab:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] >>> IFIClaimsParser('ab:fahrzeug not ttl:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] @@ -96,22 +96,22 @@ Expressions with proximity operators Queries based on the proximity of words to each other in a document. 
>>> IFIClaimsParser('text:((aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] >>> IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] >>> IFIClaimsExpression.pair_to_solr('fulltext', '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6') {'query': '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6'} >>> IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords -[u'parallel', u'schalt', u'F16H61/04'] +['parallel', 'schalt', 'F16H61/04'] >>> IFIClaimsParser('((ic:F16H006104 OR cpc:F16H006104)) AND {!complexphrase}text:"parallel* AND schalt*"~6').keywords -[u'F16H61/04', u'parallel', u'schalt'] +['F16H61/04', 'parallel', 'schalt'] >>> IFIClaimsParser('{!complexphrase}text:("parallel* AND schalt*"~6 AND "antrieb* AND stufe*"~3)').keywords -[u'parallel', u'schalt', u'antrieb', u'stufe'] +['parallel', 'schalt', 'antrieb', 'stufe'] @@ -122,17 +122,17 @@ Queries without proper fieldnames like ab=, ti=, bi=, etc. on the left side of t >>> IFIClaimsParser('bildschirm').dumps() -u'bildschirm' +'bildschirm' >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm') -{'query': u'text:bildschirm'} +{'query': 'text:bildschirm'} >>> IFIClaimsParser('bildschirm and fahrzeug').dumps() -u'bildschirm and fahrzeug' +'bildschirm and fahrzeug' >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and fahrzeug') -{'query': u'text:(bildschirm AND fahrzeug)'} +{'query': 'text:(bildschirm AND fahrzeug)'} @@ -140,22 +140,22 @@ Expressions containing quoted words =================================== >>> IFIClaimsParser('"bildschirm"').dumps() -u'"bildschirm"' +'"bildschirm"' >>> IFIClaimsParser('"bildschirm"').keywords [] >>> IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"') -{'query': u'text:"bildschirm"'} +{'query': 'text:"bildschirm"'} >>> IFIClaimsParser('ab:"bildschirm"').dumps() -u'ab : "bildschirm"' +'ab : "bildschirm"' >>> IFIClaimsParser('ab:"bildschirm"').keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser('text:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] @@ -163,19 +163,19 @@ Keyword extraction ================== >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00')['query']).keywords -[u'H01F7/00'] +['H01F7/00'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords -[u'H01F7/00', u'H01F7/02', u'H02K7/1876'] +['H01F7/00', 'H01F7/02', 'H02K7/1876'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'ttl:bildschirm OR ab:(fahrzeug OR pkw)')['query']).keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] @@ -185,18 +185,18 @@ From the wild Umlauts 
------- ->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() -u'((tac : *messschieber* or tac : *me\xdfschieber*) and tac : *digital*)' +>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() +'((tac : *messschieber* or tac : *me\xdfschieber*) and tac : *digital*)' ->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords -[u'messschieber', u'me\xdfschieber', u'digital'] +>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords +['messschieber', 'me\xdfschieber', 'digital'] More ---- ->>> IFIClaimsParser(u'ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords -[u'energy', u'water', u'waves', u'Tide', u'90\xb0'] +>>> IFIClaimsParser('ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords +['energy', 'water', 'waves', 'Tide', '90\xb0'] ->>> IFIClaimsParser(u'text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords -[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom'] +>>> IFIClaimsParser('text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords +['bremsgef\xfchl', 'pedalgef\xfchl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenst\xe4ndig', 'independent', 'autonom'] diff --git a/patzilla/access/office.py b/patzilla/access/office.py index 43531d75..da08fb70 100644 --- a/patzilla/access/office.py +++ b/patzilla/access/office.py @@ -76,5 +76,5 @@ def jump_office(request): else: return url - return HTTPNotFound(u'Could not locate document "{document_number}" at {office}/{service}.'.format( + return HTTPNotFound('Could not locate document "{document_number}" at {office}/{service}.'.format( document_number=document_number, office=office, service=service)) diff --git a/patzilla/access/sip/client.py b/patzilla/access/sip/client.py index 9eba41bb..11de635d 100644 --- a/patzilla/access/sip/client.py +++ b/patzilla/access/sip/client.py @@ -9,7 +9,7 @@ from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient from patzilla.access.sip import get_sip_client -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch """ @@ -27,9 +27,9 @@ class SipException(GenericAdapterException): def __init__(self, *args, **kwargs): self.sip_info = '' super(SipException, self).__init__(*args) - if kwargs.has_key('sip_info'): + if 'sip_info' in kwargs: self.sip_info = kwargs['sip_info'] - if kwargs.has_key('sip_response'): + if 'sip_response' in kwargs: self.sip_info = kwargs['sip_response'].get_childvalue('Info') if self.sip_info: self.user_info = self.sip_info @@ -98,7 +98,7 @@ def logout(self): def search(self, expression, options=None): - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 0) options.setdefault('limit', self.pagesize) @@ -106,7 +106,7 @@ def search(self, expression, options=None): offset = options.offset limit = options.limit - log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, 
limit={2}".format( + log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}".format( expression, offset, limit, **self.__dict__)) if not self.sessionid or self.stale: @@ -116,11 +116,11 @@ def search(self, expression, options=None): try: response = requests.post(self.uri + '/search/new', data={'session': self.sessionid, 'searchtree': expression}) except (ConnectionError, ConnectTimeout) as ex: - log.error(u'SIP search for user "{username}" at "{uri}" failed. Reason: {0} {1}.'.format( + log.error('SIP search for user "{username}" at "{uri}" failed. Reason: {0} {1}.'.format( ex.__class__, ex.message, username=self.username, uri=self.uri)) self.logout() raise SearchException(ex.message, - sip_info=u'Error or timeout while connecting to upstream database. Database might be offline.') + sip_info='Error or timeout while connecting to upstream database. Database might be offline.') # Process search response if response.status_code == 200: @@ -129,7 +129,7 @@ def search(self, expression, options=None): search_response = self._search_parse_xml(response.content) if search_response['success'] == 'false': - raise SearchException(u'Search failed', sip_response=search_response['response']) + raise SearchException('Search failed', sip_response=search_response['response']) if 'ResultSetId' in search_response['data']: @@ -145,7 +145,7 @@ def search(self, expression, options=None): #print "SIP search results:", search_results duration = timeit.default_timer() - starttime - log.info(u'Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info)) + log.info('Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info)) upstream_response = { 'info': search_info, @@ -159,33 +159,33 @@ def search(self, expression, options=None): duration = round(duration, 1) # TODO: Unify between SIP and IFI CLAIMS - log.info(u'{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format( + log.info('{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format( duration=duration, meta=result['meta'].prettify(), **self.__dict__)) if not result['numbers']: - log.warn(u'{backend_name} search from "{user}" for "{expression}" had empty results.'.format( + log.warn('{backend_name} search from "{user}" for "{expression}" had empty results.'.format( user=self.username, expression=expression, **self.__dict__ )) return result else: - message = u'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text) - raise SearchException(message, sip_info=u'Search failed. Search response could not be parsed.') + message = 'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text) + raise SearchException(message, sip_info='Search failed. Search response could not be parsed.') except Exception as ex: - log.error(u'Search failed. {name}: {message}. expression={expression}, response={response}'.format( + log.error('Search failed. {name}: {message}. expression={expression}, response={response}'.format( name=ex.__class__.__name__, message=ex.message, response=response.text, expression=expression)) raise else: response_status = str(response.status_code) + ' ' + response.reason - message = u'SIP search failed. Reason: response status != 200. status={0}, content={1}'.format( + message = 'SIP search failed. Reason: response status != 200. 
status={0}, content={1}'.format( response_status, response.text) log.error(message) raise SearchException(message, - sip_info=u'HTTP error "{status}" while searching upstream database'.format(status=response_status)) + sip_info='HTTP error "{status}" while searching upstream database'.format(status=response_status)) def getresults(self, resultid, options): @@ -207,23 +207,23 @@ def getresults(self, resultid, options): raise SearchException(message) duration = timeit.default_timer() - starttime - log.info(u'SIP getresults succeeded. duration={0}s'.format(round(duration, 1))) + log.info('SIP getresults succeeded. duration={0}s'.format(round(duration, 1))) return results except SearchException: raise except Exception as ex: - message = u'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format( + message = 'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format( ex.__class__, ex.message) - logmessage = u'{}. response={}'.format(message, response.text) + logmessage = '{}. response={}'.format(message, response.text) log.error(logmessage) raise SearchException(message) else: - message = u'SIP getresults failed. status_code={0}'.format( + message = 'SIP getresults failed. status_code={0}'.format( str(response.status_code) + ' ' + response.reason) - logmessage = u'{}. response={}'.format(message, response.text) + logmessage = '{}. response={}'.format(message, response.text) log.error(logmessage) raise SearchException(message) @@ -243,8 +243,8 @@ def _login_parse_xml(self, xml): 'this happens regularly on Wednesday evenings at 17:00 hours UTC (19:00 hours CEST)
' \ 'and usually does not take longer than one hour.' - if error.sip_info == u'i': - error.sip_info = u'Login failed' + if error.sip_info == 'i': + error.sip_info = 'Login failed' raise error def _search_parse_xml(self, xml): @@ -329,15 +329,15 @@ def read(self): # TODO: Reference from IFI CLAIMS, fill up/unify. #'time': self.input['time'], #'status': self.input['status'], - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.meta.upstream.MemCount) self.meta.navigator.count_page = len(self.input['results']) self.meta.navigator.offset = int(self.meta.upstream.Offset) self.meta.navigator.limit = int(self.meta.upstream.Limit) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content """ diff --git a/patzilla/access/sip/clientpool.py b/patzilla/access/sip/clientpool.py index f28a8c3c..1c8e679d 100644 --- a/patzilla/access/sip/clientpool.py +++ b/patzilla/access/sip/clientpool.py @@ -4,8 +4,9 @@ import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface +from zope.interface import implementer from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager from patzilla.access.sip.client import SipClient @@ -45,6 +46,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["SIP_API_USERNAME"] or not os.environ["SIP_API_PASSWORD"]: + raise KeyError("SIP_API_USERNAME or SIP_API_PASSWORD is empty") return { "api_username": os.environ["SIP_API_USERNAME"], "api_password": os.environ["SIP_API_PASSWORD"], @@ -80,13 +83,12 @@ class ISipClientPool(Interface): pass +@implementer(ISipClientPool) class SipClientPool(object): """ SIP client pool as Pyramid utility implementation. 
""" - implements(ISipClientPool) - def __init__(self, api_uri): logger.info("Creating upstream client pool for SIP") self.api_uri = api_uri @@ -103,3 +105,4 @@ def get(self, identifier, credentials=None, debug=False): uri=self.api_uri, username=credentials['api_username'], password=credentials['api_password']) return self.clients.get(identifier) + diff --git a/patzilla/access/sip/concordance.py b/patzilla/access/sip/concordance.py index 210371ab..ac9f44f8 100644 --- a/patzilla/access/sip/concordance.py +++ b/patzilla/access/sip/concordance.py @@ -202,7 +202,7 @@ def decode_row(row): try: stream = DictReader(csvfile) - print stream.fieldnames + print(stream.fieldnames) except Exception as ex: log.error('SIP CPC class map: Reading CSV file {} failed: {}'.format(filename, ex.message)) return @@ -225,7 +225,7 @@ def decode_row(row): return ws = wb.active - print 'XLSX row 1:', [cell.value for cell in ws.rows[0]] + print('XLSX row 1:', [cell.value for cell in ws.rows[0]]) stream = ws.rows[1:20] #sys.exit(1) diff --git a/patzilla/access/sip/expression.py b/patzilla/access/sip/expression.py index b5254f38..cc0e1583 100644 --- a/patzilla/access/sip/expression.py +++ b/patzilla/access/sip/expression.py @@ -49,16 +49,16 @@ class SipExpression(object): } sip_xml_expression_templates = { - 'patentnumber': u'{value}', - 'fulltext': u'{value}', + 'patentnumber': '{value}', + 'fulltext': '{value}', #'applicant': u'{value}', #'inventor': u'{value}', - 'applicant': u'{value}', - 'inventor': u'{value}', + 'applicant': '{value}', + 'inventor': '{value}', 'pubdate': { - 'both': u'', - 'startdate': u'', - 'enddate': u'', + 'both': '', + 'startdate': '', + 'enddate': '', } } @@ -83,11 +83,11 @@ def pair_to_sip_xml(cls, key, value, modifiers): # {u'fulltext': {u'claim': True, u'abstract': True, u'description': True, u'title': True} # -> # {u'fulltext': {u'claim': 'true', u'abstract': 'true', u'description': 'true', u'title': 'true'} - for modifier_field, modifier_values in modifiers.iteritems(): - if type(modifiers[modifier_field]) is types.DictionaryType: - for modifier_name, modifier_value in modifiers[modifier_field].iteritems(): + for modifier_field, modifier_values in modifiers.items(): + if type(modifiers[modifier_field]) is dict: + for modifier_name, modifier_value in modifiers[modifier_field].items(): modifiers[modifier_field][modifier_name] = str(modifier_value).lower() - elif type(modifiers[modifier_field]) is types.BooleanType: + elif type(modifiers[modifier_field]) is bool: modifiers[modifier_field] = str(modifiers[modifier_field]).lower() xml_part = None @@ -99,7 +99,7 @@ def pair_to_sip_xml(cls, key, value, modifiers): if len(value) == 4 and value.isdigit(): # e.g. 1978 - value = u'within {year}-01-01,{year}-12-31'.format(year=value) + value = 'within {year}-01-01,{year}-12-31'.format(year=value) # e.g. 1990-2014, 1990 - 2014 value = year_range_to_within(value) @@ -198,13 +198,13 @@ def pair_to_sip_xml(cls, key, value, modifiers): #print pretty_print(xml_part) except FulltextDecodingError as ex: - return {'error': True, 'message': unicode(ex)} + return {'error': True, 'message': str(ex)} except pyparsing.ParseException as ex: - return {'error': True, 'message': u'
<pre>' + ex.explanation + '</pre>'}
+ return {'error': True, 'message': '<pre>' + ex.explanation + '</pre>'}
except SyntaxError as ex:
- return {'error': True, 'message': u'<pre>' + unicode(ex) + '</pre>'}
+ return {'error': True, 'message': '<pre>' + str(ex) + '</pre>
'} elif key in cls.sip_xml_expression_templates: template = cls.sip_xml_expression_templates[key] @@ -232,7 +232,7 @@ def pair_to_sip_xml(cls, key, value, modifiers): def compute_modifiers(cls, modifiers): # prefer defaults (all True), but mixin modifiers from query - for modifier_field, modifier_values in cls.modifier_defaults.iteritems(): + for modifier_field, modifier_values in cls.modifier_defaults.items(): if modifier_field in cls.modifier_defaults: backup = deepcopy(modifiers.get(modifier_field, {})) modifiers[modifier_field] = cls.modifier_defaults[modifier_field] @@ -313,8 +313,8 @@ def to_etree(self, expression): result = self.parser._parser(expression, parseAll=True) except pyparsing.ParseException as ex: - ex.explanation = u'%s\n%s\n%s' % (expression, u' ' * ex.loc + u'^\n', ex) - logger.error(u'\n%s', ex.explanation) + ex.explanation = '%s\n%s\n%s' % (expression, ' ' * ex.loc + '^\n', ex) + logger.error('\n%s', ex.explanation) raise #print 'result:', result, type(result), dir(result) @@ -487,16 +487,16 @@ def parse(self): def eexists(element, name): return element.find(name) is not None child_constraints =\ - all(map(lambda x: eexists(root, x), ['index', 'binop'])) and \ - any(map(lambda x: eexists(root, x), ['value', 'quotes'])) + all([eexists(root, x) for x in ['index', 'binop']]) and \ + any([eexists(root, x) for x in ['value', 'quotes']]) if root.tag == 'parenthesis' and child_constraints: root.tag = 'term' # also rewrite all other parenthesis looking like terms for parens in root.iter('parenthesis'): child_constraints =\ - all(map(lambda x: eexists(parens, x), ['index', 'binop'])) and\ - any(map(lambda x: eexists(parens, x), ['value', 'quotes', 'or', 'and', 'not'])) + all([eexists(parens, x) for x in ['index', 'binop']]) and\ + any([eexists(parens, x) for x in ['value', 'quotes', 'or', 'and', 'not']]) if child_constraints: parens.tag = 'term' @@ -522,7 +522,7 @@ def eexists(element, name): elif boolean_content: value = self.convert_boolean_nodes(term) - value = value.replace(u'and not', u'not') + value = value.replace('and not', 'not') # 2. expand triple @@ -600,7 +600,7 @@ def convert_elements(self, root, element, tags): # skip elements without a valid representation on this level, e.g. "(ab=fahrzeug or ab=pkw)" if not value: return root - value = value.replace(u'and not', u'not') + value = value.replace('and not', 'not') elif tag in ['near', 'span']: value = self.convert_proximity_nodes(element_nested) @@ -628,13 +628,13 @@ def _get_index_binop(self, element): if index_node is not None: index = index_node.text else: - index = u'bi' + index = 'bi' # 2. 
binop if binop_node is not None: binop = binop_node.text else: - binop = u'=' + binop = '=' return index, binop @@ -667,14 +667,14 @@ def convert_proximity_nodes(self, container): # fall back to using already translated "text" nodes if value: - expression = map(lambda x: x.text, value) - map(lambda x: self.keyword_add(x), expression) + expression = [x.text for x in value] + list(map(lambda x: self.keyword_add(x), expression)) elif text: - expression = map(lambda x: '({0})'.format(x.text), text) + expression = ['({0})'.format(x.text) for x in text] - expression = u' '.join(expression) + expression = ' '.join(expression) distance = distance[0].text - value = u'{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance) + value = '{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance) return value def convert_boolean_nodes(self, node): @@ -693,7 +693,7 @@ def convert_boolean_nodes(self, node): elif element.tag == 'parenthesis': result = self.convert_boolean_nodes(element) if result: - result = u'(' + result + u')' + result = '(' + result + ')' child_values.append(result) elif element.tag in ['near', 'span']: @@ -706,9 +706,9 @@ def convert_boolean_nodes(self, node): pass if len(child_values) == 1 and node.tag == 'not': - child_values = [u'not ' + child_values[0]] + child_values = ['not ' + child_values[0]] - return u' {0} '.format(node.tag).join(child_values) + return ' {0} '.format(node.tag).join(child_values) def decode_quoted_value(self, element): """ @@ -731,15 +731,15 @@ def decode_quoted_value(self, element): value = element.text elif element.tag == 'quotes': - values = map(lambda x: x.text, element.iter('value')) - value = u'"{0}"'.format(u' '.join(values)) + values = [x.text for x in element.iter('value')] + value = '"{0}"'.format(' '.join(values)) return value def expand_fulltext(self, value, origin=None, modifiers=None): triple = value - origin = origin or u'{0}{1}{2}'.format(*triple) + origin = origin or '{0}{1}{2}'.format(*triple) ft_field, ft_op, ft_value = triple @@ -753,15 +753,15 @@ def expand_fulltext(self, value, origin=None, modifiers=None): try: ft_modifier = SipExpression.fulltext_field_modifier_map[ft_field] except KeyError: - message = u'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field) + message = 'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field) logger.warn(message) raise FulltextDecodingError(message) ft_modifiers = SipExpression.fulltext_modifiers_off.copy() - if type(ft_modifier) in types.StringTypes: + if type(ft_modifier) in (str,): ft_modifiers.update({ft_modifier: 'true'}) - elif type(ft_modifier) is types.ListType: + elif type(ft_modifier) is list: for ft_mod_item in ft_modifier: ft_modifiers.update({ft_mod_item: 'true'}) @@ -776,10 +776,10 @@ def strip_accents(s): #return ''.join((c for c in unicodedata.normalize('NFD', unicode(s)) if unicodedata.category(c) != 'Mn')) result = [] for char in s: - if char.lower() in u'äöüß': + if char.lower() in 'äöüß': result.append(char) else: - char_decomposed = unicodedata.normalize('NFD', unicode(char)) + char_decomposed = unicodedata.normalize('NFD', str(char)) for cd in char_decomposed: if unicodedata.category(cd) != 'Mn': result.append(cd) diff --git a/patzilla/access/sip/pyramid_service.py b/patzilla/access/sip/pyramid_service.py index 51fb44b5..225a3928 100644 --- a/patzilla/access/sip/pyramid_service.py +++ b/patzilla/access/sip/pyramid_service.py @@ -12,7 
+12,7 @@ from patzilla.access.sip.client import sip_published_data_search, sip_published_data_crawl, SearchException from patzilla.access.sip.client import LoginException from patzilla.util.cql.util import should_be_quoted -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.python import _exception_traceback log = logging.getLogger(__name__) @@ -58,7 +58,7 @@ def sip_published_data_search_handler(request): # - sorting # - whether to remove family members # - whether to return all family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, @@ -94,8 +94,8 @@ def sip_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) - message = re.sub(u'namespace: .*', u'', message) + message = str(ex) + message = re.sub('namespace: .*', '', message) request.errors.add('sip-search', 'internals', message) log.error(request.errors) @@ -126,7 +126,7 @@ def sip_published_data_crawl_handler(request): if hasattr(ex, 'user_info'): message = ex.user_info else: - message = unicode(ex) + message = str(ex) request.errors.add('sip-crawl', 'crawl', message) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/access/uspto/pdf.py b/patzilla/access/uspto/pdf.py index a31b8cbf..ef9b7ccc 100644 --- a/patzilla/access/uspto/pdf.py +++ b/patzilla/access/uspto/pdf.py @@ -119,7 +119,7 @@ def get_reference_type(document): Analyze document number to tell application vs. patent (publication, grant) numbers apart. The basic heuristic is to assume e.g. US2007231208A1 (4+6=10 chars) to be an application. """ - if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, unicode))): + if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, bytes))): raise ValueError("Unknown document reference type: {}".format(document)) number_length = len(str(document.number)) reference_type = None diff --git a/patzilla/boot/config.py b/patzilla/boot/config.py index ae75b9f9..c3d02580 100644 --- a/patzilla/boot/config.py +++ b/patzilla/boot/config.py @@ -90,7 +90,7 @@ def tmpfile(self, payload, suffix=None): """ Create a temporary file with given content. 
""" - tmp = tempfile.NamedTemporaryFile(suffix=suffix) + tmp = tempfile.NamedTemporaryFile(mode='w+', suffix=suffix) self._tmpfiles.append(tmp) tmp.write(payload) tmp.flush() diff --git a/patzilla/navigator/export.py b/patzilla/navigator/export.py index 8328001e..6bb6251a 100644 --- a/patzilla/navigator/export.py +++ b/patzilla/navigator/export.py @@ -15,7 +15,7 @@ from io import BytesIO from textwrap import dedent from lxml import etree as ET -from bunch import bunchify, Bunch +from munch import munchify, Munch from json.encoder import JSONEncoder from zipfile import ZipFile, ZIP_DEFLATED from collections import OrderedDict @@ -34,7 +34,7 @@ class Dossier(object): - summary_template = dedent(u""" + summary_template = dedent(""" Summary The research about »{project_name}« @@ -53,7 +53,7 @@ class Dossier(object): """).strip() def __init__(self, data): - self.data = bunchify(data) + self.data = munchify(data) self.prepare_dataframes() self.make_metadata() @@ -61,7 +61,7 @@ def make_metadata(self): self.metadata = ReportMetadata() - self.metadata.set('producer', u'IP Navigator') + self.metadata.set('producer', 'IP Navigator') # Project metadata self.metadata.set('project_name', self.data.project.name) @@ -120,7 +120,7 @@ def prepare_dataframes(self): # Queries - queries = map(self.query_criteria_smoother, self.data.get('queries', [])) + queries = list(map(self.query_criteria_smoother, self.data.get('queries', []))) self.df_queries = pandas.DataFrame(queries, columns=['criteria', 'query_expression', 'result_count', 'datasource', 'created']) self.df_queries.rename(columns={'query_expression': 'expression', 'result_count': 'hits', 'created': 'timestamp'}, inplace=True) @@ -155,10 +155,10 @@ def get_summary(self): def get_metadata(self): return self.format_with_metadata( - u'Author: {author_name} <{author_email}>\n' - u'Created: {project_created}\n' - u'Updated: {project_modified}\n' - u'Producer: {producer}') + 'Author: {author_name} <{author_email}>\n' + 'Created: {project_created}\n' + 'Updated: {project_modified}\n' + 'Producer: {producer}') @staticmethod def to_csv(dataframe): @@ -189,7 +189,7 @@ def to_zip(self, request=None, options=None): # TODO: Text representations for biblio, register, family # TODO: PDF Extracts - options = options or bunchify({'report': {}, 'media': {}}) + options = options or munchify({'report': {}, 'media': {}}) # Remove entries with empty/undefined document numbers @@ -203,7 +203,7 @@ def to_zip(self, request=None, options=None): with ZipFile(buffer, 'w', ZIP_DEFLATED) as zipfile: # FIXME: Add TERMS (liability waiver) and more... - zipfile.writestr('@readme.txt', u'Zip archive created by IP Navigator.') + zipfile.writestr('@readme.txt', 'Zip archive created by IP Navigator.') # Add text summary zipfile.writestr('@metadata.txt', self.get_metadata().encode('utf-8')) @@ -224,8 +224,8 @@ def to_zip(self, request=None, options=None): try: zipfile.writestr('report/@dossier.pdf', DossierXlsx(self.data).to_pdf(payload=workbook_payload)) except Exception as ex: - log.error(u'Rendering dossier to PDF failed. ' \ - u'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback())) + log.error('Rendering dossier to PDF failed. 
' \ + 'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback())) # Add CSV if options.report.csv: @@ -263,7 +263,7 @@ def to_zip(self, request=None, options=None): if not document or not document.strip(): continue - log.info(u'Data acquisition for document {document}'.format(document=document)) + log.info('Data acquisition for document {document}'.format(document=document)) status.setdefault(document, OrderedDict()) patent = decode_patent_number(document) @@ -272,7 +272,7 @@ def to_zip(self, request=None, options=None): if options.media.biblio: try: biblio_payload = get_ops_biblio_data('publication', document, xml=True) - zipfile.writestr(u'media/xml/{document}.biblio.xml'.format(document=document), biblio_payload) + zipfile.writestr('media/xml/{document}.biblio.xml'.format(document=document), biblio_payload) status[document]['biblio'] = True except Exception as ex: @@ -290,14 +290,14 @@ def to_zip(self, request=None, options=None): # Write XML document_number = encode_epodoc_number(patent) description_payload = ops_description(document_number, xml=True) - zipfile.writestr(u'media/xml/{document}.description.xml'.format(document=document), description_payload) + zipfile.writestr('media/xml/{document}.description.xml'.format(document=document), description_payload) status[document]['description'] = True # Write TEXT with ignored(): text_payload = self.get_fulltext(description_payload, 'description') if text_payload: - zipfile.writestr(u'media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8')) + zipfile.writestr('media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8')) except Exception as ex: self.handle_exception(ex, 'description', document) @@ -313,14 +313,14 @@ def to_zip(self, request=None, options=None): # Write XML document_number = encode_epodoc_number(patent) claims_payload = ops_claims(document_number, xml=True) - zipfile.writestr(u'media/xml/{document}.claims.xml'.format(document=document), claims_payload) + zipfile.writestr('media/xml/{document}.claims.xml'.format(document=document), claims_payload) status[document]['claims'] = True # Write TEXT with ignored(): text_payload = self.get_fulltext(claims_payload.replace('', '

').replace('', '

'), 'claims') if text_payload: - zipfile.writestr(u'media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8')) + zipfile.writestr('media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8')) except Exception as ex: self.handle_exception(ex, 'claims', document) @@ -332,7 +332,7 @@ def to_zip(self, request=None, options=None): try: register_payload = ops_register('publication', document, xml=True) - zipfile.writestr(u'media/xml/{document}.register.xml'.format(document=document), register_payload) + zipfile.writestr('media/xml/{document}.register.xml'.format(document=document), register_payload) status[document]['register'] = True except Exception as ex: @@ -346,7 +346,7 @@ def to_zip(self, request=None, options=None): try: document_number = encode_epodoc_number(patent, options={'nokind': True}) family_payload = ops_family_inpadoc('publication', document_number, 'biblio', xml=True) - zipfile.writestr(u'media/xml/{document}.family.xml'.format(document=document), family_payload) + zipfile.writestr('media/xml/{document}.family.xml'.format(document=document), family_payload) status[document]['family'] = True except Exception as ex: @@ -368,20 +368,20 @@ def to_zip(self, request=None, options=None): delivered_items = [] missing_items = [] - for document, kinds in status.iteritems(): + for document, kinds in status.items(): delivered = [] missing = [] - for kind, ok in kinds.iteritems(): + for kind, ok in kinds.items(): if ok: delivered.append(kind) else: missing.append(kind) if delivered: - item = u'{document:20}{delivered}'.format(document=document, delivered=u', '.join(delivered)) + item = '{document:20}{delivered}'.format(document=document, delivered=', '.join(delivered)) delivered_items.append(item) if missing: - item = u'{document:20}{missing}'.format(document=document, missing=u', '.join(missing)) + item = '{document:20}{missing}'.format(document=document, missing=', '.join(missing)) missing_items.append(item) if delivered_items or missing_items: @@ -409,13 +409,13 @@ def to_zip(self, request=None, options=None): def handle_exception(self, ex, service_name, document): if isinstance(ex, (_JSONError, HTTPError)) and hasattr(ex, 'status_int') and ex.status_int == 404: - log.warning(u'XML({service_name}, {document}) not found'.format(service_name=service_name, document=document)) + log.warning('XML({service_name}, {document}) not found'.format(service_name=service_name, document=document)) # Signal exception has been handled (ignored) return True else: - log.warning(u'XML({service_name}, {document}) failed. ' \ - u'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback())) + log.warning('XML({service_name}, {document}) failed. 
' \ + 'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback())) # Signal exception should be re-raised, maybe return False @@ -464,7 +464,7 @@ def default(self, o): return JSONEncoder.default(self, o) """ - if isinstance(o, (numpy.bool_,)): + if isinstance(o, numpy.bool_): return bool(o) raise TypeError(repr(o) + " is not JSON serializable") @@ -512,9 +512,9 @@ def create(self): def set_header_footer(self, worksheet): # http://xlsxwriter.readthedocs.io/example_headers_footers.html - header = u'&LIP Navigator&RSearch report' + header = '&LIP Navigator&RSearch report' worksheet.set_header(header) - footer = u'&L&L&D &T&C&A&RPage &P of &N' + footer = '&L&L&D &T&C&A&RPage &P of &N' worksheet.set_footer(footer) def write_cover_sheet(self): @@ -529,7 +529,7 @@ def write_cover_sheet(self): cover_sheet = self.workbook.add_worksheet('cover') self.set_header_footer(cover_sheet) - title = u'Dossier »{name}«'.format(name=self.data.project.name) + title = 'Dossier »{name}«'.format(name=self.data.project.name) title_format = self.workbook.add_format({'align': 'center', 'valign': 'vcenter', 'font_size': 17, 'bold': True}) cover_sheet.merge_range('A1:I2', title, title_format) @@ -545,7 +545,7 @@ def write_cover_sheet(self): footnote_format = self.workbook.add_format({'font_size': 9}) - footnote = dedent(u""" + footnote = dedent(""" Please have a look at the other worksheets in this workbook for more detailed information about all queries, comments and document numbers @@ -554,7 +554,7 @@ def write_cover_sheet(self): summary = self.generate_with_metadata(self.summary_template, emphasis=blue) - args = list(summary) + ['\n'] + [footnote_format, u'\n\n' + footnote] + args = list(summary) + ['\n'] + [footnote_format, '\n\n' + footnote] args.append(cell_format) cover_sheet.write_rich_string('B10', *args) @@ -571,7 +571,7 @@ def write_numberlist_sheets(self): sheets['rated'] = self.data.get('collections', {}).get('rated') sheets['dismissed'] = self.data.get('collections', {}).get('dismissed') sheets['seen'] = self.data.get('collections', {}).get('seen') - for sheet_name, entries in sheets.iteritems(): + for sheet_name, entries in sheets.items(): #print 'entries:'; pprint(entries) @@ -581,10 +581,10 @@ def write_numberlist_sheets(self): first = {} # Create pandas DataFrame - if type(first) in types.StringTypes: + if type(first) in (str,): df = pandas.DataFrame(entries, columns=['PN']) - elif isinstance(first, (types.DictionaryType, Bunch)): + elif isinstance(first, (dict, Munch)): df = pandas.DataFrame(entries, columns=['number', 'score', 'timestamp', 'url']) df.rename(columns={'number': 'document', 'url': 'display'}, inplace=True) @@ -720,7 +720,7 @@ def to_pdf(self, payload=None): if process.status_code == 0: #pdf_name = os.path.join(pdf_path, os.path.basename(xlsx_file.name).replace('.xlsx', '.pdf')) - payload = file(pdf_path, 'r').read() + payload = open(pdf_path, 'rb').read() #shutil.rmtree(pdf_path) os.unlink(pdf_path) return payload @@ -750,7 +750,7 @@ def set(self, key, value): # https://stackoverflow.com/questions/17215400/python-format-string-unused-named-arguments/17215533#17215533 def __missing__(self, key): - return u'n/a' + return 'n/a' # Machinery for monkeypatching XlsxWriter's Worksheet's ``write_url`` method @@ -763,7 +763,7 @@ def write_url_deduce_title(self, row, col, url, cell_format=None, string=None, t if string is None: string = os.path.basename(url) if tip is None: - tip = u'Open "{name}" in Patent Navigator'.format(name=string) + tip = 'Open 
"{name}" in Patent Navigator'.format(name=string) return self.write_url_dist(row, col, url, cell_format=cell_format, string=string, tip=tip) def workbook_add_sheet_hook(self, name=None): @@ -810,8 +810,8 @@ def _vgenerate(self, format_string, args, kwargs, used_args, recursion_depth): obj = self.convert_field(obj, conversion) # expand the format spec, if needed - format_spec = self._vformat(format_spec, args, kwargs, - used_args, recursion_depth-1) + #format_spec = self._vformat(format_spec, args, kwargs, + #used_args, recursion_depth-1) # format the object and append to the result if 'emphasis' in kwargs: diff --git a/patzilla/navigator/services/__init__.py b/patzilla/navigator/services/__init__.py index be31f855..8355b43e 100644 --- a/patzilla/navigator/services/__init__.py +++ b/patzilla/navigator/services/__init__.py @@ -35,12 +35,11 @@ def handle_generic_exception(request, ex, backend_name, query): module_name = ex.__class__.__module__ class_name = ex.__class__.__name__ - reason = u'{}.{}: {}'.format(module_name, class_name, ex.message) + reason = '{}.{}: {}'.format(module_name, class_name, str(ex)) - logger.critical(u'{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format( - exception=_exception_traceback(), **locals())) + logger.exception('{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}'.format(**locals())) - message = u'An exception occurred while processing your query.
\nReason: {}\n'.format(reason)
+ message = 'An exception occurred while processing your query.\nReason: {}
\n'.format(reason) if module_name == 'pymongo.errors': message += 'Error connecting to cache database. Please report this problem to us.' diff --git a/patzilla/navigator/services/admin.py b/patzilla/navigator/services/admin.py index 28aef93b..62db0094 100644 --- a/patzilla/navigator/services/admin.py +++ b/patzilla/navigator/services/admin.py @@ -30,7 +30,7 @@ def admin_users_emails_handler(request): continue user_emails.append(user.username.lower()) - payload = u'\n'.join(user_emails) + payload = '\n'.join(user_emails) return Response(payload, content_type='text/plain', charset='utf-8') diff --git a/patzilla/navigator/services/analytics.py b/patzilla/navigator/services/analytics.py index 3bce27ce..c73923da 100644 --- a/patzilla/navigator/services/analytics.py +++ b/patzilla/navigator/services/analytics.py @@ -3,7 +3,7 @@ import logging import datetime import operator -import HTMLParser +import html.parser from arrow.arrow import Arrow from cornice.service import Service from dateutil.relativedelta import relativedelta @@ -63,7 +63,7 @@ def _decode_expression_from_query(request): # decode query parameters into datasource and criteria decoded = {} params = dict(request.params) - if params.has_key('datasource'): + if 'datasource' in params: decoded['datasource'] = params['datasource'].lower() del params['datasource'] decoded.update({'criteria': params}) @@ -96,7 +96,7 @@ def __init__(self, datasource, criteria, kind): if self.kind == self.OLDEST: - self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 01, 01)) + self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 0o1, 0o1)) self.date_to = Arrow.fromdatetime(datetime.datetime(1899, 12, 31)) self.factor = +1 @@ -106,7 +106,7 @@ def __init__(self, datasource, criteria, kind): self.machine.add_transition('step', 'right', 'whole', unless='is_ready', after=['range_shrink']) elif self.kind == self.NEWEST: - self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 01, 01)) + self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 0o1, 0o1)) self.date_to = Arrow.utcnow() self.date_to += relativedelta(months=12-self.date_to.month, days=31-self.date_to.day) self.factor = -1 @@ -124,7 +124,7 @@ def __init__(self, datasource, criteria, kind): def runquery(self): criteria = self.criteria.copy() - criteria['pubdate'] = u'within {date_from},{date_to}'.format( + criteria['pubdate'] = 'within {date_from},{date_to}'.format( date_from=self.date_from.format('YYYY-MM-DD'), date_to=self.date_to.format('YYYY-MM-DD')) query = make_expression_filter({ @@ -199,10 +199,10 @@ def work(self): debug = False while True: if debug: - print '-' * 42 - print 'state:', self.state - print 'delta:', self.delta - print 'querycount:', self.querycount + print('-' * 42) + print('state:', self.state) + print('delta:', self.delta) + print('querycount:', self.querycount) if self.state == 'finished' or self.querycount > 15: break self.step() @@ -294,7 +294,7 @@ def analytics_applicants_distinct_handler(request): #print 'results:', results applicants = {} - htmlparser = HTMLParser.HTMLParser() + htmlparser = html.parser.HTMLParser() for item in results['details']: applicant = item.get('applicant') if applicant: diff --git a/patzilla/navigator/services/depatech.py b/patzilla/navigator/services/depatech.py index f852a81d..bae75f29 100644 --- a/patzilla/navigator/services/depatech.py +++ b/patzilla/navigator/services/depatech.py @@ -14,7 +14,7 @@ from patzilla.util.expression.keywords import keywords_to_response from patzilla.navigator.services.util import 
request_to_options from patzilla.access.generic.exceptions import NoResultsException, SearchException -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.python import _exception_traceback log = logging.getLogger(__name__) @@ -37,7 +37,7 @@ @status_upstream_depatech.get() def status_upstream_depatech_handler(request): client = get_depatech_client() - query = SmartBunch({ + query = SmartMunch({ 'expression': '(PC:DE AND DE:212016000074 AND KI:U1) OR AN:DE212016000074U1 OR NP:DE212016000074U1', }) data = client.search_real(query) @@ -53,7 +53,7 @@ def depatech_published_data_search_handler(request): # Get hold of query expression and filter expression = request.params.get('expression', '') filter = request.params.get('filter', '') - query = SmartBunch({ + query = SmartMunch({ 'syntax': 'lucene', 'expression': expression, 'filter': filter, @@ -84,7 +84,7 @@ def depatech_published_data_search_handler(request): # - limit # - sorting # - whether to remove family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, @@ -103,7 +103,7 @@ def depatech_published_data_search_handler(request): log.warn(request.errors) except SyntaxError as ex: - request.errors.add('depatech-search', 'expression', unicode(ex.msg)) + request.errors.add('depatech-search', 'expression', str(ex.msg)) log.warn(request.errors) except SearchException as ex: @@ -117,7 +117,7 @@ def depatech_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) + message = str(ex) request.errors.add('depatech-search', 'internals', message) log.error(request.errors) @@ -131,7 +131,7 @@ def depatech_published_data_crawl_handler(request): """Crawl published-data at MTC depa.tech""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) @@ -151,6 +151,6 @@ def depatech_published_data_crawl_handler(request): return result except Exception as ex: - request.errors.add('depatech-crawl', 'crawl', unicode(ex)) + request.errors.add('depatech-crawl', 'crawl', str(ex)) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/navigator/services/dpma.py b/patzilla/navigator/services/dpma.py index a83c99bb..9bc363e0 100644 --- a/patzilla/navigator/services/dpma.py +++ b/patzilla/navigator/services/dpma.py @@ -94,7 +94,7 @@ def prepare_search(request): expression = expression.replace('ikofax:', '') syntax = 'ikofax' - log.info(u'DEPATISnet query: {}, syntax: {}'.format(expression, syntax)) + log.info('DEPATISnet query: {}, syntax: {}'.format(expression, syntax)) # Compute query options, like # - limit @@ -112,7 +112,7 @@ def prepare_search(request): elif syntax == 'ikofax': search = ikofax_prepare_query(expression) else: - request.errors.add('depatisnet-search', 'expression', u'Unknown syntax {}'.format(syntax)) + request.errors.add('depatisnet-search', 'expression', 'Unknown syntax {}'.format(syntax)) # Propagate keywords to highlighting component keywords_to_response(request, search=search) @@ -165,10 +165,10 @@ def depatisnet_published_data_crawl_handler(request): http_response = None if hasattr(ex, 'http_response'): http_response = ex.http_response - log.error(u'DEPATISnet crawler error: 
query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format( + log.error('DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format( query, ex, http_response, _exception_traceback())) - message = u'An exception occurred while processing your query
Reason: {}'.format(ex) + message = 'An exception occurred while processing your query
Reason: {}'.format(ex) request.errors.add('depatisnet-search', 'crawl', message) diff --git a/patzilla/navigator/services/ificlaims.py b/patzilla/navigator/services/ificlaims.py index 2897a109..b7ac45ee 100644 --- a/patzilla/navigator/services/ificlaims.py +++ b/patzilla/navigator/services/ificlaims.py @@ -18,7 +18,7 @@ from patzilla.access.ificlaims.api import ificlaims_download, ificlaims_download_multi from patzilla.access.ificlaims.client import IFIClaimsException, IFIClaimsFormatException, LoginException, ificlaims_search, ificlaims_crawl, ificlaims_client from patzilla.access.ificlaims.expression import should_be_quoted, IFIClaimsParser -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.data.zip import zip_multi from patzilla.util.python import _exception_traceback @@ -51,7 +51,7 @@ @status_upstream_ificlaims.get() def status_upstream_ificlaims_handler(request): client = ificlaims_client() - query = SmartBunch({ + query = SmartMunch({ 'expression': 'pn:EP0666666', }) data = client.search_real(query) @@ -71,7 +71,7 @@ def ificlaims_download_handler(request): try: response = ificlaims_download(resource, format, options) - except IFIClaimsException, ex: + except IFIClaimsException as ex: if type(ex) is IFIClaimsFormatException: raise HTTPNotFound(ex) else: @@ -102,16 +102,16 @@ def ificlaims_deliver_handler(request): """Deliver resources from IFI CLAIMS Direct in bulk""" kind = request.matchdict['kind'] - formats = map(unicode.strip, request.params.get('formats', u'').lower().split(u',')) - numberlist = filter(lambda item: bool(item), map(unicode.strip, re.split('[\n,]', request.params.get('numberlist', u'')))) + formats = list(map(str.strip, request.params.get('formats', '').lower().split(','))) + numberlist = [item for item in map(str.strip, re.split('[\n,]', request.params.get('numberlist', ''))) if bool(item)] if kind == 'zip': multi = ificlaims_download_multi(numberlist, formats) #for entry in multi['results']: # print 'entry:', entry - print 'report:' - print json.dumps(multi['report'], indent=4) + print('report:') + print(json.dumps(multi['report'], indent=4)) payload = zip_multi(multi) @@ -138,7 +138,7 @@ def ificlaims_published_data_search_handler(request): """Search for published-data at IFI CLAIMS Direct""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) @@ -162,7 +162,7 @@ def ificlaims_published_data_search_handler(request): # - limit # - sorting # - whether to remove family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, @@ -181,7 +181,7 @@ def ificlaims_published_data_search_handler(request): log.warn(request.errors) except SyntaxError as ex: - request.errors.add('ificlaims-search', 'expression', unicode(ex.msg)) + request.errors.add('ificlaims-search', 'expression', str(ex.msg)) log.warn(request.errors) except SearchException as ex: @@ -195,7 +195,7 @@ def ificlaims_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) + message = str(ex) request.errors.add('ificlaims-search', 'internals', message) log.error(request.errors) @@ -209,7 +209,7 @@ def ificlaims_published_data_crawl_handler(request): """Crawl published-data at IFI CLAIMS Direct""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 
'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) @@ -229,6 +229,6 @@ def ificlaims_published_data_crawl_handler(request): return result except Exception as ex: - request.errors.add('ificlaims-crawl', 'crawl', unicode(ex)) + request.errors.add('ificlaims-crawl', 'crawl', str(ex)) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/navigator/services/ops.py b/patzilla/navigator/services/ops.py index 08316486..ef9eaf74 100644 --- a/patzilla/navigator/services/ops.py +++ b/patzilla/navigator/services/ops.py @@ -89,12 +89,12 @@ def ops_published_data_search_handler(request): # CQL query string query = request.params.get('expression', '') - log.info(u'query raw: %s', query) + log.info('query raw: %s', query) # Transcode CQL query expression search = cql_prepare_query(query) - log.info(u'query cql: %s', search.expression) + log.info('query cql: %s', search.expression) # range: x-y, maximum delta is 100, default is 25 range = request.params.get('range') @@ -136,7 +136,7 @@ def ops_published_data_crawl_handler(request): # CQL query string query = request.params.get('expression', '') - log.info(u'query raw: ' + query) + log.info('query raw: ' + query) # Transcode CQL query expression search = cql_prepare_query(query) @@ -144,7 +144,7 @@ def ops_published_data_crawl_handler(request): # Propagate keywords to highlighting component keywords_to_response(request, search=search) - log.info(u'query cql: ' + search.expression) + log.info('query cql: ' + search.expression) chunksize = int(request.params.get('chunksize', '100')) @@ -154,7 +154,7 @@ def ops_published_data_crawl_handler(request): return result except Exception as ex: - log.error(u'OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback())) + log.error('OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback())) request.errors.add('ops-published-data-crawl', 'query', str(ex)) diff --git a/patzilla/navigator/services/util.py b/patzilla/navigator/services/util.py index 652a78d0..1221fe1f 100644 --- a/patzilla/navigator/services/util.py +++ b/patzilla/navigator/services/util.py @@ -5,7 +5,7 @@ import logging import mimetypes from pprint import pprint -from bunch import bunchify +from munch import munchify from cornice.service import Service from pyramid.settings import asbool from pyramid.threadlocal import get_current_request @@ -13,7 +13,7 @@ from patzilla.navigator.export import Dossier, DossierXlsx from patzilla.util.config import read_list from patzilla.util.cql.util import pair_to_cql -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.expression.keywords import keywords_from_boolean_expression from patzilla.util.numbers.numberlists import parse_numberlist, normalize_numbers from patzilla.util.python import exception_traceback @@ -55,9 +55,9 @@ def query_expression_util_handler(request): # TODO: improve error handling data = request.json - log.info(u'[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data)) + log.info('[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data)) expression_data = make_expression_filter(data) - log.info(u'[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, 
expression_data=expression_data)) + log.info('[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, expression_data=expression_data)) return expression_data @@ -100,7 +100,7 @@ def make_expression_filter(data): else: # Bring criteria in order: Process "fulltext" first - keys = criteria.keys() + keys = list(criteria.keys()) if 'fulltext' in keys: keys.remove('fulltext') keys.insert(0, 'fulltext') @@ -132,7 +132,7 @@ def make_expression_filter(data): elif datasource == 'sip': expression_part = SipExpression.pair_to_sip_xml(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) @@ -147,7 +147,7 @@ def make_expression_filter(data): else: expression_part = IFIClaimsExpression.pair_to_solr(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) @@ -157,13 +157,13 @@ def make_expression_filter(data): expression_part = DepaTechExpression.pair_to_elasticsearch(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) # Accumulate expression part - error_tpl = u'Criteria "{0}: {1}" has invalid format, datasource={2}.' + error_tpl = 'Criteria "{0}: {1}" has invalid format, datasource={2}.' if not expression_part: message = error_tpl.format(key, value, datasource) log.warn(message) @@ -171,7 +171,7 @@ def make_expression_filter(data): elif 'error' in expression_part: message = error_tpl.format(key, value, datasource) - message += u'
' + expression_part['message'] + message += '
' + expression_part['message'] log.warn(message) request.errors.add('query-expression-utility-service', 'comfort-form', message) @@ -181,12 +181,12 @@ def make_expression_filter(data): expression_parts.append(query) # Accumulate filter part - error_tpl = u'Filter "{0}: {1}" has invalid format, datasource={2}.' + error_tpl = 'Filter "{0}: {1}" has invalid format, datasource={2}.' if filter_part: if 'error' in filter_part: message = error_tpl.format(key, value, datasource) - message += u'
' + filter_part['message'] + message += '
' + filter_part['message'] log.warn(message) request.errors.add('query-expression-utility-service', 'comfort-form', message) @@ -251,8 +251,8 @@ def request_to_options(request, options): options.update({'feature_family_replace': True}) # this is awful, switch to JSON POST - for key, value in request.params.iteritems(): - if key.startswith(u'query_data[sorting]'): + for key, value in request.params.items(): + if key.startswith('query_data[sorting]'): key = key.replace('query_data[sorting]', '').replace('[', '').replace(']', '') options.setdefault('sorting', {}) options['sorting'][key] = value @@ -288,7 +288,7 @@ def export_util_handler(request): elif output_kind == 'dossier': log.info('Starting dossier export to format "{format}"'.format(format=output_format)) - data = bunchify(json.loads(request.params.get('json'))) + data = munchify(json.loads(request.params.get('json'))) # Debugging #print 'dossier-data:'; pprint(data.toDict()) @@ -314,10 +314,10 @@ def export_util_handler(request): payload = dossier.to_zip(request=request, options=data.get('options')) else: - return HTTPBadRequest(u'Export format "{format}" is unknown.'.format(format=output_format)) + return HTTPBadRequest('Export format "{format}" is unknown.'.format(format=output_format)) except Exception as ex: - message = u'Exporting format "{format}" failed.'.format(format=output_format) + message = 'Exporting format "{format}" failed.'.format(format=output_format) log.error('{message}. Exception:\n{trace}'.format(message=message, trace=exception_traceback())) return HTTPServerError(message) @@ -350,7 +350,7 @@ def issue_reporter_handler(request): report_data = request.json report_data.setdefault('application', {}) - report = SmartBunch.bunchify(report_data) + report = SmartMunch.munchify(report_data) # Add user information to issue report user = request.user @@ -361,7 +361,7 @@ def issue_reporter_handler(request): user.upstream_credentials = None # Serialize user object and attach to report - report.application.user = SmartBunch(json.loads(user.to_json())) + report.application.user = SmartMunch(json.loads(user.to_json())) # Send the whole beast to the standard application log log.error('Issue report [{targets}]:\n{report}'.format( diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py index 7c809e8a..4f9fc082 100644 --- a/patzilla/navigator/settings.py +++ b/patzilla/navigator/settings.py @@ -13,7 +13,8 @@ from patzilla.util.config import read_list, asbool, get_configuration from patzilla.util.date import datetime_isoformat, unixtime_to_datetime from patzilla.util.python import _exception_traceback -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch + log = logging.getLogger(__name__) @@ -51,8 +52,7 @@ def get_application_settings(self): # TODO: Optimize: Only read once, not on each request! # FIXME: Maybe do the same what `attach_ops_client` does? # `if '/static' in event.request.url: return`. - settings = get_configuration(self.configfile, kind=SmartBunch) - + settings = get_configuration(self.configfile, kind=SmartMunch) # Add some global settings settings['software_version'] = __version__ @@ -66,10 +66,10 @@ def get_application_settings(self): def get_datasource_settings(self, vendor=None): # Container for datasource settings. 
- datasource_settings = SmartBunch({ + datasource_settings = SmartMunch({ 'datasources': [], - 'datasource': SmartBunch(), - 'total': SmartBunch.bunchify({'fulltext_countries': [], 'details_countries': []}), + 'datasource': SmartMunch(), + 'total': SmartMunch.munchify({'fulltext_countries': [], 'details_countries': []}), }) # Read datasource settings from configuration. @@ -77,7 +77,7 @@ def get_datasource_settings(self, vendor=None): datasource_settings.protected_fields = read_list(self.application_settings.get('ip_navigator', {}).get('datasources_protected_fields')) for datasource in datasource_settings.datasources: - datasource_info = SmartBunch() + datasource_info = SmartMunch() if vendor is None: settings_key = 'datasource:{name}'.format(name=datasource) else: @@ -88,10 +88,10 @@ def get_datasource_settings(self, vendor=None): datasource_info.setdefault('fulltext_countries', read_list(ds_settings.get('fulltext_countries', ''))) datasource_info.setdefault('details_enabled', asbool(ds_settings.get('details_enabled', False))) datasource_info.setdefault('details_countries', read_list(ds_settings.get('details_countries', ''))) - for key, value in ds_settings.iteritems(): + for key, value in ds_settings.items(): datasource_info.setdefault(key, value) - datasource_settings.datasource[datasource] = SmartBunch.bunchify(datasource_info) + datasource_settings.datasource[datasource] = SmartMunch.munchify(datasource_info) # Aggregate data for all countries. datasource_settings.total.fulltext_countries += datasource_info['fulltext_countries'] @@ -101,9 +101,9 @@ def get_datasource_settings(self, vendor=None): def get_vendor_settings(self): # Container for vendor settings - vendor_settings = SmartBunch({ + vendor_settings = SmartMunch({ 'vendors': [], - 'vendor': SmartBunch(), + 'vendor': SmartMunch(), }) # Read vendor settings from configuration @@ -122,8 +122,8 @@ def get_vendor_settings(self): vendor=vendor, configfile=self.configfile)) vendor_info = self.application_settings.get(settings_key, {}) - for key, value in vendor_info.iteritems(): - vendor_info[key] = value.decode('utf-8') + for key, value in vendor_info.items(): + vendor_info[key] = value if 'hostname_matches' in vendor_info: vendor_info.hostname_matches = read_list(vendor_info.hostname_matches) @@ -135,7 +135,7 @@ def get_vendor_settings(self): vendor_info.datasource_settings = self.get_datasource_settings(vendor) # Collect all vendor settings. 
- vendor_settings.vendor[vendor] = SmartBunch.bunchify(vendor_info) + vendor_settings.vendor[vendor] = SmartMunch.munchify(vendor_info) return vendor_settings @@ -146,9 +146,9 @@ def get_email_settings(self, vendor): """ # Container for email settings - email_settings = SmartBunch({ + email_settings = SmartMunch({ 'addressbook': [], - 'content': SmartBunch(), + 'content': SmartMunch(), }) for setting_name in ['addressbook', 'content']: @@ -160,8 +160,8 @@ def get_email_settings(self, vendor): if defaults and specific: thing.update(deepcopy(specific)) - for key, value in thing.items(): - thing[key] = value.decode('utf-8') + for key, value in list(thing.items()): + thing[key] = value email_settings[setting_name] = thing @@ -227,7 +227,7 @@ def effective_vendor(self): # Skip resolving effective vendor when no vendors are configured at all if self.registry.vendor_settings is None: - return SmartBunch() + return SmartMunch() # Select vendor by matching hostnames vendor_names = self.registry.vendor_settings.vendors @@ -281,12 +281,12 @@ def theme_settings(self): 'ui.version': software_version_link, 'ui.page.title': vendor.get('page_title', ''), # + '   ' + self.beta_badge, 'ui.page.subtitle': '', - 'ui.page.footer': 'Data sources: ' + u', '.join(data_source_list), + 'ui.page.footer': 'Data sources: ' + ', '.join(data_source_list), } # Transfer all properties having designated prefixes 1:1 prefixes = ['ui.', 'feature.'] - for key, value in vendor.iteritems(): + for key, value in vendor.items(): for prefix in prefixes: if key.startswith(prefix): if key.endswith('.enabled'): @@ -304,10 +304,10 @@ def datasource_settings(self): Return datasource settings while accounting for sensible settings like API URI and credentials. """ request = get_current_request() - datasource_settings = SmartBunch.bunchify(request.registry.datasource_settings) + datasource_settings = SmartMunch.munchify(request.registry.datasource_settings) if 'protected_fields' in datasource_settings: for fieldname in datasource_settings.protected_fields: - for name, settings in datasource_settings.datasource.iteritems(): + for name, settings in datasource_settings.datasource.items(): if fieldname in settings: del settings[fieldname] del datasource_settings['protected_fields'] @@ -363,7 +363,7 @@ def config_parameters(self): isviewer = 'patentview' in host or 'viewer' in host or 'patview' in host # 1. don't allow "query" from outside on view-only domains - if request_params.has_key('query') and isviewer: + if 'query' in request_params and isviewer: log.warning('Parameter "query=%s" not allowed on host "%s", purging it', request_params['query'], host) del request_params['query'] @@ -388,7 +388,7 @@ def config_parameters(self): # C. parameter firewall, OUTPUT # remove "opaque parameter" - if params.has_key('op'): + if 'op' in params: del params['op'] @@ -409,7 +409,7 @@ def config_parameters(self): params['datasources_enabled'].append(datasource) # E. 
backward-compat amendments - for key, value in params.iteritems(): + for key, value in params.items(): if key.startswith('ship_'): newkey = key.replace('ship_', 'ship-') params[newkey] = value diff --git a/patzilla/navigator/tools/browser_database_tool.py b/patzilla/navigator/tools/browser_database_tool.py index 98e4c8f8..ec1bc5ab 100755 --- a/patzilla/navigator/tools/browser_database_tool.py +++ b/patzilla/navigator/tools/browser_database_tool.py @@ -12,17 +12,17 @@ def purge_titles(data): # Purge "title" attributes from BasketEntry objects - for name, entity in data['database'].iteritems(): + for name, entity in data['database'].items(): if name.startswith('BasketEntry'): if 'title' in entity: del entity['title'] if 'number' in entity: - entity['number'] = entity['number'].strip(u'★ ') + entity['number'] = entity['number'].strip('★ ') def purge_numbers_seen(data): # Purge all BasketEntry objects with "seen==true" keys = [] - for name, item in data['database'].iteritems(): + for name, item in data['database'].items(): if name.startswith('BasketEntry/'): if 'seen' in item and item['seen'] == True: keys.append(name) @@ -32,7 +32,7 @@ def purge_numbers_seen(data): def purge_projects(data): # Purge "project" attributes from all "Query/..." objects - for name, item in data['database'].iteritems(): + for name, item in data['database'].items(): if name.startswith('Query/'): if 'project' in item: del item['project'] @@ -51,7 +51,7 @@ def main(): #purge_projects(data) # Save database file - print json.dumps(data, indent=4) + print(json.dumps(data, indent=4)) if __name__ == '__main__': diff --git a/patzilla/navigator/util.py b/patzilla/navigator/util.py index ad77fc2f..df5314a4 100644 --- a/patzilla/navigator/util.py +++ b/patzilla/navigator/util.py @@ -6,7 +6,7 @@ def get_exception_message(ex, add_traceback=False): name = ex.__class__.__name__ - description = '%s: %s' % (name, unicode(ex.message)) + description = '%s: %s' % (name, str(ex.message)) if add_traceback: description += '\n' + get_safe_traceback(ex) return description @@ -24,7 +24,7 @@ def safe_value(value): e.g. 
CaseInsensitiveDict to dict """ if hasattr(value, 'items') and callable(value.items): - return dict(value.items()) + return dict(list(value.items())) else: return value @@ -35,7 +35,7 @@ def dict_subset(bigdict, *wanted_keys): def dict_prefix_key(d, prefix): # prefix keys in dictionary new = {} - for key, value in d.iteritems(): + for key, value in d.items(): key = prefix + key new[key] = value return new @@ -53,7 +53,7 @@ def dict_merge(dct, merge_dct): :param merge_dct: dct merged into dct :return: None """ - for k, v in merge_dct.iteritems(): + for k, v in merge_dct.items(): if (k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], collections.Mapping)): dict_merge(dct[k], merge_dct[k]) diff --git a/patzilla/navigator/views.py b/patzilla/navigator/views.py index 36050c9b..0389bb7e 100644 --- a/patzilla/navigator/views.py +++ b/patzilla/navigator/views.py @@ -137,7 +137,7 @@ def navigator_quick(request): # Compute query expression expression = compute_expression(field, value, value2, parameters=request.params) - print 'quick expression:', expression + print('quick expression:', expression) #return get_redirect_query(request, expression, query_args=query_args) return get_redirect_query(request, expression) @@ -150,7 +150,7 @@ def compute_expression(field, value, value2=None, **kwargs): field = 'pn' if field in ['cl', 'ipc', 'ic', 'cpc', 'cpci', 'cpca']: - value = value.replace(u'-', u'/') + value = value.replace('-', '/') quotable = True if field in ['pa', 'applicant']: @@ -159,38 +159,38 @@ def compute_expression(field, value, value2=None, **kwargs): # apply blacklist blacklist = [ - u'GmbH & Co. KG', - u'GmbH', - u' KG', - u' AG', - u'& Co.', + 'GmbH & Co. KG', + 'GmbH', + ' KG', + ' AG', + '& Co.', ] replacements = { - u' and ': u' ', - u' or ': u' ', - u' not ': u' ', + ' and ': ' ', + ' or ': ' ', + ' not ': ' ', } for black in blacklist: pattern = re.compile(re.escape(black), re.IGNORECASE) - value = pattern.sub(u'', value).strip() - for replacement_key, replacement_value in replacements.iteritems(): + value = pattern.sub('', value).strip() + for replacement_key, replacement_value in replacements.items(): #value = value.replace(replacement_key, replacement_value) pattern = re.compile(replacement_key, re.IGNORECASE) value = pattern.sub(replacement_value, value).strip() # make query expression - parts_raw = re.split(u'[ -]*', value) + parts_raw = re.split('[ -]*', value) umlaut_map = { - u'ä': u'ae', - u'ö': u'oe', - u'ü': u'ue', - u'Ä': u'Ae', - u'Ö': u'Oe', - u'Ü': u'Ue', - u'ß': u'ss', + 'ä': 'ae', + 'ö': 'oe', + 'ü': 'ue', + 'Ä': 'Ae', + 'Ö': 'Oe', + 'Ü': 'Ue', + 'ß': 'ss', } def replace_parts(thing): - for umlaut, replacement in umlaut_map.iteritems(): + for umlaut, replacement in umlaut_map.items(): thing = thing.replace(umlaut, replacement) return thing @@ -198,22 +198,22 @@ def replace_parts(thing): for part in parts_raw: # "Alfred H. 
Schütte" => Alfred Schütte - if re.match(u'^(\w\.)+$', part): + if re.match('^(\w\.)+$', part): continue part_normalized = replace_parts(part) if part != part_normalized: - part = u'({} or {})'.format(part, part_normalized) + part = '({} or {})'.format(part, part_normalized) parts.append(part) - value = u' and '.join(parts) + value = ' and '.join(parts) #value = u'({})'.format(value) - if quotable and u' ' in value: - value = u'"{0}"'.format(value) + if quotable and ' ' in value: + value = '"{0}"'.format(value) - query = u'{field}={value}'.format(**locals()) + query = '{field}={value}'.format(**locals()) if field in ['pd', 'publicationdate']: if 'W' in value: diff --git a/patzilla/util/config/__init__.py b/patzilla/util/config/__init__.py index 0c94ff3a..e332bccb 100644 --- a/patzilla/util/config/__init__.py +++ b/patzilla/util/config/__init__.py @@ -4,7 +4,7 @@ import logging import sys from glob import glob -from ConfigParser import ConfigParser +from configparser import ConfigParser logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def get_configuration(*args, **kwargs): logger.info('Effective configuration files: {}'.format(make_list(used))) return config else: - msg = u'Could not read settings from configuration files: {}'.format(config_files) + msg = 'Could not read settings from configuration files: {}'.format(config_files) logger.critical(msg) raise ValueError(msg) @@ -76,22 +76,22 @@ def asbool(s): s = str(s).strip() return s.lower() in truthy -def read_list(string, separator=u','): +def read_list(string, separator=','): if string is None: return [] elif isinstance(string, list): return string - result = map(unicode.strip, string.split(separator)) + result = list(map(str.strip, string.split(separator))) if len(result) == 1 and not result[0]: result = [] return result -def make_list(items, separator=u', '): +def make_list(items, separator=', '): return separator.join(items) def normalize_docopt_options(options): normalized = {} - for key, value in options.items(): + for key, value in list(options.items()): key = key.strip('--<>') normalized[key] = value return normalized diff --git a/patzilla/util/cql/cheshire3/__init__.py b/patzilla/util/cql/cheshire3/__init__.py index 35e47286..aad5781f 100644 --- a/patzilla/util/cql/cheshire3/__init__.py +++ b/patzilla/util/cql/cheshire3/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # (c) 2014 Andreas Motl, Elmyra UG -import parser as cheshire3_parser -from parser import SearchClause, CQLParser, Diagnostic +from . 
import parser as cheshire3_parser +from .parser import SearchClause, CQLParser, Diagnostic from patzilla.util.numbers.normalize import normalize_patent @@ -14,7 +14,7 @@ class SmartSearchClause(SearchClause): def toCQL(self): text = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if (p != ''): text.append('>%s="%s"' % (p, self.prefixes[p])) else: diff --git a/patzilla/util/cql/cheshire3/parser.py b/patzilla/util/cql/cheshire3/parser.py index 15504717..610af8e9 100644 --- a/patzilla/util/cql/cheshire3/parser.py +++ b/patzilla/util/cql/cheshire3/parser.py @@ -19,8 +19,8 @@ from shlex import shlex from xml.sax.saxutils import escape -from StringIO import StringIO -from __builtin__ import isinstance +from io import StringIO +from builtins import isinstance serverChoiceRelation = "=" serverChoiceIndex = "cql.serverchoice" @@ -75,7 +75,7 @@ def __init__(self): def toXCQL(self, depth=0): space = " " * depth xml = ['{s}\n'] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): xml.extend(["{s} \n", "{s} {name}\n", "{s} {ident}\n", @@ -221,7 +221,7 @@ def toCQL(self): txt = [] if (self.prefixes): ptxt = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if p != '': ptxt.append('>%s="%s"' % (p, self.prefixes[p])) else: @@ -236,7 +236,7 @@ def toCQL(self): txt.append("sortBy") for sk in self.sortKeys: txt.append(sk.toCQL()) - return u"({0})".format(u" ".join(txt)) + return "({0})".format(" ".join(txt)) def getResultSetId(self, top=None): if ( @@ -315,7 +315,7 @@ def toXCQL(self, depth=0): def toCQL(self): text = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if p != '': text.append('>%s="%s"' % (p, self.prefixes[p])) else: @@ -406,7 +406,7 @@ def toXCQL(self, depth=0): def toCQL(self): txt = [self.value] - txt.extend(map(str, self.modifiers)) + txt.extend(list(map(str, self.modifiers))) return '/'.join(txt) @@ -572,7 +572,6 @@ def __init__(self, thing): shlex.__init__(self, thing) self.wordchars += "!@#$%^&*-+{}[];,.?|~`:\\" # self.wordchars += ''.join(map(chr, range(128,254))) - self.wordchars = self.wordchars.decode('utf-8') def read_token(self): "Read a token from the input stream (no pushback or inclusions)" @@ -774,7 +773,7 @@ def query(self): left.sortKeys = self.sortQuery() else: break - for p in prefs.keys(): + for p in list(prefs.keys()): left.addPrefix(p, prefs[p]) return left @@ -812,7 +811,7 @@ def subQuery(self): prefs = self.prefixes() if (prefs): object = self.query() - for p in prefs.keys(): + for p in list(prefs.keys()): object.addPrefix(p, prefs[p]) else: object = self.clause() @@ -847,7 +846,7 @@ def clause(self): elif self.currentToken == ">": prefs = self.prefixes() object = self.clause() - for p in prefs.keys(): + for p in list(prefs.keys()): object.addPrefix(p, prefs[p]) return object @@ -915,12 +914,6 @@ def relation(self): def parse(query): """Return a searchClause/triple object from CQL string""" - if type(query) == str: - try: - query = query.decode("utf-8") - except Exception, e: - raise - q = StringIO(query) lexer = CQLshlex(q) parser = CQLParser(lexer) diff --git a/patzilla/util/cql/cheshire3/test_cheshire3.py b/patzilla/util/cql/cheshire3/test_cheshire3.py index daab7bb1..89020576 100644 --- a/patzilla/util/cql/cheshire3/test_cheshire3.py +++ b/patzilla/util/cql/cheshire3/test_cheshire3.py @@ -54,13 +54,13 @@ def test_value_shortcut_notation_fail(self): self.do_parse('ti=(foo and bar baz) and pc=qux') self.assertEqual( str(cm.exception), - "info:srw/diagnostic/1/10 
[Malformed Query]: Expected Boolean or closing parenthesis but got: u'baz'") + "info:srw/diagnostic/1/10 [Malformed Query]: Expected Boolean or closing parenthesis but got: 'baz'") def test_boolean_german(self): self.assertEqual(self.do_parse('bi=foo und bi=bar'), '(bi = "foo" und bi = "bar")') def test_utf8(self): - self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), u'(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")') + self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), '(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")') if __name__ == '__main__': unittest.main() diff --git a/patzilla/util/cql/pyparsing/__init__.py b/patzilla/util/cql/pyparsing/__init__.py index f916d5aa..9bf78eda 100644 --- a/patzilla/util/cql/pyparsing/__init__.py +++ b/patzilla/util/cql/pyparsing/__init__.py @@ -45,7 +45,7 @@ def parse(self): >>> tokens = parse_cql('foo=bar') >>> tokens - ([(['foo', u'=', 'bar'], {'triple': [((['foo', u'=', 'bar'], {}), 0)]})], {}) + ParseResults([ParseResults(['foo', '=', 'bar'], {'triple': ['foo', '=', 'bar']})], {}) """ @@ -60,13 +60,13 @@ def parse(self): #if self.logging: # log.info(u'tokens: %s', tokens.pformat()) - except pyparsing.ParseException as ex: - ex.explanation = u'%s\n%s\n%s' % (ex.pstr, u' ' * ex.loc + u'^\n', ex) + except pyparsing.exceptions.ParseException as ex: + ex.explanation = '%s\n%s\n%s' % (ex.pstr, ' ' * ex.loc + '^\n', ex) #if self.logging: # log.error('\n%s', ex.explanation) - log.warning(u'Query expression "{query}" is invalid. ' \ - u'Reason: {reason}\n{location}'.format( - query=self.cql, reason=unicode(ex), location=ex.explanation)) + log.warning('Query expression "{query}" is invalid. ' \ + 'Reason: {reason}\n{location}'.format( + query=self.cql, reason=str(ex), location=ex.explanation)) raise return tokens diff --git a/patzilla/util/cql/pyparsing/demo.py b/patzilla/util/cql/pyparsing/demo.py index 47573044..157773c6 100644 --- a/patzilla/util/cql/pyparsing/demo.py +++ b/patzilla/util/cql/pyparsing/demo.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # (c) 2014 Andreas Motl, Elmyra UG from . import CQL -from serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers +from .serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers def parse_cql(cql): c = CQL(cql) @@ -24,9 +24,9 @@ def enrich_cql(cql): def dump_results(tokens): cql = tokens_to_cql(tokens) - print "=" * 42 - print "tokens:", tokens - print "cql:", cql + print("=" * 42) + print("tokens:", tokens) + print("cql:", cql) def rundemo(): @@ -80,11 +80,11 @@ def rundemo(): # B.3. dump all triples triples = [] get_triples(tokens, triples) - print "triples:", triples + print("triples:", triples) # B.4. 
dump all keywords keywords = get_keywords(triples) - print "keywords:", keywords + print("keywords:", keywords) if __name__ == '__main__': diff --git a/patzilla/util/cql/pyparsing/parser.py b/patzilla/util/cql/pyparsing/parser.py index 12c6cfb2..5ab35363 100644 --- a/patzilla/util/cql/pyparsing/parser.py +++ b/patzilla/util/cql/pyparsing/parser.py @@ -21,11 +21,10 @@ Keyword, CaselessKeyword, \ Regex, \ alphas, nums, alphanums, quotedString, \ - oneOf, upcaseTokens, delimitedList, restOfLine, \ + oneOf, common, delimitedList, restOfLine, \ Forward, Group, Combine, Optional, ZeroOrMore, OneOrMore, \ NotAny, Suppress, FollowedBy, StringEnd, \ ParseResults, ParseException, removeQuotes -from patzilla.util.cql.pyparsing.util import get_literals log = logging.getLogger(__name__) @@ -54,19 +53,19 @@ TODO: maybe extract this to a different place, since ..services is also using it """ -wildcards = u'*?#!' +wildcards = '*?#!' # - classification terms (IPC, CPC) may contain forward slashes and dashes, e.g. H04L12/433, F17D5-00 # - numeric terms may contain punctuation (,.), e.g. 2.45 # - dates may contain dashes, e.g. M11-2009 -separators = u'/,.-' +separators = '/,.-' # limited set of unicode characters #umlauts = u'äöüÄÖÜß' # all unicode characters # http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659 -unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace()) +unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace()) # indexchars indexchars = alphanums + '{}!' @@ -92,16 +91,17 @@ def __init__(self): def preconfigure(self): # Binary comparison operators - self.cmp_single = u'= != < > <= >='.split() - self.cmp_perl = u'eq ne lt gt le ge'.split() - self.cmp_cql = u'exact within encloses all any any/relevant any/rel.lr'.split() + self.cmp_single = '= != < > <= >='.split() + self.cmp_perl = 'eq ne lt gt le ge'.split() + self.cmp_cql = 'exact within encloses all any any/relevant any/rel.lr'.split() # Boolean operators # TODO: Configure german operators with DPMAGrammar only - self.and_ = CaselessKeyword("and") | CaselessKeyword("UND") - self.or_ = CaselessKeyword("or") | CaselessKeyword("ODER") - self.not_ = CaselessKeyword("not") | CaselessKeyword("NICHT") - self.prox_ = CaselessKeyword("prox") | CaselessKeyword("NAHE") + self.booleans = ("and", "UND", "or", "ODER", "not", "NICHT", "prox", "NAHE") + self.and_ = CaselessKeyword(self.booleans[0]) | CaselessKeyword(self.booleans[1]) + self.or_ = CaselessKeyword(self.booleans[2]) | CaselessKeyword(self.booleans[3]) + self.not_ = CaselessKeyword(self.booleans[4]) | CaselessKeyword(self.booleans[5]) + self.prox_ = CaselessKeyword(self.booleans[6]) | CaselessKeyword(self.booleans[7]) # Neighbourhood term operators self.neighbourhood_symbols = '(W) (NOTW) (#W) (A) (#A) (P) (L)'.split() @@ -112,7 +112,6 @@ def configure(self): self.binop_symbols = self.cmp_single + self.cmp_perl + self.cmp_cql # Boolean operators - self.booleans = get_literals(self.and_, self.or_, self.not_, self.prox_) self.booleans_or = ( self.and_ | self.or_ | self.not_ | self.prox_ ) # Neighbourhood term operators @@ -134,7 +133,7 @@ def build(self): # ------------------------------------------ # C. 
building blocks # ------------------------------------------ - self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( upcaseTokens ).setName("termop") + self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( common.upcase_tokens ).setName("termop") termword = Word(self.unicode_printables + self.separators + self.wildcards).setName("term") termword_termop = (termword + OneOrMore( self.termop + termword )) diff --git a/patzilla/util/cql/pyparsing/searchparser.py b/patzilla/util/cql/pyparsing/searchparser.py index 880ebad1..e4b26ae1 100644 --- a/patzilla/util/cql/pyparsing/searchparser.py +++ b/patzilla/util/cql/pyparsing/searchparser.py @@ -65,18 +65,27 @@ Set = set except NameError: from sets import Set + from patzilla.util.cql.pyparsing.parser import separators, wildcards +import sys +_IS_PYTHON_3 = (sys.version_info[0] >= 3) +if _IS_PYTHON_3: + Set = set +else: + from sets import Set + # define characters comprising a word #wordchars = alphanums + separators + wildcards # all unicode characters # http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659 -unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace()) -more_chars = u'°' +unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace()) +more_chars = '°' wordchars = unicode_printables + more_chars + separators + wildcards + class SearchQueryParser: def __init__(self): @@ -272,14 +281,14 @@ class ParserTest(SearchQueryParser): } def GetWord(self, word): - if (self.index.has_key(word)): + if (word in self.index): return self.index[word] else: return Set() def GetWordWildcard(self, word): result = Set() - for item in self.index.keys(): + for item in list(self.index.keys()): if word == item[0:len(word)]: result = result.union(self.index[item]) return result @@ -292,27 +301,27 @@ def GetQuotes(self, search_string, tmp_result): return result def GetNot(self, not_set): - all = Set(self.docs.keys()) + all = Set(list(self.docs.keys())) return all.difference(not_set) def Test(self): all_ok = True - for item in self.tests.keys(): - print item + for item in list(self.tests.keys()): + print(item) r = self.Parse(item) e = self.tests[item] - print 'Result: %s' % r - print 'Expect: %s' % e + print('Result: %s' % r) + print('Expect: %s' % e) if e == r: - print 'Test OK' + print('Test OK') else: all_ok = False - print '>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<' - print '' + print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<') + print('') return all_ok if __name__=='__main__': if ParserTest().Test(): - print 'All tests OK' + print('All tests OK') else: - print 'One or more tests FAILED' \ No newline at end of file + print('One or more tests FAILED') diff --git a/patzilla/util/cql/pyparsing/serializer.py b/patzilla/util/cql/pyparsing/serializer.py index a078b5f8..3abe241b 100644 --- a/patzilla/util/cql/pyparsing/serializer.py +++ b/patzilla/util/cql/pyparsing/serializer.py @@ -3,7 +3,7 @@ import re import types import logging -import StringIO +import io from pyparsing import ParseResults from patzilla.util.cql.pyparsing.parser import CQLGrammar from patzilla.util.cql.pyparsing.util import walk_token_results, token_to_triple @@ -29,10 +29,10 @@ def tokens_to_cql(tokens): >>> tokens = parse_cql('foo=bar and baz=(qux or quux)') >>> tokens_to_cql(tokens) - u'foo=bar and baz=(qux or quux)' + 'foo=bar and baz=(qux or quux)' """ - 
buffer = StringIO.StringIO() + buffer = io.StringIO() tokens_to_cql_buffer(tokens, buffer) buffer.seek(0) return buffer.read() @@ -51,23 +51,23 @@ def tokens_to_cql_buffer(tokens, buffer): # surround binop with spaces for all operators but equality (=) if binop != '=': - triple[1] = u' {0} '.format(binop) + triple[1] = ' {0} '.format(binop) - payload = u''.join(triple) + payload = ''.join(triple) else: - payload = u''.join(token) + payload = ''.join(token) buffer.write(payload) elif name.startswith('subquery'): tokens_to_cql_buffer(token, buffer) - elif tokentype in types.StringTypes: + elif tokentype in (str,): out = token # surround all boolean operators with whitespace if token in grammar.booleans: - out = u' {0} '.format(token) + out = ' {0} '.format(token) buffer.write(out) def normalize_patentnumbers(tokens): @@ -77,7 +77,7 @@ def normalize_patentnumbers(tokens): >>> tokens = parse_cql('pn=EP666666') >>> normalize_patentnumbers(tokens) >>> tokens_to_cql(tokens) - u'pn=EP0666666' + 'pn=EP0666666' """ def action(token, index, binop, term): @@ -99,15 +99,15 @@ def get_keywords(triples, whitelist_indexes=None): >>> triples = []; get_triples(parse_cql('txt=foo or (bi=bar or bi=baz)'), triples) >>> get_keywords(triples) - [u'foo', u'bar', u'baz'] + ['foo', 'bar', 'baz'] >>> triples = []; get_triples(parse_cql('pa all "central, intelligence, agency"'), triples) >>> get_keywords(triples) - [u'central', u'intelligence', u'agency'] + ['central', 'intelligence', 'agency'] >>> triples = []; get_triples(parse_cql('foo=bar and baz=qux'), triples) >>> get_keywords(triples, ['baz']) - [u'qux'] + ['qux'] """ keywords = [] @@ -143,11 +143,11 @@ def trim_keywords(keywords): keywords and a list of keyword elements for multi-term keywords Example: - >>> trim_keywords([u'!!!daimler?', u'Misch?(P)?wasser']) - [u'daimler', [u'Misch', u'wasser']] + >>> trim_keywords(['!!!daimler?', 'Misch?(P)?wasser']) + ['daimler', ['Misch', 'wasser']] - >>> trim_keywords([u'"foo"', u'" bar "']) - [u'foo', u'bar'] + >>> trim_keywords(['"foo"', '" bar "']) + ['foo', 'bar'] """ keywords_trimmed = [] @@ -164,7 +164,7 @@ def get_triples(tokens, triples): >>> triples = []; get_triples(parse_cql('foo=bar and baz=(qux or quux)'), triples) >>> triples - [['foo', u'=', 'bar'], ['qux'], ['quux']] + [['foo', '=', 'bar'], ['qux'], ['quux']] """ for token in tokens: @@ -184,7 +184,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None): >>> tokens = parse_cql('foo=bar and baz=(qux or quux)') >>> expand_shortcut_notation(tokens) >>> tokens_to_cql(tokens) - u'foo=bar and (baz=qux or baz=quux)' + 'foo=bar and (baz=qux or baz=quux)' """ for token in tokens: @@ -200,7 +200,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None): # If it does, put term inside parenthesis, which got lost while performing shortcut expansion. if token: if re.match('.*(?:' + grammar.termop.pattern + ').*', token[0], flags=grammar.termop.flags): - token[0] = u'({0})'.format(token[0]) + token[0] = '({0})'.format(token[0]) # Process triple in value shortcut notation (contains only the single term). # Take action: Insert index and binop from subquery context. 
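The doctest updates in the .rst test files below are largely mechanical: under Python 3 there is only str, and repr() no longer prints a u'' prefix, so every expected value changes from u'...' to '...'. A minimal illustrative sketch (plain standard-library Python, not code taken from this repository):

    # Python 3: text literals are str objects; repr() carries no u'' prefix.
    value = "radaufstandskräfte"
    print(repr(value))    # -> 'radaufstandskräfte'
    # The same value as a Python 2 unicode object printed as u'radaufstandskr\xe4fte',
    # which is exactly the difference visible in the updated doctest expectations.
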
diff --git a/patzilla/util/cql/pyparsing/test/01_spec.rst b/patzilla/util/cql/pyparsing/test/01_spec.rst index de7571ed..a5395600 100644 --- a/patzilla/util/cql/pyparsing/test/01_spec.rst +++ b/patzilla/util/cql/pyparsing/test/01_spec.rst @@ -20,52 +20,52 @@ Simple queries ============== >>> CQL('dinosaur').dumps() -u'dinosaur' +'dinosaur' >>> CQL('"complete dinosaur"').dumps() -u'"complete dinosaur"' +'"complete dinosaur"' >>> CQL('title = "complete dinosaur"').dumps() -u'title="complete dinosaur"' +'title="complete dinosaur"' >>> CQL('title exact "the complete dinosaur"').dumps() -u'title exact "the complete dinosaur"' +'title exact "the complete dinosaur"' Queries using Boolean logic =========================== >>> CQL('dinosaur or bird').dumps() -u'dinosaur or bird' +'dinosaur or bird' .. note:: **FIXME: enhance grammar** >>> #CQL('Palomar assignment and "ice age"').dumps() >>> CQL('dinosaur not reptile').dumps() -u'dinosaur not reptile' +'dinosaur not reptile' >>> CQL('dinosaur and bird or dinobird').dumps() -u'dinosaur and bird or dinobird' +'dinosaur and bird or dinobird' >>> CQL('(bird or dinosaur) and (feathers or scales)').dumps() -u'(bird or dinosaur) and (feathers or scales)' +'(bird or dinosaur) and (feathers or scales)' >>> CQL('"feathered dinosaur" and (yixian or jehol)').dumps() -u'"feathered dinosaur" and (yixian or jehol)' +'"feathered dinosaur" and (yixian or jehol)' Queries accessing publication indexes ===================================== >>> CQL('publicationYear < 1980').dumps() -u'publicationYear < 1980' +'publicationYear < 1980' >>> CQL('lengthOfFemur > 2.4').dumps() -u'lengthOfFemur > 2.4' +'lengthOfFemur > 2.4' >>> CQL('bioMass >= 100').dumps() -u'bioMass >= 100' +'bioMass >= 100' Queries based on the proximity of words to each other in a document @@ -82,17 +82,17 @@ Queries across multiple dimensions ================================== >>> CQL('date within "2002 2005"').dumps() -u'date within "2002 2005"' +'date within "2002 2005"' >>> CQL('dateRange encloses 2003').dumps() -u'dateRange encloses 2003' +'dateRange encloses 2003' Queries based on relevance ========================== >>> CQL('subject any/relevant "fish frog"').dumps() -u'subject any/relevant "fish frog"' +'subject any/relevant "fish frog"' >>> CQL('subject any/rel.lr "fish frog"').dumps() -u'subject any/rel.lr "fish frog"' +'subject any/rel.lr "fish frog"' diff --git a/patzilla/util/cql/pyparsing/test/05_misc.rst b/patzilla/util/cql/pyparsing/test/05_misc.rst index ed175ba6..4ccdce8d 100644 --- a/patzilla/util/cql/pyparsing/test/05_misc.rst +++ b/patzilla/util/cql/pyparsing/test/05_misc.rst @@ -15,14 +15,14 @@ Queries with UTF-8 characters Try parsing a query containing utf-8 characters. ->>> CQL(u'title=molécules').dumps() -u'title=mol\xe9cules' +>>> CQL('title=molécules').dumps() +'title=mol\xe9cules' ->>> CQL(u'inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps() -u'inventor="CEGARRA SERRANO JOS\xc9 MARIANO"' +>>> CQL('inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps() +'inventor="CEGARRA SERRANO JOS\xc9 MARIANO"' ->>> CQL(u'ab=radaufstandskraft or ab=radaufstandskräfte?').dumps() -u'ab=radaufstandskraft or ab=radaufstandskr\xe4fte?' +>>> CQL('ab=radaufstandskraft or ab=radaufstandskräfte?').dumps() +'ab=radaufstandskraft or ab=radaufstandskr\xe4fte?' # TODO: use more esoteric utf-8 characters, e.g. special chars et al. @@ -30,7 +30,7 @@ Queries using wildcards ======================= >>> CQL('txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re').dumps() -u'txt=footw or txt=footw? 
or txt=footw# or txt=footw! and txt=footw*re' +'txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re' Query with comments @@ -41,16 +41,16 @@ Query with comments ... (baz or qux)) -- comment 2 ... ... """).dumps() -u'foo=(bar and (baz or qux))' +'foo=(bar and (baz or qux))' Weird queries ============= >>> CQL(' foobar ').dumps() -u'foobar' +'foobar' >>> CQL('(((foobar)))').dumps() -u'(((foobar)))' +'(((foobar)))' Queries with errors @@ -61,14 +61,14 @@ Nonsense >>> CQL('foo bar', logging=False).dumps() Traceback (most recent call last): ... -ParseException: Expected end of text (at char 4), (line:1, col:5) +ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5) Lacking terms ------------- >>> CQL('foo=', logging=False).dumps() Traceback (most recent call last): ... -ParseException: Expected term (at char 4), (line:1, col:5) +ParseException: Expected end of text, found 'bar' (at char 9), (line:1, col:10) >>> CQL('foo= and bar=', logging=False).dumps() Traceback (most recent call last): @@ -92,12 +92,12 @@ Unknown binops >>> CQL('foo % bar', logging=False).dumps() Traceback (most recent call last): ... -ParseException: Expected end of text (at char 4), (line:1, col:5) +ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5) Error explanation ----------------- >>> try: -... CQL(u'foo bar', logging=False).dumps() +... CQL('foo bar', logging=False).dumps() ... except Exception as ex: ... ex.explanation -u'foo bar\n ^\n\nExpected end of text (at char 4), (line:1, col:5)' +"foo bar\n ^\n\nExpected end of text, found 'bar' (at char 4), (line:1, col:5)" diff --git a/patzilla/util/cql/pyparsing/test/10_extensions.rst b/patzilla/util/cql/pyparsing/test/10_extensions.rst index 73c72e1e..02d217bc 100644 --- a/patzilla/util/cql/pyparsing/test/10_extensions.rst +++ b/patzilla/util/cql/pyparsing/test/10_extensions.rst @@ -18,13 +18,13 @@ Patent number normalization First, check parsing and reproducing a query for a publication number without normalization applied: >>> CQL('pn=EP666666').dumps() -u'pn=EP666666' +'pn=EP666666' Then, check whether normalization works correctly. Here, the EP document number should get zero-padded properly: >>> CQL('pn=EP666666').normalize_numbers().dumps() -u'pn=EP0666666' +'pn=EP0666666' Keyword extraction @@ -33,13 +33,13 @@ Keyword extraction First, make sure the query can actually be parsed: >>> CQL('bi=greifer and pc=de').dumps() -u'bi=greifer and pc=de' +'bi=greifer and pc=de' Then, check the list of extracted keywords: >>> CQL('bi=greifer and pc=de').keywords() -[u'greifer'] +['greifer'] Details @@ -53,8 +53,8 @@ because index name "pc" is not whitelisted. We can have a look at the layer below, where raw triples got extracted from the query string, that's the step just before collecting the keywords: ->>> CQL(u'bi=greifer and pc=de').triples() -[[u'bi', u'=', u'greifer'], [u'pc', u'=', u'de']] +>>> CQL('bi=greifer and pc=de').triples() +[['bi', '=', 'greifer'], ['pc', '=', 'de']] This shows we also have access to the "pc=de" condition if there's demand for enhanced query analytics in the future. @@ -70,13 +70,13 @@ Parse and reproduce a cql query containing a nested expression in value shortcut Our old token-based parser wasn't capable doing this. 
>>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').dumps() -u'bi=(socke and (Inlay or Teile)) and pc=de' +'bi=(socke and (Inlay or Teile)) and pc=de' Expand the value shortcut notation: >>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().dumps() -u'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de' +'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de' Special operators @@ -86,7 +86,7 @@ Boolean operators (binops) in german ------------------------------------ >>> CQL('BI=Socke und PA=onion').dumps() -u'BI=Socke UND PA=onion' +'BI=Socke UND PA=onion' @@ -108,7 +108,7 @@ Verbatim reproduction The query should be reproduced verbatim when not applying any expansion or normalization: >>> CQL(query).dumps() -u'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)' +'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)' Polishing @@ -116,12 +116,12 @@ Polishing After shortcut expansion and number normalization, we should see zero-padded EP document numbers: >>> CQL(query).polish().dumps() -u'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)' +'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)' Terms from conditions for classification- or fulltext-indexes should count towards keywords: >>> CQL(query).polish().keywords() -[u'H04L12/433', u'communication'] +['H04L12/433', 'communication'] Details @@ -130,13 +130,13 @@ Even without polishing the query, the keywords should be the same, since "cpc" and "txt" conditions both are not in value shortcut notation. >>> CQL(query).keywords() -[u'H04L12/433', u'communication'] +['H04L12/433', 'communication'] On the other hand, number normalization for numbers in value shortcut notation obviously does not work when not having shortcut expansion applied before: >>> CQL('pn=(EP666666 or EP666667)').normalize_numbers().dumps() -u'pn=(EP666666 or EP666667)' +'pn=(EP666666 or EP666667)' Nesting and keywords @@ -146,4 +146,4 @@ We especially want to properly extract keywords from nested expressions, even when they are in value shortcut notation. >>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().keywords() -[u'socke', u'Inlay', u'Teile'] +['socke', 'Inlay', 'Teile'] diff --git a/patzilla/util/cql/pyparsing/test/15_ops.rst b/patzilla/util/cql/pyparsing/test/15_ops.rst index eae17e30..65d244f8 100644 --- a/patzilla/util/cql/pyparsing/test/15_ops.rst +++ b/patzilla/util/cql/pyparsing/test/15_ops.rst @@ -20,7 +20,7 @@ Date range Test date range condition used when extrapolating from vanity url, e.g. /publicationdate/2014W10. >>> CQL('publicationdate within 2014-03-10,2014-03-16').dumps() -u'publicationdate within 2014-03-10,2014-03-16' +'publicationdate within 2014-03-10,2014-03-16' Examples from OPS reference guide @@ -37,23 +37,23 @@ CQL examples Original CQL examples from reference guide. >>> CQL('ti all "green, energy"').dumps() -u'ti all "green, energy"' +'ti all "green, energy"' .. note:: **FIXME: enhance grammar** >>> #CQL('ti=green prox/unit=world ti=energy').dumps() >>> CQL('pd within "20051212 20051214"').dumps() -u'pd within "20051212 20051214"' +'pd within "20051212 20051214"' >>> CQL('pd="20051212 20051214"').dumps() -u'pd="20051212 20051214"' +'pd="20051212 20051214"' >>> CQL('ia any "John, Smith"').dumps() -u'ia any "John, Smith"' +'ia any "John, Smith"' >>> CQL('pn=EP and pr=GB').dumps() -u'pn=EP and pr=GB' +'pn=EP and pr=GB' .. 
note:: **FIXME: enhance grammar** @@ -62,19 +62,19 @@ u'pn=EP and pr=GB' >>> #CQL('(ta=green prox/distance<=3 ta=energy) or (ta=renewable prox/distance<=3 ta=energy)').dumps() >>> CQL('pa all "central, intelligence, agency" and US').dumps() -u'pa all "central, intelligence, agency" and US' +'pa all "central, intelligence, agency" and US' >>> CQL('pa all "central, intelligence, agency" and US and pd>2000').dumps() -u'pa all "central, intelligence, agency" and US and pd > 2000' +'pa all "central, intelligence, agency" and US and pd > 2000' >>> CQL('pd < 18000101').dumps() -u'pd < 18000101' +'pd < 18000101' >>> CQL('ta=synchroni#ed').dumps() -u'ta=synchroni#ed' +'ta=synchroni#ed' >>> CQL('EP and 2009 and Smith').dumps() -u'EP and 2009 and Smith' +'EP and 2009 and Smith' .. note:: **FIXME: enhance grammar** @@ -91,23 +91,23 @@ Shortcut notation expansion All these should not be affected by any query manipulation. Prove that. >>> CQL('pa all "central, intelligence, agency" and US').polish().dumps() -u'pa all "central, intelligence, agency" and US' +'pa all "central, intelligence, agency" and US' >>> CQL('pa all "central, intelligence, agency" and US and pd>2000').polish().dumps() -u'pa all "central, intelligence, agency" and US and pd > 2000' +'pa all "central, intelligence, agency" and US and pd > 2000' >>> CQL('EP and 2009 and Smith').polish().dumps() -u'EP and 2009 and Smith' +'EP and 2009 and Smith' Keyword extraction ------------------ >>> CQL('pa all "central, intelligence, agency" and US').polish().keywords() -[u'central', u'intelligence', u'agency'] +['central', 'intelligence', 'agency'] >>> CQL('pa all "central intelligence agency" and US').polish().keywords() -[u'central', u'intelligence', u'agency'] +['central', 'intelligence', 'agency'] .. note:: **FIXME: enhance parser smartness: follow rules outlined on p. 148, section 4.2. CQL index catalogue** diff --git a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst index 3f22af00..176137dd 100644 --- a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst +++ b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst @@ -27,19 +27,19 @@ Test some logic operators localized to german. Getting started --------------- >>> CQL('bi=(greifer oder bagger)').dumps() -u'bi=(greifer ODER bagger)' +'bi=(greifer ODER bagger)' Made up ------- Try to understand the query. ->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps() -u'bi=((wasser UND Getr\xe4nk) NICHT (?hahn ODER ?zapf ODER (k\xfchl? ODER ?k\xfchl)))' +>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps() +'bi=((wasser UND Getr\xe4nk) NICHT (?hahn ODER ?zapf ODER (k\xfchl? ODER ?k\xfchl)))' Extract keywords from query. ->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').polish().keywords() -[u'wasser', u'Getr\xe4nk', u'hahn', u'zapf', u'k\xfchl', u'k\xfchl'] +>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? 
oder ?kühl)))').polish().keywords() +['wasser', 'Getr\xe4nk', 'hahn', 'zapf', 'k\xfchl', 'k\xfchl'] Neighbourhood operators @@ -50,18 +50,18 @@ Getting started Try a bareword query string containing a neighbourhood term operator: ->>> CQL(u'L(W)Serine').dumps() -u'L(W)Serine' +>>> CQL('L(W)Serine').dumps() +'L(W)Serine' Try the same in the context of a real condition (triple): ->>> CQL(u'ab=(L(W)Serine)').dumps() -u'ab=(L(W)Serine)' +>>> CQL('ab=(L(W)Serine)').dumps() +'ab=(L(W)Serine)' Check this works caseless as well: ->>> CQL(u'L(w)Serine').dumps() -u'L(W)Serine' +>>> CQL('L(w)Serine').dumps() +'L(W)Serine' Made up @@ -69,14 +69,14 @@ Made up Try some more complex queries containing neighbourhood term operators and wildcards. ->>> CQL(u'bi=(Cry1?(L)resist?)').dumps() -u'bi=(Cry1?(L)resist?)' +>>> CQL('bi=(Cry1?(L)resist?)').dumps() +'bi=(Cry1?(L)resist?)' ->>> CQL(u'bi=(Cry1?(5A)tox?)').dumps() -u'bi=(Cry1?(5A)tox?)' +>>> CQL('bi=(Cry1?(5A)tox?)').dumps() +'bi=(Cry1?(5A)tox?)' ->>> CQL(u'bi=(Misch?(P)?wasser)').dumps() -u'bi=(Misch?(P)?wasser)' +>>> CQL('bi=(Misch?(P)?wasser)').dumps() +'bi=(Misch?(P)?wasser)' @@ -93,91 +93,91 @@ Search examples --------------- >>> CQL('PA= siemens').dumps() -u'PA=siemens' +'PA=siemens' >>> CQL('PUB= 01.03.2010 UND PA= siemens').dumps() -u'PUB=01.03.2010 UND PA=siemens' +'PUB=01.03.2010 UND PA=siemens' >>> CQL('PA= siemens UND IN= Braun UND PUB>= 01.03.2010').dumps() -u'PA=siemens UND IN=Braun UND PUB >= 01.03.2010' +'PA=siemens UND IN=Braun UND PUB >= 01.03.2010' >>> CQL('PUB= M11-2009 UND PA= daimler?').dumps() -u'PUB=M11-2009 UND PA=daimler?' +'PUB=M11-2009 UND PA=daimler?' ->>> CQL(u'AB = !!!lösung').dumps() -u'AB=!!!l\xf6sung' +>>> CQL('AB = !!!lösung').dumps() +'AB=!!!l\xf6sung' >>> CQL('TI = ###heizung').dumps() -u'TI=###heizung' +'TI=###heizung' >>> CQL('CL = ?fahrzeug').dumps() -u'CL=?fahrzeug' +'CL=?fahrzeug' >>> CQL('BI= (programmabschnitt# UND administra?)').dumps() -u'BI=(programmabschnitt# UND administra?)' +'BI=(programmabschnitt# UND administra?)' >>> CQL('ICB=F17D5/00').dumps() -u'ICB=F17D5/00' +'ICB=F17D5/00' >>> CQL('ICB=F17D5-00').dumps() -u'ICB=F17D5-00' +'ICB=F17D5-00' >>> CQL("ICB='F17D 5/00'").dumps() -u"ICB='F17D 5/00'" +"ICB='F17D 5/00'" >>> CQL('ICB=F17D0005000000').dumps() -u'ICB=F17D0005000000' +'ICB=F17D0005000000' >>> CQL('ICP=F17D5/00M').dumps() -u'ICP=F17D5/00M' +'ICP=F17D5/00M' >>> CQL('ICP=F17D5-00M').dumps() -u'ICP=F17D5-00M' +'ICP=F17D5-00M' >>> CQL("ICP='F17D 5/00 M'").dumps() -u"ICP='F17D 5/00 M'" +"ICP='F17D 5/00 M'" >>> CQL('ICP=F17D000500000M').dumps() -u'ICP=F17D000500000M' +'ICP=F17D000500000M' >>> CQL('ICB=F04D13/?').dumps() -u'ICB=F04D13/?' +'ICB=F04D13/?' >>> CQL('ICB=F04D13-?').dumps() -u'ICB=F04D13-?' +'ICB=F04D13-?' >>> CQL("ICB='F04D 13/?'").dumps() -u"ICB='F04D 13/?'" +"ICB='F04D 13/?'" >>> CQL('ICB=F04D0013?').dumps() -u'ICB=F04D0013?' +'ICB=F04D0013?' 
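The hunks above and below change nothing but the expected doctest output: the u'' prefix disappears because Python 3 writes the repr of a text string without it. As an illustrative sketch only (not the project's actual test harness), one of these doctest files can be exercised directly under Python 3; the CQL import path and the working directory are assumptions:

    import doctest
    # Assumption: CQL is importable from this location and the repository root
    # is the current working directory.
    from patzilla.util.cql.pyparsing import CQL

    # Run the DEPATISnet doctest file touched above; its expectations only match
    # on Python 3, where repr() of a str carries no u'' prefix.
    result = doctest.testfile(
        "patzilla/util/cql/pyparsing/test/20_depatisnet.rst",
        module_relative=False,
        globs={"CQL": CQL},
        optionflags=doctest.NORMALIZE_WHITESPACE,
    )
    print("attempted={0} failed={1}".format(result.attempted, result.failed))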
Search examples for the proximity operator (NOTW) ------------------------------------------------- >>> CQL('Bi= (Regler und (mechanische(NOTW)Regler))').dumps() -u'Bi=(Regler UND (mechanische(NOTW)Regler))' +'Bi=(Regler UND (mechanische(NOTW)Regler))' >>> CQL('Bi= (Regler und (mechanische (NOTW) Regler))').dumps() -u'Bi=(Regler UND (mechanische (NOTW) Regler))' +'Bi=(Regler UND (mechanische (NOTW) Regler))' Searches in the text fields "Title", "Abstract", "Description", "Claims", "Full text data" ------------------------------------------------------------------------------------------ >>> CQL('TI = ( DVB(W)T )').dumps() -u'TI=(DVB(W)T)' +'TI=(DVB(W)T)' >>> CQL('Bi= (personalcomputer oder (personal(W)computer))').dumps() -u'Bi=(personalcomputer ODER (personal(W)computer))' +'Bi=(personalcomputer ODER (personal(W)computer))' Searches in the fields "Applicant/owner", "Inventor" ---------------------------------------------------- >>> CQL('PA = ( Anna(L)Huber )').dumps() -u'PA=(Anna(L)Huber)' +'PA=(Anna(L)Huber)' Keywords @@ -185,7 +185,7 @@ Keywords Try some more complex queries containing *value shortcut notations*, *neighbourhood term operators* and *wildcards*. ->>> largequery = u""" +>>> largequery = """ ... (PA= siemens UND IN= Braun UND PUB>= 01.03.2010) or ... (PUB=M11-2009 UND PA=daimler?) or ... (AB = (!!!lösung or ###heizung or ?fahrzeug)) or @@ -195,10 +195,10 @@ Try some more complex queries containing *value shortcut notations*, *neighbourh ... """ >>> CQL(largequery).dumps() -u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)" +"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)" >>> CQL(largequery).keywords() -[u'siemens', u'Braun', u'daimler', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler']] +['siemens', 'Braun', 'daimler', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler']] Polishing @@ -207,20 +207,20 @@ Polishing Polishing a query, especially the shortcut notation expansion, should not corrupt query syntax. >>> CQL('TI = ( DVB(W)T )').polish().dumps() -u'TI=(DVB(W)T)' +'TI=(DVB(W)T)' >>> CQL('Bi= (personalcomputer oder (personal(W)computer))').polish().dumps() -u'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))' +'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))' >>> CQL('bi=(Cry1?(L)resist?)').polish().dumps() -u'bi=(Cry1?(L)resist?)' +'bi=(Cry1?(L)resist?)' >>> CQL(largequery).polish().dumps() -u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) or bi=(Misch?(P)?wasser))" +"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) 
or bi=(Misch?(P)?wasser))" >>> CQL(largequery).polish().keywords() -[u'siemens', u'Braun', u'daimler', u'l\xf6sung', u'heizung', u'fahrzeug', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler'], [u'Cry1', u'resist'], [u'Cry1', u'tox'], [u'Misch', u'wasser']] +['siemens', 'Braun', 'daimler', 'l\xf6sung', 'heizung', 'fahrzeug', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler'], ['Cry1', 'resist'], ['Cry1', 'tox'], ['Misch', 'wasser']] From the wild @@ -233,18 +233,18 @@ Query 1 Reproduce verbatim: ->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps()) +>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps()) (ab=radaufstandskraft or ab=radaufstandskräfte?) Reproduce with polishing: ->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps()) +>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps()) (ab=radaufstandskraft or ab=radaufstandskräfte?) Extract keywords after polishing: ->>> CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords() -[u'radaufstandskraft', u'radaufstandskr\xe4fte'] +>>> CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords() +['radaufstandskraft', 'radaufstandskr\xe4fte'] Query 2 @@ -252,18 +252,18 @@ Query 2 Reproduce verbatim: ->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps()) +>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps()) bi=((warm(P)walzen) and (band(P)mitte and messung)) ODER bi=((warm and walzen) and (band and säbel and messung)) ODER bi=((warm and walzen) and (mitten UND messung)) ODER BI=((reversiergerüst) UND (breitenmessung)) Reproduce with polishing: ->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps()) +>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps()) ((bi=(warm(P)walzen)) and (bi=(band(P)mitte) and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=band and bi=säbel and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=mitten UND bi=messung)) ODER ((BI=reversiergerüst) UND (BI=breitenmessung)) Extract keywords after polishing: ->>> CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords() -[[u'warm', u'walzen'], [u'band', u'mitte'], u'messung', u'warm', u'walzen', u'band', u's\xe4bel', u'messung', u'warm', u'walzen', u'mitten', u'messung', u'reversierger\xfcst', u'breitenmessung'] +>>> CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords() +[['warm', 
'walzen'], ['band', 'mitte'], 'messung', 'warm', 'walzen', 'band', 's\xe4bel', 'messung', 'warm', 'walzen', 'mitten', 'messung', 'reversierger\xfcst', 'breitenmessung'] Query 3 @@ -271,18 +271,18 @@ Query 3 Reproduce verbatim: ->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps()) +>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps()) bi=((hot(P)rolling) and (strip(P)center and measurement) ODER (hot and rolling) and (strip and camber and measurement)) ODER bi=((reversing and mill) and (camber)) ODER bi=((hot and steel) and (center and measurement)) ODER BI=((hot(P)slab) UND (position(P)measurement)) ODER BI=((hot(P)strip) UND (position(P)measurement)) Reproduce with polishing: ->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps()) +>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps()) ((bi=(hot(P)rolling)) and (bi=(strip(P)center) and bi=measurement) ODER (bi=hot and bi=rolling) and (bi=strip and bi=camber and bi=measurement)) ODER ((bi=reversing and bi=mill) and (bi=camber)) ODER ((bi=hot and bi=steel) and (bi=center and bi=measurement)) ODER ((BI=(hot(P)slab)) UND (BI=(position(P)measurement))) ODER ((BI=(hot(P)strip)) UND (BI=(position(P)measurement))) Extract keywords after polishing: ->>> CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords() -[[u'hot', u'rolling'], [u'strip', u'center'], u'measurement', u'hot', u'rolling', u'strip', u'camber', u'measurement', u'reversing', u'mill', u'camber', u'hot', u'steel', u'center', u'measurement', [u'hot', u'slab'], [u'position', u'measurement'], [u'hot', u'strip'], [u'position', u'measurement']] +>>> CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords() +[['hot', 'rolling'], ['strip', 'center'], 'measurement', 'hot', 'rolling', 
'strip', 'camber', 'measurement', 'reversing', 'mill', 'camber', 'hot', 'steel', 'center', 'measurement', ['hot', 'slab'], ['position', 'measurement'], ['hot', 'strip'], ['position', 'measurement']] Query 4 @@ -290,15 +290,15 @@ Query 4 Reproduce verbatim: ->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps()) +>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps()) BI=((finne? or (flying(1A)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(W)joint) or quadrilaterale or quatre)) Reproduce with polishing: ->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps()) +>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps()) ((BI=finne? or (BI=(flying(1A)buttress?)) or BI=fins or BI=effillee?) and (BI=viergelenk? or BI=mehrgelenk? or BI=quadrilateral? or BI=quadruple? or (BI=(four(W)joint)) or BI=quadrilaterale or BI=quatre)) Extract keywords after polishing: ->>> CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords() -[u'finne', [u'flying', u'buttress'], u'fins', u'effillee', u'viergelenk', u'mehrgelenk', u'quadrilateral', u'quadruple', [u'four', u'joint'], u'quadrilaterale', u'quatre'] +>>> CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords() +['finne', ['flying', 'buttress'], 'fins', 'effillee', 'viergelenk', 'mehrgelenk', 'quadrilateral', 'quadruple', ['four', 'joint'], 'quadrilaterale', 'quatre'] diff --git a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst index 3659f21e..0385cfc7 100644 --- a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst +++ b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst @@ -29,16 +29,16 @@ Test some logic operators localized to german. Getting started --------------- >>> CQL('pnctry:EP AND text:vibrat*').dumps() -u'pnctry : EP and text : vibrat*' +'pnctry : EP and text : vibrat*' Made up ------- Try to understand the query. ->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps() -u'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))' +>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps() +'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))' Extract keywords from query. 
->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords() -[u'vibrat', u'G01F000184', u'G01F000184'] +>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords() +['vibrat', 'G01F000184', 'G01F000184'] diff --git a/patzilla/util/cql/pyparsing/util.py b/patzilla/util/cql/pyparsing/util.py index a202a99b..199a804c 100644 --- a/patzilla/util/cql/pyparsing/util.py +++ b/patzilla/util/cql/pyparsing/util.py @@ -2,14 +2,6 @@ # (c) 2014-2016 Andreas Motl, Elmyra UG from pyparsing import ParseResults -def get_literals(*elements): - literals = [] - for element in elements: - for literal in element: - literal = unicode(literal).strip('"').strip("'") - literals.append(literal) - return literals - def walk_token_results(tokens, *args, **kwargs): for token in tokens: diff --git a/patzilla/util/cql/util.py b/patzilla/util/cql/util.py index 07787f47..d117bf5f 100644 --- a/patzilla/util/cql/util.py +++ b/patzilla/util/cql/util.py @@ -15,7 +15,7 @@ def pair_to_cql(datasource, key, value): return cql_part = None - format = u'{0}=({1})' + format = '{0}=({1})' # Special processing rules for depatisnet if datasource == 'depatisnet': @@ -94,7 +94,7 @@ def pair_to_cql(datasource, key, value): if key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) if key == 'pubdate': diff --git a/patzilla/util/crypto/jwt.py b/patzilla/util/crypto/jwt.py index 204727ba..194728f4 100644 --- a/patzilla/util/crypto/jwt.py +++ b/patzilla/util/crypto/jwt.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- -# (c) 2014-2022 Andreas Motl -from __future__ import absolute_import + +# (c) 2014 Andreas Motl, Elmyra UG + import logging from datetime import datetime, timedelta import python_jwt from jwcrypto import jwk from zope.interface.interface import Interface -from zope.interface.declarations import implements +#from zope.interface.declarations import implements +from zope.interface import implementer log = logging.getLogger(__name__) @@ -16,6 +18,7 @@ class ISigner(Interface): pass +@implementer(ISigner) class JwtSigner(object): """ Generate and verify JSON Web Tokens. @@ -26,7 +29,7 @@ class JwtSigner(object): - https://jwcrypto.readthedocs.io/ """ - implements(ISigner) +# py27 implements(ISigner) def __init__(self, key=None, ttl=None): self.key = key @@ -86,7 +89,7 @@ def unsign(self, token): iat_skew=timedelta(minutes=5), ) - if not payload.has_key('data'): + if 'data' not in payload: error_payload = { 'location': 'JSON Web Token', 'name': self.__class__.__name__, diff --git a/patzilla/util/data/container.py b/patzilla/util/data/container.py index d9c06532..22b3b05e 100644 --- a/patzilla/util/data/container.py +++ b/patzilla/util/data/container.py @@ -2,11 +2,11 @@ # (c) 2016 Andreas Motl, Elmyra UG import json import types -from bunch import Bunch +from munch import Munch from jsonpointer import JsonPointer -class SmartBunch(Bunch): +class SmartMunch(Munch): def dump(self): return self.toJSON() @@ -18,15 +18,15 @@ def prettify(self): return self.pretty() @classmethod - def bunchify(cls, x): + def munchify(cls, x): """ - Recursively transforms a dictionary into a SmartBunch via copy. - Generic "bunchify", also works with descendants of Bunch. + Recursively transforms a dictionary into a SmartMunch via copy. + Generic "munchify", also works with descendants of Munch. 
""" if isinstance(x, dict): - return cls( (k, cls.bunchify(v)) for k,v in x.iteritems() ) + return cls( (k, cls.munchify(v)) for k,v in x.items() ) elif isinstance(x, (list, tuple)): - return type(x)( cls.bunchify(v) for v in x ) + return type(x)( cls.munchify(v) for v in x ) else: return x @@ -35,7 +35,7 @@ def unique_sequence(seq): # https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-python-whilst-preserving-order/480227#480227 seen = set() seen_add = seen.add - unhashable_types = (types.ListType, types.DictionaryType) + unhashable_types = (list, dict) return [x for x in seq if type(x) in unhashable_types or not (x in seen or seen_add(x))] diff --git a/patzilla/util/data/orderedset.py b/patzilla/util/data/orderedset.py index 5ba05be5..43e45da1 100644 --- a/patzilla/util/data/orderedset.py +++ b/patzilla/util/data/orderedset.py @@ -2,7 +2,7 @@ # Set that remembers original insertion order. import collections -class OrderedSet(collections.MutableSet): +class OrderedSet(collections.abc.MutableSet): def __init__(self, iterable=None): self.end = end = [] @@ -64,6 +64,6 @@ def __eq__(self, other): if __name__ == '__main__': s = OrderedSet('abracadaba') t = OrderedSet('simsalabim') - print(s | t) - print(s & t) - print(s - t) + print((s | t)) + print((s & t)) + print((s - t)) diff --git a/patzilla/util/data/zip.py b/patzilla/util/data/zip.py index 9c9b472b..9879aa9c 100644 --- a/patzilla/util/data/zip.py +++ b/patzilla/util/data/zip.py @@ -17,7 +17,7 @@ def zip_multi(multi): now = time.localtime(time.time())[:6] # http://stackoverflow.com/questions/434641/how-do-i-set-permissions-attributes-on-a-file-in-a-zip-file-using-pythons-zip/434689#434689 - unix_permissions = 0644 << 16L + unix_permissions = 0o644 << 16 # add index file for drawings """ diff --git a/patzilla/util/database/beaker_mongodb.py b/patzilla/util/database/beaker_mongodb.py index 4ccab80c..21938c99 100644 --- a/patzilla/util/database/beaker_mongodb.py +++ b/patzilla/util/database/beaker_mongodb.py @@ -184,235 +184,194 @@ before upgrading to 0.5+ and be aware that it will generate new caches. 
- +A part of this code is a copy of https://raw.githubusercontent.com/bbangert/beaker/master/beaker/ext/mongodb.py 2023-03-22 """ -import logging -from beaker.container import NamespaceManager, Container -from beaker.exceptions import InvalidCacheBackendError, MissingCacheParameter -from beaker.synchronization import null_synchronizer -from beaker.util import verify_directory, SyncDict - -from StringIO import StringIO -try: - import cPickle as pickle -except ImportError: - import pickle +import datetime +import os +import threading +import time +import pickle try: - from pymongo.connection import Connection + import pymongo + import pymongo.errors import bson - import bson.errors except ImportError: - raise InvalidCacheBackendError("Unable to load the pymongo driver.") - -log = logging.getLogger(__name__) -#log.setLevel(logging.DEBUG) - -class MongoDBNamespaceManager(NamespaceManager): - clients = SyncDict() - _pickle = True - _sparse = False + pymongo = None + bson = None - # TODO _- support write concern / safe - def __init__(self, namespace, url=None, data_dir=None, skip_pickle=False, - sparse_collection=False, **params): - NamespaceManager.__init__(self, namespace) +from beaker.container import NamespaceManager +from beaker.synchronization import SynchronizerImpl +from beaker.util import SyncDict, machine_identifier +from beaker.crypto.util import sha1 +from beaker._compat import string_type, PY2 - if not url: - raise MissingCacheParameter("MongoDB url is required") - if skip_pickle: - log.info("Disabling pickling for namespace: %s" % self.namespace) - self._pickle = False +class MongoNamespaceManager(NamespaceManager): + """Provides the :class:`.NamespaceManager` API over MongoDB. - if sparse_collection: - log.info("Separating data to one row per key (sparse collection) for ns %s ." % self.namespace) - self._sparse = True + Provided ``url`` can be both a mongodb connection string or + an already existing MongoClient instance. - # Temporarily uses a local copy of the functions until pymongo upgrades to new parser code - (host_list, database, username, password, collection, options) = _parse_uri(url) - - if database and host_list: - data_key = "mongodb:%s" % (database) - else: - raise MissingCacheParameter("Invalid Cache URL. Cannot parse.") - - def _create_mongo_conn(): - host_uri = 'mongodb://' - for x in host_list: - host_uri += '%s:%s' % x - log.info("Host URI: %s" % host_uri) - conn = Connection(host_uri, slave_okay=options.get('slaveok', False)) + The data will be stored into ``beaker_cache`` collection of the + *default database*, so make sure your connection string or + MongoClient point to a default database. + """ + MAX_KEY_LENGTH = 1024 - db = conn[database] + clients = SyncDict() - if username: - log.info("Attempting to authenticate %s/%s " % (username, password)) - if not db.authenticate(username, password): - raise InvalidCacheBackendError('Cannot authenticate to ' - ' MongoDB.') - return db[collection] + def __init__(self, namespace, url, **kw): + super(MongoNamespaceManager, self).__init__(namespace) + self.lock_dir = None # MongoDB uses mongo itself for locking. - self.mongo = MongoDBNamespaceManager.clients.get(data_key, - _create_mongo_conn) + if pymongo is None: + raise RuntimeError('pymongo3 is not available') - def get_creation_lock(self, key): - """@TODO - stop hitting filesystem for this... - I think mongo can properly avoid dog piling for us. 
- """ - return null_synchronizer() - - def do_remove(self): - """Clears the entire filesystem (drops the collection)""" - log.debug("[MongoDB] Remove namespace: %s" % self.namespace) - q = {} - if self._sparse: - q = {'_id.namespace': self.namespace} + if isinstance(url, string_type): + self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url) else: - q = {'_id': self.namespace} - - log.debug("[MongoDB] Remove Query: %s" % q) - self.mongo.remove(q) + self.client = url + self.db = self.client.get_default_database() + + def _format_key(self, key): + if not isinstance(key, str): + key = key.decode('ascii') + if len(key) > (self.MAX_KEY_LENGTH - len(self.namespace) - 1): + if not PY2: + key = key.encode('utf-8') + key = sha1(key).hexdigest() + return '%s:%s' % (self.namespace, key) + def get_creation_lock(self, key): + return MongoSynchronizer(self._format_key(key), self.client) def __getitem__(self, key): - log.debug("[MongoDB %s] Get Key: %s" % (self.mongo, - key)) - - _id = {} - fields = {} - if self._sparse: - _id = { - 'namespace': self.namespace, - 'key': key - } - fields['data'] = True - else: - _id = self.namespace - fields['data.' + key] = True - - log.debug("[MongoDB] Get Query: id == %s Fields: %s", _id, fields) - result = self.mongo.find_one({'_id': _id}, fields=fields) - log.debug("[MongoDB] Get Result: %s", result) - - if result: - """Running into instances in which mongo is returning - -1, which causes an error as __len__ should return 0 - or positive integers, hence the check of size explicit""" - log.debug("Result: %s", result) - data = result.get('data', None) - log.debug("Data: %s", data) - if self._sparse: - value = data - else: - value = data.get(key, None) - - if not value: - return None - - if self._pickle or key == 'session': - value = _depickle(value) - else: - if value['pickled']: - value = (value['stored'], value['expires'], _depickle(value['value'])) - else: - value = (value['stored'], value['expires'], value['value']) - - log.debug("[key: %s] Value: %s" % (key, value)) - - return value - else: - return None - + self._clear_expired() + entry = self.db.backer_cache.find_one({'_id': self._format_key(key)}) + if entry is None: + raise KeyError(key) + return pickle.loads(entry['value']) def __contains__(self, key): - def _has(): - result = self.__getitem__(key) - if result: - log.debug("[MongoDB] %s == %s" % (key, result)) - return result is not None - else: - return False - - log.debug("[MongoDB] Has '%s'? " % key) - ret = _has() - - - return ret + self._clear_expired() + entry = self.db.backer_cache.find_one({'_id': self._format_key(key)}) + return entry is not None def has_key(self, key): return key in self def set_value(self, key, value, expiretime=None): - log.debug("[MongoDB %s] Set Key: %s (Expiry: %s) ... 
" % - (self.mongo, key, expiretime)) + self._clear_expired() - _id = {} - doc = {} + expiration = None + if expiretime is not None: + expiration = time.time() + expiretime - if self._pickle or key == 'session': - try: - value = pickle.dumps(value) - except: - log.exception("Failed to pickle value.") - else: - value = { - 'stored': value[0], - 'expires': value[1], - 'value': value[2], - 'pickled': False - } - try: - bson.BSON.encode(value) - except: - log.warning("Value is not bson serializable, pickling inner value.") - value['value'] = pickle.dumps(value['value']) - value['pickled'] = True + value = pickle.dumps(value) + self.db.backer_cache.update_one({'_id': self._format_key(key)}, + {'$set': {'value': bson.Binary(value), + 'expiration': expiration}}, + upsert=True) + + def __setitem__(self, key, value): + self.set_value(key, value) + + def __delitem__(self, key): + self._clear_expired() + self.db.backer_cache.delete_many({'_id': self._format_key(key)}) + def do_remove(self): + self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace}}) + def keys(self): + return [e['key'].split(':', 1)[-1] for e in self.db.backer_cache.find_all( + {'_id': {'$regex': '^%s' % self.namespace}} + )] - if self._sparse: - _id = { - 'namespace': self.namespace, - 'key': key - } + def _clear_expired(self): + now = time.time() + self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace}, + 'expiration': {'$ne': None, '$lte': now}}) - doc['data'] = bson.Binary(value) - doc['_id'] = _id - if expiretime: - # TODO - What is the datatype of this? it should be instantiated as a datetime instance - doc['valid_until'] = expiretime - else: - _id = self.namespace - doc['$set'] = {'data.' + key: bson.Binary(value)} - if expiretime: - # TODO - What is the datatype of this? it should be instantiated as a datetime instance - doc['$set']['valid_until'] = expiretime - log.debug("Upserting Doc '%s' to _id '%s'" % (doc, _id)) - self.mongo.update({"_id": _id}, doc, upsert=True, safe=True) +class MongoSynchronizer(SynchronizerImpl): + """Provides a Writer/Reader lock based on MongoDB. - def __setitem__(self, key, value): - self.set_value(key, value) + Provided ``url`` can be both a mongodb connection string or + an already existing MongoClient instance. - def __delitem__(self, key): - """Delete JUST the key, by setting it to None.""" - if self._sparse: - self.mongo.remove({'_id.namespace': self.namespace}) - else: - self.mongo.update({'_id': self.namespace}, - {'$unset': {'data.' + key: True}}, upsert=False) + The data will be stored into ``beaker_locks`` collection of the + *default database*, so make sure your connection string or + MongoClient point to a default database. - def keys(self): - if self._sparse: - return [row['_id']['field'] for row in self.mongo.find({'_id.namespace': self.namespace}, {'_id': True})] + Locks are identified by local machine, PID and threadid, so + are suitable for use in both local and distributed environments. + """ + # If a cache entry generation function can take a lot, + # but 15 minutes is more than a reasonable time. 
+ LOCK_EXPIRATION = 900 + MACHINE_ID = machine_identifier() + + def __init__(self, identifier, url): + super(MongoSynchronizer, self).__init__() + self.identifier = identifier + if isinstance(url, string_type): + self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url) else: - return self.mongo.find_one({'_id': self.namespace}, {'data': True}).get('data', {}) + self.client = url + self.db = self.client.get_default_database() + + def _clear_expired_locks(self): + now = datetime.datetime.utcnow() + expired = now - datetime.timedelta(seconds=self.LOCK_EXPIRATION) + self.db.beaker_locks.delete_many({'_id': self.identifier, 'timestamp': {'$lte': expired}}) + return now + + def _get_owner_id(self): + return '%s-%s-%s' % (self.MACHINE_ID, os.getpid(), threading.current_thread().ident) + + def do_release_read_lock(self): + owner_id = self._get_owner_id() + self.db.beaker_locks.update_one({'_id': self.identifier, 'readers': owner_id}, + {'$pull': {'readers': owner_id}}) + + def do_acquire_read_lock(self, wait): + now = self._clear_expired_locks() + owner_id = self._get_owner_id() + while True: + try: + self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None}, + {'$set': {'timestamp': now}, + '$push': {'readers': owner_id}}, + upsert=True) + return True + except pymongo.errors.DuplicateKeyError: + if not wait: + return False + time.sleep(0.2) + + def do_release_write_lock(self): + self.db.beaker_locks.delete_one({'_id': self.identifier, 'owner': self._get_owner_id()}) + + def do_acquire_write_lock(self, wait): + now = self._clear_expired_locks() + owner_id = self._get_owner_id() + while True: + try: + self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None, + 'readers': []}, + {'$set': {'owner': owner_id, + 'timestamp': now}}, + upsert=True) + return True + except pymongo.errors.DuplicateKeyError: + if not wait: + return False + time.sleep(0.2) -class MongoDBContainer(Container): - namespace_class = MongoDBNamespaceManager def _partition(source, sub): """Our own string partitioning method. 
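The rewritten manager above mirrors the upstream Beaker MongoDB extension: values are pickled, wrapped in bson.Binary and upserted into a collection of the connection string's default database, while MongoSynchronizer provides the per-key creation lock. A minimal usage sketch, assuming a reachable MongoDB instance; the host, database name and namespace below are placeholders, not values taken from PatZilla's configuration:

    from patzilla.util.database.beaker_mongodb import MongoNamespaceManager

    # Namespace and key are combined into the document _id ("drawings:EP0666666").
    # Values are pickled and upserted; an expiretime in seconds is stored as an
    # absolute expiration timestamp which _clear_expired() prunes on later access.
    ns = MongoNamespaceManager("drawings", "mongodb://localhost:27017/patzilla")
    ns.set_value("EP0666666", {"pages": 7}, expiretime=3600)
    print("EP0666666" in ns)   # find_one() on the formatted key
    print(ns["EP0666666"])     # {'pages': 7}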
@@ -499,6 +458,6 @@ def _parse_uri(uri, default_port=27017): def _depickle(value): try: return pickle.loads(value) - except Exception, e: + except Exception as e: log.exception("Failed to unpickle value '{0}'.".format(e)) return None diff --git a/patzilla/util/database/beaker_mongodb_gridfs.py b/patzilla/util/database/beaker_mongodb_gridfs.py index 605a0c70..e9aff35f 100644 --- a/patzilla/util/database/beaker_mongodb_gridfs.py +++ b/patzilla/util/database/beaker_mongodb_gridfs.py @@ -1,5 +1,8 @@ +import pickle +import logging as log from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle + def includeme(config): # Monkey patch 3rd party class to fix runtime error diff --git a/patzilla/util/date/__init__.py b/patzilla/util/date/__init__.py index a683524f..4be8b63a 100644 --- a/patzilla/util/date/__init__.py +++ b/patzilla/util/date/__init__.py @@ -111,7 +111,7 @@ def parse_date_within(value): """ value = value.replace('within', '').strip().strip('"') parts = value.split(',') - parts = map(unicode.strip, parts) + parts = list(map(str.strip, parts)) result = { 'startdate': parts[0], 'enddate': parts[1], @@ -123,12 +123,12 @@ def year_range_to_within(value): Parse year ranges like "1990-2014" or "1990 - 2014" and convert into "within 1990,2014" expression """ - if value.count(u'-') == 1: - parts = value.split(u'-') + if value.count('-') == 1: + parts = value.split('-') parts = [part.strip() for part in parts] year_from, year_to = parts if len(year_from) == 4 and len(year_to) == 4: - value = u'within {year_from},{year_to}'.format(**locals()) + value = 'within {year_from},{year_to}'.format(**locals()) return value def week_range(date): diff --git a/patzilla/util/email/core.py b/patzilla/util/email/core.py index 899b3c72..8e14d2cd 100644 --- a/patzilla/util/email/core.py +++ b/patzilla/util/email/core.py @@ -17,7 +17,7 @@ log = logging.getLogger(__name__) -def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', reply_to=None, attachments=None, mime_headers=None): +def build_email(mail_to, subject, body_text, mail_from='test@example.org', reply_to=None, attachments=None, mime_headers=None): """ Flexible Multipart MIME message builder. @@ -53,11 +53,11 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl } # Subject header - mime_headers.update({u'Subject': Header(s=subject, charset='utf-8')}) + mime_headers.update({'Subject': Header(s=subject, charset='utf-8')}) # Add address headers - for key, item in address_headers.iteritems(): + for key, item in address_headers.items(): if isinstance(item, AddressList): # v1 @@ -70,7 +70,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl message[key] = value # Add more headers - for key, value in mime_headers.iteritems(): + for key, value in mime_headers.items(): #message.add_header(key, value) if value: message[key] = value @@ -97,7 +97,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl # multipart attachments # ------------------------------------------ # from https://docs.python.org/2/library/email-examples.html - for filename, payload in attachments.iteritems(): + for filename, payload in attachments.items(): # Guess the content type based on the file's extension. 
Encoding # will be ignored, although we should check for simple things like @@ -149,10 +149,10 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl return payload -def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.org'): +def send_email(mail_to, message, smtp_settings=None, mail_from='test@example.org'): smtp_settings = smtp_settings or {} - smtp_settings.setdefault('hostname', u'localhost') + smtp_settings.setdefault('hostname', 'localhost') smtp_settings.setdefault('port', 25) # sanity checks @@ -191,7 +191,7 @@ def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.or def format_addresslist(addresslist): #print 'addresslist:', addresslist.addresslist - return map(formataddr, addresslist.addresslist) + return list(map(formataddr, addresslist.addresslist)) def fix_addresslist(addresslist): diff --git a/patzilla/util/email/message.py b/patzilla/util/email/message.py index 7baca1ee..37f7ea62 100644 --- a/patzilla/util/email/message.py +++ b/patzilla/util/email/message.py @@ -6,7 +6,7 @@ import logging import textwrap from copy import deepcopy -from core import build_email, send_email +from .core import build_email, send_email from patzilla.util.config import read_config, to_list log = logging.getLogger(__name__) @@ -38,25 +38,25 @@ def add_reply(self, address): def send(self, subject='', message='', files=None): - recipients = u', '.join(self.recipients) - reply_to = u', '.join(self.reply_to) + recipients = ', '.join(self.recipients) + reply_to = ', '.join(self.reply_to) files = files or {} # get smtp addressing information from settings - smtp_host = self.smtp_settings.get('hostname', u'localhost') - mail_from = self.email_settings['addressbook'].get('from', u'test@example.org') + smtp_host = self.smtp_settings.get('hostname', 'localhost') + mail_from = self.email_settings['addressbook'].get('from', 'test@example.org') # log smtp settings smtp_settings_log = deepcopy(self.smtp_settings) if 'password' in smtp_settings_log: del smtp_settings_log['password'] - log.info(u'Sending email to "{recipients}". smtp settings: {smtp_settings}'.format( + log.info('Sending email to "{recipients}". 
smtp settings: {smtp_settings}'.format( recipients=recipients, smtp_settings=smtp_settings_log)) # build subject event_date = time.strftime('%Y-%m-%d') event_time = time.strftime('%H:%M:%S') - subject_real = u'' + subject_real = '' if 'subject_prefix' in self.email_settings['content']: prefix = self.email_settings['content'].get('subject_prefix') if not prefix.endswith(' '): @@ -64,14 +64,14 @@ def send(self, subject='', message='', files=None): subject_real += prefix #subject_real += u'{subject} on {event_date} at {event_time}'.format(**locals()) - subject_real += u'{}'.format(subject) + subject_real += '{}'.format(subject) - filenames = u'\n'.join([u'- ' + entry for entry in files.keys()]) + filenames = '\n'.join(['- ' + entry for entry in list(files.keys())]) body_template = textwrap.dedent(self.email_settings['content'].get('body', '')).strip() if 'signature' in self.email_settings['content']: - body_template += u'\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip() + body_template += '\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip() body_template = body_template.replace('\\n', '\r') @@ -96,11 +96,11 @@ def send(self, subject='', message='', files=None): # smtplib.SMTPServerDisconnected: Connection unexpectedly closed # send_email(recipients, message, smtp_settings=self.smtp_settings, mail_from=mail_from) - log.info(u'Email to recipients "{recipients}" sent successfully'.format(recipients=recipients)) + log.info('Email to recipients "{recipients}" sent successfully'.format(recipients=recipients)) except Exception as ex: # TODO: catch traceback when running in commandline mode - log.error(u'Error sending email: {failure}'.format(failure=ex)) + log.error('Error sending email: {failure}'.format(failure=ex)) raise @@ -123,10 +123,10 @@ def send(self, subject='', message='', files=None): message = EmailMessage(settings['smtp'], settings['email'], {'subject_prefix': 'acme-product'}) message.add_recipient('test@example.org') message.send( - subject = u'Self-test email from Räuber Hotzenplotz', - message = u'Self-test email from Räuber Hotzenplotz', + subject = 'Self-test email from Räuber Hotzenplotz', + message = 'Self-test email from Räuber Hotzenplotz', files = { - u'test.txt': u'☠☠☠ SKULL AND CROSSBONES ☠☠☠', - u'test.json': json.dumps(u'☠☠☠ SKULL AND CROSSBONES ☠☠☠'), + 'test.txt': '☠☠☠ SKULL AND CROSSBONES ☠☠☠', + 'test.json': json.dumps('☠☠☠ SKULL AND CROSSBONES ☠☠☠'), } ) diff --git a/patzilla/util/expression/__init__.py b/patzilla/util/expression/__init__.py index e96ed902..43dbceac 100644 --- a/patzilla/util/expression/__init__.py +++ b/patzilla/util/expression/__init__.py @@ -28,8 +28,8 @@ class SearchExpression(object): def parse_expression(self, query): - logger.info(u'Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format( - query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or u'default')) + logger.info('Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format( + query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or 'default')) if self.syntax == 'cql': self.parse_expression_cql(query) @@ -40,8 +40,8 @@ def parse_expression(self, query): def parse_expression_cql(self, expression): # Fixup query: Wrap into quotes if CQL expression is a) unspecific, b) contains spaces and c) is still unquoted - if should_be_quoted(expression) and u'within' not in expression: - expression = u'"%s"' % expression + 
if should_be_quoted(expression) and 'within' not in expression: + expression = '"%s"' % expression # Parse and recompile CQL query string to apply number normalization query_object = None @@ -59,11 +59,11 @@ def parse_expression_cql(self, expression): expression = query_recompiled if query_recompiled != expression: - logger.info(u'Recompiled search expression to "{query}"'.format(query=expression)) + logger.info('Recompiled search expression to "{query}"'.format(query=expression)) except Exception as ex: # TODO: Can we get more details from diagnostic information to just stop here w/o propagating obviously wrong query to OPS? - logger.warn(u'CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback())) + logger.warn('CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback())) self.cql_parser = query_object self.expression = expression diff --git a/patzilla/util/expression/keywords.py b/patzilla/util/expression/keywords.py index e5bd7b3b..bfb3f422 100644 --- a/patzilla/util/expression/keywords.py +++ b/patzilla/util/expression/keywords.py @@ -64,7 +64,7 @@ def scan_keywords(op, keywords): #print "op.index:", op.index #print "op.term:", op.term if str(op.index) in keyword_fields: - keyword = clean_keyword(unicode(op.term)) + keyword = clean_keyword(str(op.term)) keywords.append(keyword) hasattr(op, 'leftOperand') and scan_keywords(op.leftOperand, keywords) @@ -76,7 +76,7 @@ def keywords_to_response(request, search): Propagate keywords to client for highlighting """ - logger.info(u'Propagating keywords from "{origin}": {keywords}'.format( + logger.info('Propagating keywords from "{origin}": {keywords}'.format( origin=search.keywords_origin, keywords=search.keywords)) request.response.headers['X-PatZilla-Query-Keywords'] = json.dumps(search.keywords) diff --git a/patzilla/util/image/convert.py b/patzilla/util/image/convert.py index 63e53dee..2c6ccbee 100644 --- a/patzilla/util/image/convert.py +++ b/patzilla/util/image/convert.py @@ -3,13 +3,13 @@ import os import shutil import tempfile -from pathlib2 import Path +from pathlib import Path import requests import where import logging import datetime -import StringIO +import io import subprocess from six import BytesIO from tempfile import NamedTemporaryFile @@ -195,7 +195,7 @@ def run_imagemagick(command, input=None): def png_resize(png_payload, width): - image = Image.open(StringIO.StringIO(png_payload)).convert('RGB') + image = Image.open(io.StringIO(png_payload)).convert('RGB') image_width = image.size[0] image_height = image.size[1] @@ -209,13 +209,13 @@ def png_resize(png_payload, width): #size = (int(width), int(image_height * aspect)) size = (int(width), int(image_height / scale_factor)) #print "size:", size - print "Resizing image from %s to %s" % (image.size, size) + print("Resizing image from %s to %s" % (image.size, size)) image.thumbnail(size, Image.ANTIALIAS) #image.resize(size, Image.ANTIALIAS) #print "thumbnail done" - png = StringIO.StringIO() + png = io.StringIO() image.save(png, 'PNG') #print "image saved to memory" diff --git a/patzilla/util/ipc/parser.py b/patzilla/util/ipc/parser.py index e8701e54..e94d54a9 100644 --- a/patzilla/util/ipc/parser.py +++ b/patzilla/util/ipc/parser.py @@ -10,7 +10,7 @@ def decodeMatchToDict(match, key_suffix): if match: # transfer data from match groups to instance variable, # making all values uppercase - for key, value in match.groupdict().iteritems(): + for key, value in 
match.groupdict().items(): if key.endswith(key_suffix): key = key.replace(key_suffix, '') if value: @@ -56,7 +56,7 @@ def decode(self): m = self.r.match(self.raw) self.ipc = decodeMatchToDict(m, '__1') if not self.ipc: - raise ValueError, "IPCR class '%s' could not be decoded" % self.raw + raise ValueError("IPCR class '%s' could not be decoded" % self.raw) def fix(self): @@ -82,7 +82,7 @@ def asDict(self): def formatFlexible(self, class_padding='', group_subgroup_delimiter='', group_padding='', subgroup_padding=''): if not self.ipc['section']: - raise ValueError, "IPCR class '%s' could not be formatted" % self.raw + raise ValueError("IPCR class '%s' could not be formatted" % self.raw) ipc_serialized = self.ipc['section'] diff --git a/patzilla/util/network/browser.py b/patzilla/util/network/browser.py index 51f61cc1..3545dbff 100644 --- a/patzilla/util/network/browser.py +++ b/patzilla/util/network/browser.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- # (c) 2017-2019 Andreas Motl -regular_user_agent = u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0' +regular_user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0' diff --git a/patzilla/util/network/requests_xmlrpclib.py b/patzilla/util/network/requests_xmlrpclib.py index 18dbbc2b..e61eaf61 100644 --- a/patzilla/util/network/requests_xmlrpclib.py +++ b/patzilla/util/network/requests_xmlrpclib.py @@ -6,17 +6,14 @@ Usage: - >>> import xmlrpclib + >>> import xmlrpc.client >>> #from transport import RequestsTransport - >>> s = xmlrpclib.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport()) + >>> s = xmlrpc.client.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport()) >>> #s.demo.sayHello() Hello! 
""" -try: - import xmlrpc.client as xmlrpc -except ImportError: - import xmlrpclib as xmlrpc +import xmlrpc.client as xmlrpc import requests class RequestsTransport(xmlrpc.Transport): diff --git a/patzilla/util/numbers/common.py b/patzilla/util/numbers/common.py index ac5af45a..f44f1e25 100644 --- a/patzilla/util/numbers/common.py +++ b/patzilla/util/numbers/common.py @@ -3,7 +3,7 @@ import re import types import logging -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.helper import strip_spaces """ @@ -13,7 +13,7 @@ log = logging.getLogger(__name__) -class DocumentIdentifierBunch(SmartBunch): +class DocumentIdentifierBunch(SmartMunch): def __str__(self): return self.dump() @@ -29,12 +29,12 @@ def join_patent(patent): return number def decode_patent_number(patent): - if isinstance(patent, types.StringTypes): + if isinstance(patent, (str,)): decoded = split_patent_number(patent) - elif isinstance(patent, types.DictionaryType): + elif isinstance(patent, dict): decoded = patent else: - raise TypeError(u'Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent))) + raise TypeError('Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent))) return decoded def split_patent_number(patent_number): @@ -154,7 +154,7 @@ def split_patent_number(patent_number): return dib else: - log.error(u'Unable to parse patent number "{0}"'.format(patent_number)) + log.error('Unable to parse patent number "{0}"'.format(patent_number)) def split_patent_number_more(patent): diff --git a/patzilla/util/numbers/denormalize.py b/patzilla/util/numbers/denormalize.py index 6395e2a6..9b4deee2 100644 --- a/patzilla/util/numbers/denormalize.py +++ b/patzilla/util/numbers/denormalize.py @@ -121,16 +121,16 @@ def test_denormalization(): WO1990004917 """ - print "-" * 30 - print "original\tdenormalized" - print "-" * 30 + print("-" * 30) + print("original\tdenormalized") + print("-" * 30) for number in payload.split("\n"): if not number or number == "\n": continue if number.startswith('---'): - print number + print(number) continue number_denormalized = join_patent(denormalize_patent(split_patent_number(number))) - print "%s\t%s" % (number, number_denormalized) + print("%s\t%s" % (number, number_denormalized)) if __name__ == "__main__": diff --git a/patzilla/util/numbers/helper.py b/patzilla/util/numbers/helper.py index 044ba815..520c49fc 100644 --- a/patzilla/util/numbers/helper.py +++ b/patzilla/util/numbers/helper.py @@ -22,11 +22,11 @@ def strip_spaces(number): number = r_invalid.sub('', number) return number -def read_numbersfile(file): - fh = open(file, 'r') +def read_numbersfile(_file): + fh = open(_file, 'r') numbers_raw = fh.readlines() fh.close() - numbers = map(lambda number: number.strip(" ;\"'\t\n\r"), numbers_raw) + numbers = [number.strip(" ;\"'\t\n\r") for number in numbers_raw] numbers = [number for number in numbers if number and not number.startswith('#')] return numbers diff --git a/patzilla/util/numbers/normalize.py b/patzilla/util/numbers/normalize.py index 1dd5da49..0b60f9d9 100644 --- a/patzilla/util/numbers/normalize.py +++ b/patzilla/util/numbers/normalize.py @@ -194,7 +194,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False, provider = 'ops' # 1. 
handle patent dicts or convert (split) from string - if isinstance(number, types.DictionaryType): + if isinstance(number, dict): patent = number else: patent = split_patent_number(number) @@ -209,7 +209,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False, # 3. result handling # 3.a) default mechanism: return what we've got - if isinstance(number, types.DictionaryType): + if isinstance(number, dict): result = patent_normalized else: result = join_patent(patent_normalized) @@ -622,7 +622,7 @@ def normalize_patent_it(patent): # filter: special document handling (with alphanumeric prefixes) # trim and pad sequential number with zeros to get total length of 7 characters for patent number - if patched.has_key('number-type') and patched.has_key('number-real'): + if 'number-type' in patched and 'number-real' in patched: subtype = patched['number-type'] seqnumber = patched['number-real'] patched['number'] = subtype + seqnumber.lstrip('0') @@ -671,16 +671,16 @@ def normalization_example(): # pragma: nocover 'JP3657641B2', ] - print "-" * 30 - print '{0}{1}'.format("original".ljust(20), "normalized") - print "-" * 30 + print("-" * 30) + print('{0}{1}'.format("original".ljust(20), "normalized")) + print("-" * 30) for number in numbers: if number.find('---') != -1: - print number + print(number) continue result = normalize_patent(number) #result = join_patent(patch_patent_old_archive(patent)) - print "{0}{1}".format(number.ljust(20), result) + print("{0}{1}".format(number.ljust(20), result)) if __name__ == "__main__": # pragma: nocover diff --git a/patzilla/util/numbers/numberlists.py b/patzilla/util/numbers/numberlists.py index d6341e32..22ceb2ad 100644 --- a/patzilla/util/numbers/numberlists.py +++ b/patzilla/util/numbers/numberlists.py @@ -4,13 +4,13 @@ from patzilla.util.numbers.normalize import normalize_patent def parse_numberlist(rawdata): - pattern = re.compile(u'[,\n]') + pattern = re.compile('[,\n]') entries = pattern.split(rawdata) - entries = map(unicode.strip, entries) + entries = list(map(str.strip, entries)) return entries def normalize_numbers(entries): - entries = map(lambda s: s.replace(u' ', u''), entries) + entries = [s.replace(' ', '') for s in entries] response = {'valid': [], 'invalid': [], 'all': []} for entry in entries: entry_normalized = normalize_patent(entry, fix_kindcode=True) diff --git a/patzilla/util/python/__init__.py b/patzilla/util/python/__init__.py index 4974efcc..5a69667c 100644 --- a/patzilla/util/python/__init__.py +++ b/patzilla/util/python/__init__.py @@ -2,7 +2,7 @@ # (c) 2014 Andreas Motl, Elmyra UG import sys import traceback -from StringIO import StringIO +from io import StringIO def exception_traceback(exc_info=None): """ diff --git a/patzilla/util/text/format.py b/patzilla/util/text/format.py index ae59c647..9093becf 100644 --- a/patzilla/util/text/format.py +++ b/patzilla/util/text/format.py @@ -2,9 +2,9 @@ # (c) 2014-2016 Andreas Motl, Elmyra UG import re -_slugify_strip_re = re.compile(r'[^\w\s-]') -_slugify_strip_wo_equals_re = re.compile(r'[^\w\s=-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') +_slugify_strip_re = re.compile(rb'[^\w\s-]') +_slugify_strip_wo_equals_re = re.compile(rb'[^\w\s=-]') +_slugify_hyphenate_re = re.compile(rb'[-\s]+') def slugify(value, strip_equals=True, lowercase=True): """ Normalizes string, converts to lowercase, removes non-alpha characters, @@ -15,19 +15,23 @@ def slugify(value, strip_equals=True, lowercase=True): Via 
http://code.activestate.com/recipes/577257-slugify-make-a-string-usable-in-a-url-or-filename/ """ import unicodedata - if not isinstance(value, unicode): - value = unicode(value) + if not isinstance(value, str): + value = str(value) value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') _strip_re = _slugify_strip_re if not strip_equals: _strip_re = _slugify_strip_wo_equals_re - value = unicode(_strip_re.sub('', value).strip()) + + if isinstance(value, str): + value = _strip_re.sub('', value).strip() + else: + value = _strip_re.sub(b'', value).strip() if lowercase: value = value.lower() - value = _slugify_hyphenate_re.sub('-', value) + value = _slugify_hyphenate_re.sub(b'-', value) return value def text_indent(text, amount=4, ch=' '): diff --git a/patzilla/util/web/email/submit.py b/patzilla/util/web/email/submit.py index 2504401e..e8353e2e 100644 --- a/patzilla/util/web/email/submit.py +++ b/patzilla/util/web/email/submit.py @@ -5,7 +5,7 @@ from validate_email import validate_email from pyramid.threadlocal import get_current_request from patzilla.util.config import read_config, read_list, to_list -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.email.message import EmailMessage log = logging.getLogger(__name__) @@ -68,23 +68,23 @@ def email_issue_report(report, recipients): recipients = to_list(recipients) identifier = None - if isinstance(report, SmartBunch): + if isinstance(report, SmartMunch): identifier = report.meta.id # Build reasonable subject - subject = u'Product issue' + subject = 'Product issue' if 'dialog' in report and 'what' in report.dialog: - subject = u'[{}] '.format(report.dialog.what) + subject + subject = '[{}] '.format(report.dialog.what) + subject if identifier: - subject += u' #' + identifier + subject += ' #' + identifier # Build reasonable message - message = u'' + message = '' if 'dialog' in report and 'remark' in report.dialog: message = report.dialog.remark # Add JSON report as attachment - files = {u'report.json': report.pretty()} + files = {'report.json': report.pretty()} email = message_factory(recipients=recipients) email.send( diff --git a/patzilla/util/web/identity/store.py b/patzilla/util/web/identity/store.py index 92a69d45..982538e0 100644 --- a/patzilla/util/web/identity/store.py +++ b/patzilla/util/web/identity/store.py @@ -12,8 +12,8 @@ from mongoengine.fields import StringField, ListField, DateTimeField, DictField from mongoengine.errors import NotUniqueError from pyramid.threadlocal import get_current_request -from zope.interface.declarations import implements from zope.interface.interface import Interface +from zope.interface import implementer log = logging.getLogger(__name__) @@ -133,9 +133,10 @@ class UserMetrics(Document): class IUserMetricsManager(Interface): pass +@implementer(IUserMetricsManager) class UserMetricsManager(object): - implements(IUserMetricsManager) +# py27 implements(IUserMetricsManager) def measure_upstream(self, upstream, volume): diff --git a/patzilla/util/web/pyramid/cornice.py b/patzilla/util/web/pyramid/cornice.py index b7dadcae..89a6ee9b 100644 --- a/patzilla/util/web/pyramid/cornice.py +++ b/patzilla/util/web/pyramid/cornice.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # (c) 2017 Andreas Motl, Elmyra UG -from __future__ import absolute_import + from cornice.errors import Errors def add_location_whitelisted(self, location, name=None, description=None, **kw): diff --git a/patzilla/util/web/pyramid/renderer.py 
b/patzilla/util/web/pyramid/renderer.py index 78a06af4..d941add6 100644 --- a/patzilla/util/web/pyramid/renderer.py +++ b/patzilla/util/web/pyramid/renderer.py @@ -18,7 +18,7 @@ def __call__(self, data, context): content_type = (context['request'].accept.best_match(acceptable) or acceptable[0]) response.content_type = content_type - print "data:", data + print("data:", data) return 'hello' #return json.dumps(data, use_decimal=True) diff --git a/patzilla/util/web/util/xmlrpclib.py b/patzilla/util/web/util/xmlrpclib.py index 50c5f6de..df3353b6 100644 --- a/patzilla/util/web/util/xmlrpclib.py +++ b/patzilla/util/web/util/xmlrpclib.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # (c) 2014-2015 Andreas Motl, Elmyra UG -from __future__ import absolute_import + import sys import socket -import xmlrpclib +import xmlrpc.client import ssl # https://stackoverflow.com/questions/372365/set-timeout-for-xmlrpclib-serverproxy/14397619#14397619 @@ -24,7 +24,7 @@ def __enter__(self): if self.__timeout: self.__prevDefaultTimeout = socket.getdefaulttimeout() socket.setdefaulttimeout(self.__timeout) - proxy = xmlrpclib.Server(self.__url, allow_none=True) + proxy = xmlrpc.client.Server(self.__url, allow_none=True) except Exception as ex: raise Exception("Unable create XMLRPC-proxy for url '%s': %s" % (self.__url, ex)) diff --git a/patzilla/util/web/uwsgi/uwsgidecorators.py b/patzilla/util/web/uwsgi/uwsgidecorators.py index 79c08ea1..29b20c36 100644 --- a/patzilla/util/web/uwsgi/uwsgidecorators.py +++ b/patzilla/util/web/uwsgi/uwsgidecorators.py @@ -4,7 +4,7 @@ from threading import Thread try: - import cPickle as pickle + import pickle as pickle except: import pickle diff --git a/patzilla/util/xml/format.py b/patzilla/util/xml/format.py index 49d32120..1ee0b738 100644 --- a/patzilla/util/xml/format.py +++ b/patzilla/util/xml/format.py @@ -69,5 +69,5 @@ def data(self, root): return super(BadgerFishNoNamespace, self).data(root) def clean_tag(self, node): - if isinstance(node.tag, basestring): + if isinstance(node.tag, str): node.tag = re.sub('{.*}', '', node.tag) diff --git a/pserve.py b/pserve.py new file mode 100644 index 00000000..4ddeca3f --- /dev/null +++ b/pserve.py @@ -0,0 +1,10 @@ +#!/home/frank/DATA/Envs/env1/bin/python3 +# -*- coding: utf-8 -*- +import regex as re +import sys + +from pyramid.scripts.pserve import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/setup.cfg b/setup.cfg index 28b62b06..5622110a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,8 @@ addopts = -rA -vvv --app-cache-backend=filesystem patzilla tests -k 'not uspto' +doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL + log_level = DEBUG log_cli_level = DEBUG diff --git a/setup.py b/setup.py index e75bff7d..763fadec 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # Environment # ---------------------------------------------- 'six>=1.10.0', - 'mock>=3,<4', # 4.0.3 + 'mock', # ---------------------------------------------- # Backend @@ -41,9 +41,9 @@ # Database and storage # Can't upgrade to pymongo-3.5.1 due to "from pymongo.connection import Connection" # usage in "mongodb_gridfs_beaker" module. 
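# Editor's sketch (not part of the patch): many of the test hunks that follow switch
# plain string assertions to bytes literals (b"..."). Under Python 3, HTTP response
# bodies and captured process output arrive as bytes, so comparisons must either stay
# in bytes or decode explicitly first. The variable names below are illustrative only.
payload = b"%PDF-1.4 example payload"

assert isinstance(payload, bytes)
assert payload.startswith(b"%PDF")                    # compare bytes against bytes, or ...
assert payload.decode("latin-1").startswith("%PDF")   # ... decode before comparing as str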
- 'pymongo<3', # 3.13.0, 4.3.3 - 'mongodb_gridfs_beaker==0.5.4', - 'mongoengine==0.13.0', # 0.24.1 + 'pymongo', # 3.13.0, 4.3.3 + 'mongodb_gridfs_beaker@https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker', + 'mongoengine==0.20.0', # 0.27.0 'python-magic<1', # Web services @@ -51,7 +51,7 @@ # Authorization 'pycryptodome>=3,<4', - 'python-jwt>=3.3.4,<4', + 'python-jwt', 'pbkdf2==1.3', @@ -73,8 +73,8 @@ 'ndg-httpsclient<1', # HTML - 'BeautifulSoup<4', - 'html2text==2016.9.19', # 2020.1.16 + 'beautifulsoup4', + 'html2text', # XML # Remark: Both lxml 3.8.0 and 4.0.0 will segfault on Debian Wheezy (7.11) @@ -92,19 +92,19 @@ # Data handling 'attrs', - 'Bunch==1.0.1', # Maybe switch to "Munch" - 'pyparsing==2.0.2', # 2.2.2, 2.3.1, 2.4.7, 3.0.8 + 'Munch', + 'pyparsing<4', # 3.0.9 'python-dateutil<3', 'ago==0.0.9', # 0.0.93 'arrow==0.10.0', # 0.12.1 'validate_email<2', - 'numpy==1.16.6', # 1.22.3 - 'pandas==0.18.1', # 0.22.0, 0.25.3, 1.4.2 - 'pathlib2<3', + 'numpy>=1.16.6', # 1.22.3 + 'pandas', # 0.22.0, 0.25.3, 1.4.2 + 'pathlib', # Data formatting - 'openpyxl>=2.4.2,<3', - 'xlrd==0.9.3', # 0.9.4, 1.2.0, 2.0.1 + 'openpyxl', + 'xlrd3', 'XlsxWriter==0.9.3', # 1.4.5, 2.0.0, 3.0.3 # Data conversion @@ -215,8 +215,6 @@ extras_require={ 'test': test_requires, }, - dependency_links=[ - ], entry_points={ 'paste.app_factory': [ diff --git a/tests/__init__.py b/tests/__init__.py index b06494c4..30067b0a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ def suppress_warnings(): """ with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) - import pandas.util.nosetester + import numpy.testing suppress_warnings() diff --git a/tests/access/test_dpma_register.py b/tests/access/test_dpma_register.py index ba2aa470..1180615e 100644 --- a/tests/access/test_dpma_register.py +++ b/tests/access/test_dpma_register.py @@ -37,10 +37,10 @@ def test_dpmaregister_url_de(): def test_dpmaregister_xml(): with F5WafWrapper(): xml = access_register("WO2008034638", output_format="xml") - assert '' in xml - assert "" in xml - assert "" in xml + assert b'' in xml + assert b"" in xml + assert b"" in xml def test_dpmaregister_json(): @@ -72,12 +72,12 @@ def test_dpmaregister_html_compact_de(): def test_dpmaregister_pdf_compact_en(): with F5WafWrapper(): pdf = access_register("EP666666", output_format="pdf") - assert "File number 695 34 171.5" in pdf - assert "Most recent update in DPMAregister on Jan 7, 2017" in pdf + assert b"File number 695 34 171.5" in pdf + assert b"Most recent update in DPMAregister on Jan 7, 2017" in pdf def test_dpmaregister_pdf_compact_de(): with F5WafWrapper(): pdf = access_register("EP666666", output_format="pdf", language="de") - assert "Aktenzeichen 695 34 171.5" in pdf - assert "letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf + assert b"Aktenzeichen 695 34 171.5" in pdf + assert b"letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf diff --git a/tests/access/test_epo_ops.py b/tests/access/test_epo_ops.py index b30051e2..fead9461 100644 --- a/tests/access/test_epo_ops.py +++ b/tests/access/test_epo_ops.py @@ -50,7 +50,7 @@ def test_baseurl(app_request): response = client._make_request( OPS_BASE_URI, data={}, extra_headers={"Accept": "*"}, use_get=True, ) - assert "EPO - Open Patent Services (OPS)" in response.content + assert b"EPO - Open Patent Services (OPS)" in response.content def test_search_full_success(app_request): @@ -74,15 +74,15 @@ def 
test_search_biblio_compact_success(app_request): assert jpath('/0/pubdate', compact) == "1995-08-09" assert jpath('/1/pubnumber', compact) == "EP0666667" assert jpath('/1/pubdate', compact) == "1995-08-09" - assert compact[0].keys() == compact[1].keys() == [ + assert sorted(compact[0].keys()) == sorted(compact[1].keys()) == [ + 'abstract', 'appdate', 'applicant', - 'pubdate', 'appnumber', - 'title', - 'abstract', - 'pubnumber', 'inventor', + 'pubdate', + 'pubnumber', + 'title', ] @@ -140,7 +140,7 @@ def test_search_swap_family(app_request): total_result_count = int(jpath('/ops:world-patent-data/ops:biblio-search/@total-result-count', results.data)) assert total_result_count == 2 - assert results.selected_numbers == [u'DE69534171T2', u'EP0666667A2'] + assert results.selected_numbers == ['DE69534171T2', 'EP0666667A2'] def test_crawl(app_request): @@ -188,13 +188,13 @@ def test_biblio_data_json_success(app_request): assert len(documents) == 3 assert kindcodes == ["A2", "A3", "B1"] assert attributes == [ - u'@country', - u'@doc-number', - u'@family-id', - u'@kind', - u'@system', - u'abstract', - u'bibliographic-data', + '@country', + '@doc-number', + '@family-id', + '@kind', + '@system', + 'abstract', + 'bibliographic-data', ] @@ -218,7 +218,7 @@ def test_biblio_data_xml_success(app_request): Proof getting bibliographic for a specific document in XML format works. """ results = get_ops_biblio_data("publication", "EP0666666", xml=True) - assert results.startswith('') + assert results.startswith(b'') def test_document_kindcodes_success(app_request): @@ -275,31 +275,31 @@ def test_family_members(app_request): pubnumbers = sorted([item["publication"]["number-docdb"] for item in members.items]) assert appnumbers == [ - u'CA2142029A', - u'CA2142029A', - u'DE69534171T', - u'DE69534171T', - u'EP95480005A', - u'EP95480005A', - u'EP95480005A', - u'JP29020894A', - u'JP29020894A', - u'US19288494A', - u'US47157195A', + 'CA2142029A', + 'CA2142029A', + 'DE69534171T', + 'DE69534171T', + 'EP95480005A', + 'EP95480005A', + 'EP95480005A', + 'JP29020894A', + 'JP29020894A', + 'US19288494A', + 'US47157195A', ] assert pubnumbers == [ - u'CA2142029A1', - u'CA2142029C', - u'DE69534171D1', - u'DE69534171T2', - u'EP0666666A2', - u'EP0666666A3', - u'EP0666666B1', - u'JP2613027B2', - u'JPH07231328A', - u'US5467352A', - u'US5572526A', + 'CA2142029A1', + 'CA2142029C', + 'DE69534171D1', + 'DE69534171T2', + 'EP0666666A2', + 'EP0666666A3', + 'EP0666666B1', + 'JP2613027B2', + 'JPH07231328A', + 'US5467352A', + 'US5572526A', ] @@ -435,8 +435,8 @@ def test_description_xml_success(app_request): Acquire full text "description" in XML format. """ data = ops_description("EP666666A2", xml=True) - assert data.startswith('') - assert "The present invention generally relates to multi-node communication systems with shared resources." in data + assert data.startswith(b'') + assert b"The present invention generally relates to multi-node communication systems with shared resources." in data def test_description_failure(app_request): @@ -485,8 +485,8 @@ def test_claims_xml_success(app_request): Acquire full text "claims" in XML format. """ data = ops_claims("EP666666A2", xml=True) - assert data.startswith('') - assert "1. In a communication system having a plurality of nodes" in data + assert data.startswith(b'') + assert b"1. 
In a communication system having a plurality of nodes" in data def test_claims_failure(app_request): @@ -531,7 +531,7 @@ def test_family_docdb_xml_success(app_request): document_number="EP0666666A2", constituents="biblio", ) - assert response.startswith('') + assert response.startswith(b'') def test_family_docdb_xml_not_found_failure(app_request): @@ -558,7 +558,7 @@ def test_register_json_success(app_request): def test_register_xml_success(app_request): response = ops_register(reference_type="publication", document_number="EP0666666A2", xml=True) - assert response.startswith('') + assert response.startswith(b'') def test_register_not_found_failure(app_request): @@ -573,4 +573,4 @@ def test_register_not_found_failure(app_request): def test_service_usage(app_request): response = ops_service_usage("01/01/2022", "02/01/2022") - assert response.keys() == ["response-size", "time-range", "message-count"] + assert sorted(response.keys()) == ["message-count", "response-size", "time-range"] diff --git a/tests/access/test_uspto.py b/tests/access/test_uspto.py index f3503a3b..8852600d 100644 --- a/tests/access/test_uspto.py +++ b/tests/access/test_uspto.py @@ -6,7 +6,7 @@ import re import pytest -from bunch import Bunch +from munch import Munch from pyramid.httpexceptions import HTTPNotFound from patzilla.access.uspto.image import fetch_first_drawing @@ -161,9 +161,9 @@ def test_fetch_url_failure(): def test_get_reference_type_valid(): - assert get_reference_type(Bunch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION - assert get_reference_type(Bunch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION - assert get_reference_type(Bunch(number=1)) == UsptoPdfReferenceType.PUBLICATION + assert get_reference_type(Munch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION + assert get_reference_type(Munch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION + assert get_reference_type(Munch(number=1)) == UsptoPdfReferenceType.PUBLICATION def test_get_reference_type_invalid(): @@ -172,9 +172,9 @@ def test_get_reference_type_invalid(): assert ex.match(re.escape("Unknown document reference type: None")) with pytest.raises(ValueError) as ex: - get_reference_type(Bunch()) + get_reference_type(Munch()) assert ex.match(re.escape("Unknown document reference type:")) with pytest.raises(ValueError) as ex: - get_reference_type(Bunch(number=None)) + get_reference_type(Munch(number=None)) assert ex.match(re.escape("Unknown document reference type:")) diff --git a/tests/commands/test_commands_ops.py b/tests/commands/test_commands_ops.py index c2830056..a6b63f4b 100644 --- a/tests/commands/test_commands_ops.py +++ b/tests/commands/test_commands_ops.py @@ -76,8 +76,8 @@ def test_command_ops_image_fulldocument_pdf_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("%PDF-1.4") - assert 30000 < len(result.stdout) < 50000 + assert result.stdout_bytes.startswith(b"%PDF-1.4") + assert 30_000 < len(result.stdout_bytes) < 150_000 def test_command_ops_image_fulldocument_tiff_success(): @@ -89,7 +89,7 @@ def test_command_ops_image_fulldocument_tiff_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1 --format=tiff", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith(b"\x4d\x4d\x00\x2a") + assert result.stdout_bytes.startswith(b"\x4d\x4d\x00\x2a") def test_command_ops_image_drawing_pdf_success(): @@ -101,8 +101,8 @@ def 
test_command_ops_image_drawing_pdf_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --kind=FullDocumentDrawing --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("%PDF-1.4") - assert 10000 < len(result.stdout) < 20000 + assert result.stdout_bytes.startswith(b"%PDF-1.4") + assert 10_000 < len(result.stdout_bytes) < 20_000 def test_command_ops_image_failure(): diff --git a/tests/test_numberlists.py b/tests/test_numberlists.py index 860eb35a..63096e49 100644 --- a/tests/test_numberlists.py +++ b/tests/test_numberlists.py @@ -5,26 +5,26 @@ def test_parse_numberlist(): """ Proof that conveniently parsing a list of items works. """ - assert parse_numberlist(u"foo , bar") == [u'foo', u'bar'] - assert parse_numberlist(u"foo \n bar") == [u'foo', u'bar'] + assert parse_numberlist("foo , bar") == ['foo', 'bar'] + assert parse_numberlist("foo \n bar") == ['foo', 'bar'] def test_normalize_numbers_valid(): """ Normalize a list of valid patent numbers. """ - assert normalize_numbers([u'EP666666B1', u'EP1000000']) == {'all': [u'EP0666666B1', u'EP1000000'], 'invalid': [], 'valid': [u'EP0666666B1', u'EP1000000']} + assert normalize_numbers(['EP666666B1', 'EP1000000']) == {'all': ['EP0666666B1', 'EP1000000'], 'invalid': [], 'valid': ['EP0666666B1', 'EP1000000']} def test_normalize_numbers_invalid(): """ Normalize a list of invalid patent numbers. """ - assert normalize_numbers([u'foo', u'bar']) == {'all': [u'foo', u'bar'], 'invalid': [u'foo', u'bar'], 'valid': []} + assert normalize_numbers(['foo', 'bar']) == {'all': ['foo', 'bar'], 'invalid': ['foo', 'bar'], 'valid': []} def test_normalize_numbers_mixed(): """ Normalize a list of both valid and invalid patent numbers. """ - assert normalize_numbers([u'EP666666B1', u'foobar']) == {'all': [u'EP0666666B1', u'foobar'], 'invalid': [u'foobar'], 'valid': [u'EP0666666B1']} + assert normalize_numbers(['EP666666B1', 'foobar']) == {'all': ['EP0666666B1', 'foobar'], 'invalid': ['foobar'], 'valid': ['EP0666666B1']} diff --git a/tests/util/test_jwt.py b/tests/util/test_jwt.py index 9fb7f71e..c7204585 100644 --- a/tests/util/test_jwt.py +++ b/tests/util/test_jwt.py @@ -59,7 +59,7 @@ def test_signer_sign_invalid_expiration(jwt_signer): """ with pytest.raises(ValueError) as ex: jwt_signer.sign("foo", ttl="bar") - assert ex.match("value=bar, type= is an invalid JWT expiration date") + assert ex.match("value=bar, type= is an invalid JWT expiration date, use `datetime.datetime` or `datetime.timedelta") def test_signer_unsign_expired_token(): @@ -77,7 +77,7 @@ def test_signer_unsign_expired_token(): 'location': 'JSON Web Token', 'name': '_JWTError', 'jwt_expiry': 1640995200, - 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'}, + 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'}, } @@ -117,8 +117,8 @@ def test_signer_unsign_invalid_payload(jwt_signer): assert value == { 'location': 'JSON Web Token', - 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'}, + 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'}, 'description': 'No "data" attribute in payload/claims', 'name': 'JwtSigner', - 'jwt_payload': {u'foo': u'bar', u'exp': 2145916800}, + 'jwt_payload': {'foo': 'bar', 'exp': 2145916800}, } diff --git a/tests/util/test_numbers_common.py b/tests/util/test_numbers_common.py index a6ebb516..1233a8e7 100644 --- a/tests/util/test_numbers_common.py +++ b/tests/util/test_numbers_common.py @@ -27,11 +27,11 @@ def generate(data): class TestNumberDecoding: - @pytest.mark.parametrize("number,expected,computed", generate(good), 
ids=good.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(good), ids=list(good.keys())) def testDecodeOK(self, number, expected, computed): self.check_ok(number, expected, computed) - @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=bad.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=list(bad.keys())) def testDecodeBAD(self, number, expected, computed): self.check_ok(number, expected, computed) diff --git a/tests/util/test_numbers_helper.py b/tests/util/test_numbers_helper.py index 989ccaf7..c3ecb799 100644 --- a/tests/util/test_numbers_helper.py +++ b/tests/util/test_numbers_helper.py @@ -18,6 +18,6 @@ def test_read_numbersfile(): """ # TODO: Need to adjust for Python 3, see https://stackoverflow.com/a/34677735. - with patch("__builtin__.open", mock_open(read_data=data)) as mock_file: + with patch("builtins.open", mock_open(read_data=data)) as mock_file: numbers = read_numbersfile(None) assert numbers == ['EP666666', 'EP666667', 'EP666668', 'EP666669'] diff --git a/tests/util/test_numbers_normalize.py b/tests/util/test_numbers_normalize.py index 6930587e..2fe9e69b 100644 --- a/tests/util/test_numbers_normalize.py +++ b/tests/util/test_numbers_normalize.py @@ -595,11 +595,11 @@ def normalize_patent_us_smart(input): class TestNumberNormalization: - @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=t.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=list(t.keys())) def testDecodeOK(self, number, expected, computed): self.check_ok(number, expected, computed) - @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=depatisconnect_cases.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=list(depatisconnect_cases.keys())) def test_depatisconnect_alternatives(self, number, expected, computed): self.check_ok(number, expected, computed) diff --git a/tests/util/test_python.py b/tests/util/test_python.py index ad8638a2..d3ce8955 100644 --- a/tests/util/test_python.py +++ b/tests/util/test_python.py @@ -8,11 +8,11 @@ def test_run_command_success_basic(): - assert run_command(["echo", "foo"]).read().strip() == "foo" + assert run_command(["echo", "foo"]).read().strip() == b"foo" def test_run_command_success_input(): - assert run_command(["cat"], input="foo").read().strip() == "foo" + assert run_command(["cat"], input=b"foo").read().strip() == b"foo" def test_run_command_failure_not_found(): @@ -29,8 +29,8 @@ def test_run_command_failure_program_error(): def test_run_command_failure_input_error(): with pytest.raises(RuntimeError) as ex: - run_command(["true"], input={"abc": "def"}) - assert ex.match('Command "true" failed, returncode=None, exception=unhashable type, stderr=') + run_command(["true"], input={b"abc": b"def"}) + assert ex.match('Command "true" failed, returncode=None, exception=memoryview: a bytes-like object is required, not \'dict\', stderr=') def test_memoize(): @@ -49,4 +49,4 @@ def test_exception_traceback(capsys): output = exception_traceback() assert "Traceback (most recent call last)" in output - assert "NameError: global name 'foobar' is not defined" in output + assert "NameError: name \'foobar\' is not defined" in output diff --git a/tests/util/test_text_format.py 
b/tests/util/test_text_format.py index e0174517..a9680957 100644 --- a/tests/util/test_text_format.py +++ b/tests/util/test_text_format.py @@ -4,14 +4,14 @@ def test_slugify(): - assert slugify("Franz jagt Trueffel.") == "franz-jagt-trueffel" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋") == "franz-jagt-truffel-im-wald" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == "franz-jagt-truffel-=-im-wald" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == "Franz-jagt-Truffel-im-Wald" + assert slugify("Franz jagt Trueffel.") == b"franz-jagt-trueffel" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋") == b"franz-jagt-truffel-im-wald" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == b"franz-jagt-truffel-=-im-wald" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == b"Franz-jagt-Truffel-im-Wald" def test_text_indent(): - assert text_indent(u"Franz jagt Trüffel.\nIm Wald.\n\n👋") == u""" + assert text_indent("Franz jagt Trüffel.\nIm Wald.\n\n👋") == """ Franz jagt Trüffel. Im Wald.