From 4566ecbf41f0ec2db343e65552a72de37e5c8f2d Mon Sep 17 00:00:00 2001 From: FrankHeider Date: Fri, 10 May 2019 08:40:32 +0200 Subject: [PATCH 01/23] Start porting to Python 3.6 --- docs/conf.py | 18 ++-- fabfile.py | 6 +- patzilla/access/cipo/drawing.py | 12 +-- patzilla/access/depatech/client.py | 20 ++--- patzilla/access/depatech/clientpool.py | 3 +- patzilla/access/depatech/expression.py | 34 ++++---- patzilla/access/dpma/depatisconnect.py | 10 +-- patzilla/access/dpma/depatisnet.py | 34 ++++---- patzilla/access/dpma/dpmaregister.py | 19 +++-- patzilla/access/epo/espacenet/pyramid.py | 2 +- patzilla/access/epo/ops/api.py | 58 ++++++------- patzilla/access/epo/ops/client.py | 2 +- patzilla/access/epo/ops/commands.py | 1 + .../access/epo/publicationserver/client.py | 4 +- patzilla/access/generic/exceptions.py | 10 +-- patzilla/access/generic/pdf.py | 6 +- patzilla/access/generic/search.py | 24 +++--- patzilla/access/google/search.py | 18 ++-- patzilla/access/ificlaims/api.py | 4 +- patzilla/access/ificlaims/client.py | 44 +++++----- patzilla/access/ificlaims/clientpool.py | 2 +- patzilla/access/ificlaims/commands.py | 1 + patzilla/access/ificlaims/expression.py | 26 +++--- patzilla/access/office.py | 2 +- patzilla/access/sip/client.py | 42 +++++----- patzilla/access/sip/clientpool.py | 4 +- patzilla/access/sip/concordance.py | 4 +- patzilla/access/sip/expression.py | 84 +++++++++---------- patzilla/access/sip/pyramid_service.py | 8 +- patzilla/navigator/export.py | 72 ++++++++-------- patzilla/navigator/services/__init__.py | 6 +- patzilla/navigator/services/admin.py | 2 +- patzilla/navigator/services/analytics.py | 20 ++--- patzilla/navigator/services/depatech.py | 8 +- patzilla/navigator/services/dpma.py | 8 +- patzilla/navigator/services/ificlaims.py | 18 ++-- patzilla/navigator/services/ops.py | 10 +-- patzilla/navigator/services/util.py | 28 +++---- patzilla/navigator/settings.py | 41 ++++----- .../navigator/tools/browser_database_tool.py | 10 +-- patzilla/navigator/util.py | 8 +- patzilla/navigator/views.py | 54 ++++++------ patzilla/util/config/__init__.py | 12 +-- patzilla/util/cql/cheshire3/__init__.py | 6 +- patzilla/util/cql/cheshire3/parser.py | 22 ++--- patzilla/util/cql/cheshire3/test_cheshire3.py | 2 +- patzilla/util/cql/pyparsing/__init__.py | 8 +- patzilla/util/cql/pyparsing/demo.py | 12 +-- patzilla/util/cql/pyparsing/parser.py | 12 +-- patzilla/util/cql/pyparsing/searchparser.py | 37 ++++---- patzilla/util/cql/pyparsing/serializer.py | 16 ++-- patzilla/util/cql/pyparsing/util.py | 2 +- patzilla/util/cql/util.py | 4 +- patzilla/util/crypto/jwt.py | 12 +-- patzilla/util/data/container.py | 4 +- patzilla/util/data/orderedset.py | 6 +- patzilla/util/data/zip.py | 2 +- patzilla/util/database/beaker_mongodb.py | 6 +- .../util/database/beaker_mongodb_gridfs.py | 8 +- patzilla/util/date/__init__.py | 8 +- patzilla/util/email/core.py | 16 ++-- patzilla/util/email/message.py | 32 +++---- patzilla/util/expression/__init__.py | 12 +-- patzilla/util/expression/keywords.py | 4 +- patzilla/util/image/convert.py | 8 +- patzilla/util/ipc/parser.py | 6 +- patzilla/util/network/browser.py | 2 +- patzilla/util/network/requests_xmlrpclib.py | 2 +- patzilla/util/numbers/common.py | 8 +- patzilla/util/numbers/denormalize.py | 10 +-- patzilla/util/numbers/helper.py | 2 +- patzilla/util/numbers/normalize.py | 16 ++-- patzilla/util/numbers/numberlists.py | 6 +- patzilla/util/python/__init__.py | 2 +- patzilla/util/text/format.py | 6 +- patzilla/util/web/email/submit.py | 10 +-- 
patzilla/util/web/identity/store.py | 4 +-
patzilla/util/web/pyramid/cornice.py | 2 +-
patzilla/util/web/pyramid/renderer.py | 2 +-
patzilla/util/web/util/xmlrpclib.py | 6 +-
patzilla/util/web/uwsgi/uwsgidecorators.py | 2 +-
patzilla/util/xml/format.py | 2 +-
pserve.py | 10 +++
setup.py | 4 +-
84 files changed, 591 insertions(+), 549 deletions(-)
create mode 100644 pserve.py

diff --git a/docs/conf.py b/docs/conf.py
index 3a19a2d2..48e0265c 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -49,18 +49,18 @@ master_doc = 'index'
 
 # General information about the project.
-project = u'PatZilla'
-copyright = u'2013-2022, The PatZilla authors'
-author = u'The PatZilla authors'
+project = 'PatZilla'
+copyright = '2013-2022, The PatZilla authors'
+author = 'The PatZilla authors'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
-version = u'0.169.3'
+version = '0.169.3'
 # The full version, including alpha/beta/rc tags.
-release = u'0.169.3'
+release = '0.169.3'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -145,8 +145,8 @@
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'PatZilla.tex', u'PatZilla Documentation',
-     u'The PatZilla authors', 'manual'),
+    (master_doc, 'PatZilla.tex', 'PatZilla Documentation',
+     'The PatZilla authors', 'manual'),
 ]
 
@@ -155,7 +155,7 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    (master_doc, 'patzilla', u'PatZilla Documentation',
+    (master_doc, 'patzilla', 'PatZilla Documentation',
      [author], 1)
 ]
 
@@ -166,7 +166,7 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'PatZilla', u'PatZilla Documentation',
+    (master_doc, 'PatZilla', 'PatZilla Documentation',
      author, 'PatZilla', 'One line description of project.',
      'Miscellaneous'),
 ]
diff --git a/fabfile.py b/fabfile.py
index d0c5d1f5..377b1400 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -34,7 +34,7 @@ def install(version, target):
     if not version:
         version = pkg_version
 
-    print 'Installing package {0}, version {1} to target {2}.'.format(*map(yellow, [pkg_name, version, target]))
+    print('Installing package {0}, version {1} to target {2}.'.format(*list(map(yellow, [pkg_name, version, target]))))
 
     if env.confirm:
         response = ask('Proceed (y/n)? ', ('y', 'n'))
     else:
         response = 'y'
 
@@ -72,7 +72,7 @@ def install(version, target):
 
         restart_service(target)
 
     else:
-        print yellow('Skipped package install due to user request.')
+        print(yellow('Skipped package install due to user request.'))
 
 def setup_package(package, virtualenv, options=''):
     #--index-url=http://c.pypi.python.org/simple
@@ -100,7 +100,7 @@ def restart_service(target):
         if uwsgi_name:
             run('service uwsgi reload %s' % uwsgi_name)
     else:
-        print(red('WARNING: Could not restart service "%s"' % target))
+        print((red('WARNING: Could not restart service "%s"' % target)))
 
 @task
 @hosts(INSTALLATION_HOST)
diff --git a/patzilla/access/cipo/drawing.py b/patzilla/access/cipo/drawing.py
index f15d59c4..4adfaff5 100644
--- a/patzilla/access/cipo/drawing.py
+++ b/patzilla/access/cipo/drawing.py
@@ -1,9 +1,11 @@
 # -*- coding: utf-8 -*-
 # (c) 2014-2016 Andreas Motl, Elmyra UG
-import re
+# py27 import re
+import regex as re
 import logging
 import requests
-from BeautifulSoup import BeautifulSoup
+# py27 from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 from patzilla.util.numbers.common import split_patent_number
 
 log = logging.getLogger(__name__)
@@ -70,7 +72,7 @@ def get_first_drawing_url(patent):
     images_index_html = fetch_images_index(images_index_url)
     soup = BeautifulSoup(images_index_html)
     # Canadian Patent Document 141597. Drawings page. Image 1 of 3
-    first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(ur'/opic-cipo/cpd/page'))['src']
+    first_drawing_url = cipo_baseurl + soup.find('img', src=re.compile(r'/opic-cipo/cpd/page'))['src']
     return first_drawing_url
@@ -83,6 +85,6 @@ def get_first_drawing_url(patent):
     payload = fetch_first_drawing(split_patent_number(number))
     if payload:
         #print "payload length:", len(payload)
-        print payload
+        print(payload)
     else:
-        print "not found"
+        print("not found")
diff --git a/patzilla/access/depatech/client.py b/patzilla/access/depatech/client.py
index 652bd6e1..9253f835 100644
--- a/patzilla/access/depatech/client.py
+++ b/patzilla/access/depatech/client.py
@@ -73,7 +73,7 @@ def search_real(self, query, options=None):
             transport = 'json'
 
         query.expression = self.translate_deparom_query(query.expression)
-        log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format(
+        log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}; user={username}".format(
             query.expression, offset, limit, **self.__dict__))
 
         starttime = timeit.default_timer()
@@ -92,7 +92,7 @@ def search_real(self, query, options=None):
             'from': offset,
             'size': limit,
         }
-        log.info(u'{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format(
+        log.info('{backend_name}: query={query}, uri={uri}, params={params}, options={options}'.format(
             query=query, uri=uri, params=params, options=options.dump(), backend_name=self.backend_name))
 
         # Perform search request
@@ -164,10 +164,10 @@ def search_real(self, query, options=None):
                 if 'reason' not in upstream_error:
                     upstream_error['reason'] = 'Reason unknown'
 
-                message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error)
+                message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error)
 
                 raise self.search_failed(
-                    user_info=u'Error searching depa.tech.',
+                    user_info='Error searching depa.tech.',
                     message=message,
                     response=response)
@@ -180,7 +180,7 @@ def translate_deparom_query(self, expression):
 
         expression = expression.replace(upstream_prefix, '').replace('deparom:', '')
 
-        log.info(u'{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format(
+        log.info('{backend_name}: Translate DEPAROM query expression={expression}, uri={uri}'.format(
             expression=expression, uri=uri, backend_name=self.backend_name))
 
         expression = upstream_prefix + expression
@@ -212,7 +212,7 @@ def translate_deparom_query(self, expression):
 
         elif response.status_code >= 400:
 
-            message = u'Reason unknown'
+            message = 'Reason unknown'
 
             if response.headers.get('Content-Type', '').startswith('application/json'):
@@ -224,15 +224,15 @@ def translate_deparom_query(self, expression):
                     upstream_error['code'] = response_data['status']
 
                 if 'reason' not in upstream_error:
-                    upstream_error['reason'] = u'Reason unknown'
+                    upstream_error['reason'] = 'Reason unknown'
 
-                message = u'Response status code: {code}\n\n{reason}'.format(**upstream_error)
+                message = 'Response status code: {code}\n\n{reason}'.format(**upstream_error)
 
             else:
                 message = response.content
 
             raise self.search_failed(
-                user_info=u'Translating DEPAROM query expression failed',
+                user_info='Translating DEPAROM query expression failed',
                 message=message,
                 response=response)
@@ -314,7 +314,7 @@ def read(self):
         self.read_documents()
 
     def document_to_number(self, document):
-        _id = document[u'_id']
+        _id = document['_id']
         cc, docno, kindcode = _id.split('.')
         publication_number = cc + docno + kindcode
         number = normalize_patent(publication_number)
diff --git a/patzilla/access/depatech/clientpool.py b/patzilla/access/depatech/clientpool.py
index 223d094d..b47bb979 100644
--- a/patzilla/access/depatech/clientpool.py
+++ b/patzilla/access/depatech/clientpool.py
@@ -5,6 +5,7 @@
 from pyramid.httpexceptions import HTTPUnauthorized
 from zope.interface.declarations import implements
 from zope.interface.interface import Interface
+from zope.interface import implementer
 from patzilla.access.depatech.client import DepaTechClient
 from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager
@@ -83,7 +84,7 @@ class DepaTechClientPool(object):
     depa.tech client pool as Pyramid utility implementation.
""" - implements(IDepaTechClientPool) +# py27 implements(IDepaTechClientPool) def __init__(self, api_uri): logger.info("Creating upstream client pool for depa.tech") diff --git a/patzilla/access/depatech/expression.py b/patzilla/access/depatech/expression.py index ef6cebde..ca925fec 100644 --- a/patzilla/access/depatech/expression.py +++ b/patzilla/access/depatech/expression.py @@ -21,7 +21,7 @@ class DepaTechGrammar(CQLGrammar): def preconfigure(self): CQLGrammar.preconfigure(self) - self.cmp_single = u':'.split() + self.cmp_single = ':'.split() class DepaTechParser(object): @@ -161,7 +161,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): return expression = None - format = u'{0}:{1}' + format = '{0}:{1}' # ------------------------------------------ @@ -184,20 +184,20 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): patent = patent_normalized if patent: - subexpression = u'PC:{country} AND DE:{number}'.format(**patent) + subexpression = 'PC:{country} AND DE:{number}'.format(**patent) if patent['kind']: - subexpression += u' AND KI:{kind}'.format(**patent) - expression_parts.append(u'({})'.format(subexpression)) + subexpression += ' AND KI:{kind}'.format(**patent) + expression_parts.append('({})'.format(subexpression)) # Application number - subexpression = u'AN:{}'.format(value) + subexpression = 'AN:{}'.format(value) expression_parts.append(subexpression) - expression = u' OR '.join(expression_parts) + expression = ' OR '.join(expression_parts) # Priority number - subexpression = u'NP:{}'.format(value) + subexpression = 'NP:{}'.format(value) expression_parts.append(subexpression) - expression = u' OR '.join(expression_parts) + expression = ' OR '.join(expression_parts) elif key == 'pubdate': @@ -212,7 +212,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # e.g. 1991 if len(value) == 4 and value.isdigit(): - value = u'within {}0101,{}1231'.format(value, value) + value = 'within {}0101,{}1231'.format(value, value) # e.g. 
1990-2014, 1990 - 2014 value = year_range_to_within(value) @@ -254,7 +254,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): elif key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) elif key == 'class': @@ -268,7 +268,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # Put value into parenthesis, to properly capture expressions if value: - value = u'({value})'.format(value=value) + value = '({value})'.format(value=value) # Parse value as simple query expression query_object = CQL(cql=value) @@ -290,7 +290,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): # ------------------------------------------ if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']: if has_booleans(value) and not should_be_quoted(value): - value = u'({0})'.format(value) + value = '({0})'.format(value) # ------------------------------------------ # expression formatter @@ -358,15 +358,15 @@ def triple_callback(token, index, binop, term): def format_expression(format, fieldname, value): expression = None - if type(fieldname) in types.StringTypes: + if type(fieldname) in (str,): expression = format.format(fieldname, value) - elif type(fieldname) is types.ListType: + elif type(fieldname) is list: subexpressions = [] for fieldname in fieldname: subexpressions.append(format.format(fieldname, value)) expression = ' or '.join(subexpressions) # surround with parentheses - expression = u'({0})'.format(expression) + expression = '({0})'.format(expression) return expression def lucene_convert_class(value): @@ -395,4 +395,4 @@ def should_be_quoted(value): if __name__ == '__main__': - print DepaTechParser('IC:G01F000184').keywords + print(DepaTechParser('IC:G01F000184').keywords) diff --git a/patzilla/access/dpma/depatisconnect.py b/patzilla/access/dpma/depatisconnect.py index 0065bb25..0d42d769 100644 --- a/patzilla/access/dpma/depatisconnect.py +++ b/patzilla/access/dpma/depatisconnect.py @@ -4,9 +4,9 @@ import json import logging import requests -import xmlrpclib -from StringIO import StringIO -from ConfigParser import NoOptionError +import xmlrpc.client +from io import StringIO +from configparser import NoOptionError from lxml import etree as ET from lxml.builder import E from cornice.util import to_list @@ -72,7 +72,7 @@ def run_acquisition(document_number, doctypes=None): url = archive_service_baseurl + '/RPC2' transport = RequestsTransport(session=get_client(), timeout=(2, 17)) transport.use_https = use_https - server = xmlrpclib.ServerProxy(url, transport=transport) + server = xmlrpc.client.ServerProxy(url, transport=transport) return server.runAcquisition(numbers, doctypes) def fetch_xml(number): @@ -313,4 +313,4 @@ def depatisconnect_abstracts(document_number, language=None, invalidate=False): # Failed on 2018-04-23 #response = depatisconnect_claims('USD813591S') - print json.dumps(response) + print(json.dumps(response)) diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index 1b000fa3..a93bb99c 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -1,14 +1,16 @@ # -*- coding: utf-8 -*- # (c) 2014-2015 Andreas Motl, Elmyra UG -import re +# py27 import re +import regex as re import sys import json import types import logging -import urllib2 +import urllib.request, urllib.error, urllib.parse import mechanize -import cookielib -from BeautifulSoup import BeautifulSoup +import 
http.cookiejar +# py27 from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup from xlrd import open_workbook from patzilla.access.generic.search import GenericSearchResponse from patzilla.util.date import from_german, date_iso @@ -44,7 +46,7 @@ class DpmaDepatisnetAccess: ] def __init__(self): - print 'DpmaDepatisnetAccess.__init__' + print('DpmaDepatisnetAccess.__init__') self.baseurl = 'https://depatisnet.dpma.de/DepatisNet' self.searchurl_cql = self.baseurl + '/depatisnet?action=experte&switchToLang=en' self.searchurl_ikofax = self.baseurl + '/depatisnet?action=ikofax&switchToLang=en' @@ -65,7 +67,7 @@ def setup_browser(self): # http://wwwsearch.sourceforge.net/mechanize/ # https://github.com/python-mechanize/mechanize self.browser = mechanize.Browser() - self.browser.set_cookiejar(cookielib.LWPCookieJar()) + self.browser.set_cookiejar(http.cookiejar.LWPCookieJar()) self.browser.addheaders = [('User-Agent', regular_user_agent)] # ignore robots.txt self.browser.set_handle_robots(False) @@ -85,7 +87,7 @@ def search_patents(self, query, options=None): limit = options.get('limit') max_hits = options.get('max_hits') - logger.info(u'Searching documents. query="%s", options=%s' % (query, options)) + logger.info('Searching documents. query="%s", options=%s' % (query, options)) # 0. create browser instance if not self.browser: @@ -97,7 +99,7 @@ def search_patents(self, query, options=None): search_url = self.searchurl_ikofax try: self.browser.open(search_url) - except urllib2.HTTPError as ex: + except urllib.error.HTTPError as ex: logger.critical('Hard error with DEPATISnet: {}'.format(ex)) self.logout() raise @@ -127,7 +129,7 @@ def search_patents(self, query, options=None): #self.browser['so'] = ['desc'] # sort by user selection - if 'sorting' in options and type(options['sorting']) is types.DictionaryType: + if 'sorting' in options and type(options['sorting']) is dict: self.browser['sf'] = [options['sorting']['field']] self.browser['so'] = [options['sorting']['order']] @@ -232,11 +234,11 @@ def find_errors(self, body): [s.extract() for s in error_message('a')] [parts.append(s.extract()) for s in error_message('p', {'class': 'headline'})] reason = ', '.join([part.getText() for part in parts]) - error_message = u'{}\n{}'.format(reason, str(error_message)) + error_message = '{}\n{}'.format(reason, str(error_message)) else: error_message = '' - if u'An error has occurred' in body: + if 'An error has occurred' in body: error_message = error_message.replace('\t', '').replace('\r\n', '\n').strip() raise SyntaxError(error_message) @@ -355,17 +357,17 @@ def excel_to_dict(payload): start_row = 0 # upstream added new status line to first row, e.g. 
"Search query: pn=(EP666666) Status: 25.09.2015" - if u'Search query' in sheet.cell(0, 0).value: + if 'Search query' in sheet.cell(0, 0).value: start_row = 1 # read header values - keys = [sheet.cell(start_row, col_index).value for col_index in xrange(sheet.ncols)] + keys = [sheet.cell(start_row, col_index).value for col_index in range(sheet.ncols)] # read sheet content dict_list = [] - for row_index in xrange(start_row + 1, sheet.nrows): + for row_index in range(start_row + 1, sheet.nrows): d = {keys[col_index]: sheet.cell(row_index, col_index).value - for col_index in xrange(sheet.ncols)} + for col_index in range(sheet.ncols)} dict_list.append(d) return dict_list @@ -390,4 +392,4 @@ def excel_to_dict(payload): else: data = depatisnet.search_patents('BI=bagger and PC=DE') - print json.dumps(data) + print(json.dumps(data)) diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py index a662ef49..b314b89c 100644 --- a/patzilla/access/dpma/dpmaregister.py +++ b/patzilla/access/dpma/dpmaregister.py @@ -16,7 +16,8 @@ from pprint import pformat from jsonpointer import JsonPointer, JsonPointerException from xml.etree.ElementTree import fromstring -from BeautifulSoup import BeautifulSoup +# py27 from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup from collections import namedtuple, OrderedDict from patzilla.access.dpma.util import dpma_file_number from patzilla.boot.cache import configure_cache_backend @@ -528,13 +529,13 @@ def decode(self): self.decode_badgerfish() # Document numbers - self.application_reference = map( + self.application_reference = list(map( operator.itemgetter('document_id'), - self.convert_list(self.query_data(self.pointer_application_reference))) + self.convert_list(self.query_data(self.pointer_application_reference)))) - self.publication_reference = map( + self.publication_reference = list(map( operator.itemgetter('document_id'), - self.convert_list(self.query_data(self.pointer_publication_reference))) + self.convert_list(self.query_data(self.pointer_publication_reference)))) # Classifications self.classifications['ipcr'] = self.convert_list(self.query_data(self.pointer_classifications_ipcr)) @@ -565,9 +566,9 @@ def decode(self): self.designated_states = self.convert_list(self.query_data(self.pointer_designated_states)) # Citations - self.references_cited = map( + self.references_cited = list(map( operator.attrgetter('document_id.doc_number'), - bunchify(self.convert_list(self.query_data(self.pointer_references_cited)))) + bunchify(self.convert_list(self.query_data(self.pointer_references_cited))))) # office-specific-bib-data self.office_specific_bibdata = self.convert_dict(self.query_data(self.pointer_office_specific_bibdata)) @@ -590,7 +591,7 @@ def convert_list(cls, things_raw, nested_element='$'): things = [] for thing in to_list(things_raw): if not thing: continue - if nested_element in thing and len(thing.keys()) == 1: + if nested_element in thing and len(list(thing.keys())) == 1: thing = thing[nested_element] if isinstance(thing, dict): thing = cls.convert_dict(thing) @@ -606,7 +607,7 @@ def convert_dict(cls, data): return {} newdata = OrderedDict() - for key, value in data.items(): + for key, value in list(data.items()): # Decode nested text or recurse if '$' in value: diff --git a/patzilla/access/epo/espacenet/pyramid.py b/patzilla/access/epo/espacenet/pyramid.py index db3038bb..b36da609 100644 --- a/patzilla/access/epo/espacenet/pyramid.py +++ b/patzilla/access/epo/espacenet/pyramid.py @@ -1,6 +1,6 @@ # -*- coding: 
utf-8 -*-
 # (c) 2015-2018 Andreas Motl, Elmyra UG
-from __future__ import absolute_import
+
 import logging
 from cornice.service import Service
 from pyramid.httpexceptions import HTTPBadRequest, HTTPNotFound
diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py
index 5a11e3cf..4056fcbe 100644
--- a/patzilla/access/epo/ops/api.py
+++ b/patzilla/access/epo/ops/api.py
@@ -133,7 +133,7 @@ def results_swap_family_members(response):
     def match_filter(item, filter):
         if callable(filter):
             patent = split_patent_number(item)
-            outcome = filter(patent)
+            outcome = filter(patent)  # "filter" is the callable argument here, not the builtin
         else:
             outcome = item.startswith(filter)
         return outcome
@@ -310,7 +310,7 @@ def ops_published_data_search_real(constituents, query, range):
     ops = get_ops_client()
 
     # Send request to OPS.
-    range_begin, range_end = map(int, range.split('-'))
+    range_begin, range_end = list(map(int, range.split('-')))
     response = ops.published_data_search(
         query, range_begin=range_begin, range_end=range_end, constituents=to_list(constituents))
@@ -461,7 +461,7 @@ def image_representative_from_family(patent, countries, func_filter=None):
     # Compute alternative family members sorted by given countries
     alternatives = family.publications_by_country(exclude=[document], countries=countries)
     if func_filter:
-        alternatives = filter(func_filter, alternatives)
+        alternatives = list(filter(func_filter, alternatives))
 
     if alternatives:
         # TODO: Currently using first item as representative. This might change.
@@ -583,7 +583,7 @@ def inquire_images(document):
 
 def is_fulldocument(node):
-    return '@desc' in node and node['@desc'] == u'FullDocument'
+    return '@desc' in node and node['@desc'] == 'FullDocument'
 
 def is_amendment_only(node):
@@ -602,7 +602,7 @@ def is_amendment_only(node):
     """
     if is_fulldocument(node):
         sections = to_list(node.get('ops:document-section', []))
-        if len(sections) == 1 and sections[0]['@name'] == u'AMENDMENT':
+        if len(sections) == 1 and sections[0]['@name'] == 'AMENDMENT':
             return True
     return False
@@ -659,7 +659,7 @@ def get_ops_image(document, page, kind, format=None):
     # 1. Inquire images to compute url to image resource
     image_info = inquire_images(document)
     if image_info:
-        if image_info.has_key(kind):
+        if kind in image_info:
             drawing_node = image_info.get(kind)
             link = drawing_node['@link']
@@ -670,7 +670,7 @@ def get_ops_image(document, page, kind, format=None):
                 page = page + start_page - 1
 
         # fallback chain, if no drawings are available
-        elif image_info.has_key('JapaneseAbstract'):
+        elif 'JapaneseAbstract' in image_info:
             drawing_node = image_info.get('JapaneseAbstract')
             link = drawing_node['@link']
             page = 1
@@ -913,32 +913,32 @@ def handle_error(response, location):
     if 'CLIENT.InvalidCountryCode' in error_content:
         ops_code = 'CLIENT.InvalidCountryCode'
-        message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+        message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
         log.error(message)
         return response_json
 
     if 'SERVER.EntityNotFound' in error_content:
         ops_code = 'SERVER.EntityNotFound'
-        message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
+        message = 'OPS API response ({status}, {ops_code}). 
url={url}'.format(status=status, ops_code=ops_code, url=url) log.warning(message) return response_json if 'OPS - 404' in error_content or 'Page not found' in error_content: ops_code = '404 OPS Page not found' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) - log.error(u'OPS API errors:\n{}'.format(pformat(request.errors))) + log.error('OPS API errors:\n{}'.format(pformat(request.errors))) response_json.status_code = 502 return response_json if 'This API version is not supported' in error_content: ops_code = '404 API version not supported' - message = u'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) + message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) response_json.status_code = 502 return response_json - log.error(u'OPS API errors:\n{}'.format(pformat(request.errors))) + log.error('OPS API errors:\n{}'.format(pformat(request.errors))) return response_json @@ -972,7 +972,7 @@ def pdf_document_build(patent): # 3. add pdf metadata page_sections = None - if resource_info.has_key('ops:document-section'): + if 'ops:document-section' in resource_info: page_sections = resource_info['ops:document-section'] #pprint(page_sections) @@ -1028,7 +1028,7 @@ def ops_document_kindcodes(patent): for document in documents: # TODO: check whether a single occurrance of "not found" should really raise this exception - if document.has_key('@status') and document['@status'] == 'not found': + if '@status' in document and document['@status'] == 'not found': error = HTTPNotFound(error_msg_access) raise error @@ -1080,7 +1080,7 @@ def analytics_family(query): # B. 
Enrich all family representatives # http://ops.epo.org/3.1/rest-services/family/application/docdb/US19288494.xml - for family_id, document_number in family_representatives.iteritems(): + for family_id, document_number in family_representatives.items(): payload.setdefault(family_id, {}) @@ -1246,7 +1246,7 @@ def __init__(self): self.items = [] def __repr__(self): - return u'<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items)) + return '<{name} object at 0x{id}>\nitems:\n{items}'.format(name=self.__class__.__name__, id=id(self), items=pformat(self.items)) def publications_by_country(self, exclude=None, countries=None): exclude = exclude or [] @@ -1290,13 +1290,13 @@ def _find_publication_number_by_prio_number(): def _format_title(title): - return u'[{0}] {1}'.format(title.get(u'@lang', u'').upper() or u'', title[u'$'] or u'') + return '[{0}] {1}'.format(title.get('@lang', '').upper() or '', title['$'] or '') def _format_abstract(abstract): if not abstract: return lines = to_list(abstract['p']) - lines = map(lambda line: line['$'], lines) - return u'[{0}] {1}'.format(abstract.get(u'@lang', u'').upper() or u'', '\n'.join(lines)) + lines = [line['$'] for line in lines] + return '[{0}] {1}'.format(abstract.get('@lang', '').upper() or '', '\n'.join(lines)) def _mogrify_parties(partylist, name): results = [] @@ -1307,9 +1307,9 @@ def _mogrify_parties(partylist, name): parties[key][party['@data-format']] = party[name]['name']['$'] for key in sorted(parties.keys()): - name_epodoc = parties[key]['epodoc'].replace(u'\u2002', u' ') + name_epodoc = parties[key]['epodoc'].replace('\u2002', ' ') name_original = parties[key]['original'] - entry = u'{0}; {1}'.format(name_epodoc, name_original) + entry = '{0}; {1}'.format(name_epodoc, name_original) results.append(entry) return results @@ -1338,13 +1338,13 @@ def _result_list_compact(response): try: titles = to_list(pointer_invention_title.resolve(result)) - titles = map(_format_title, titles) + titles = list(map(_format_title, titles)) except JsonPointerException: titles = None try: abstracts = to_list(pointer_abstract.resolve(result)) - abstracts = map(_format_abstract, abstracts) + abstracts = list(map(_format_abstract, abstracts)) except JsonPointerException: abstracts = None @@ -1382,10 +1382,10 @@ def _summarize_metrics(payload, kind): except KeyError: return 'error while computing value' - total_response_size_entries = filter(lambda item: item['name'] == kind, metrics)[0]['values'] + total_response_size_entries = [item for item in metrics if item['name'] == kind][0]['values'] #print total_response_size_entries - total_response_sizes = map(lambda item: float(item['value']), total_response_size_entries) + total_response_sizes = [float(item['value']) for item in total_response_size_entries] #print total_response_sizes total = sum(total_response_sizes) @@ -1421,6 +1421,6 @@ def ops_service_usage(date_begin, date_end): if __name__ == '__main__': # pragma: nocover data = ops_service_usage('06/11/2014', '09/12/2014') - print 'Time range: {0}'.format(data['time-range']) - print 'Response size: {0}G'.format(data['response-size'] / float(10**9)) - print 'Message count: {0}'.format(data['message-count']) + print('Time range: {0}'.format(data['time-range'])) + print('Response size: {0}G'.format(data['response-size'] / float(10**9))) + print('Message count: {0}'.format(data['message-count'])) diff --git a/patzilla/access/epo/ops/client.py b/patzilla/access/epo/ops/client.py index 
a0037443..f0826655 100644
--- a/patzilla/access/epo/ops/client.py
+++ b/patzilla/access/epo/ops/client.py
@@ -78,7 +78,7 @@ class OpsClientPool(object):
     EPO/OPS client pool as Pyramid utility implementation.
     """
 
-    implements(IOpsClientPool)
+# py27 implements(IOpsClientPool)
 
     def __init__(self):
         logger.info("Creating upstream client pool for EPO/OPS")
diff --git a/patzilla/access/epo/ops/commands.py b/patzilla/access/epo/ops/commands.py
index cd94aa02..038625fe 100644
--- a/patzilla/access/epo/ops/commands.py
+++ b/patzilla/access/epo/ops/commands.py
@@ -13,6 +13,7 @@
     export OPS_API_CONSUMER_SECRET=rrXdr5WA7x9tudmP
     patzilla ops search "txt=(wind or solar) and energy"
 
+
 Use configuration file::
 
     export PATZILLA_CONFIG=patzilla/config/development-local.ini
diff --git a/patzilla/access/epo/publicationserver/client.py b/patzilla/access/epo/publicationserver/client.py
index 5e9a38bd..5777e48a 100644
--- a/patzilla/access/epo/publicationserver/client.py
+++ b/patzilla/access/epo/publicationserver/client.py
@@ -25,7 +25,7 @@ def fetch_pdf(document_number):
 
     patent = normalize_patent(document_number, as_dict=True, provider='espacenet')
 
-    url_tpl = u'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}'
+    url_tpl = 'https://data.epo.org/publication-server/pdf-document?cc=EP&pn={number}&ki={kind}'
 
     url = url_tpl.format(**patent)
 
@@ -63,4 +63,4 @@ def fetch_pdf(document_number):
 
 if __name__ == '__main__':
-    print fetch_pdf('EP666666A2')
+    print(fetch_pdf('EP666666A2'))
diff --git a/patzilla/access/generic/exceptions.py b/patzilla/access/generic/exceptions.py
index 7e9c4224..6b188cef 100644
--- a/patzilla/access/generic/exceptions.py
+++ b/patzilla/access/generic/exceptions.py
@@ -14,11 +14,11 @@ class GenericAdapterException(Exception):
 
     def __init__(self, *args, **kwargs):
         self.data = None
-        if kwargs.has_key('data'):
+        if 'data' in kwargs:
            self.data = kwargs['data']
 
        self.user_info = ''
-        if kwargs.has_key('user_info'):
+        if 'user_info' in kwargs:
            self.user_info = kwargs['user_info']
 
        super(GenericAdapterException, self).__init__(*args)
@@ -30,11 +30,11 @@ def get_message(self):
        #message_parts.append(ex.user_info)
        message['user'] = cgi.escape(self.user_info)
        if hasattr(self, 'message'):
-            message_parts.append(self.__class__.__name__ + u': ' + u'<pre>{message}</pre>'.format(message=cgi.escape(self.message)))
+            message_parts.append(self.__class__.__name__ + ': ' + '<pre>{message}</pre>'.format(message=cgi.escape(self.message)))
        if hasattr(self, 'details'):
-            message_parts.append(u'<pre>{message}</pre>'.format(message=cgi.escape(self.details)))
+            message_parts.append('<pre>{message}</pre>'.format(message=cgi.escape(self.details)))
 
-        message['details'] = u'<br/>'.join(message_parts)
+        message['details'] = '<br/>'.join(message_parts)
 
        return message
diff --git a/patzilla/access/generic/pdf.py b/patzilla/access/generic/pdf.py
index c46ba6e5..8516b01f 100644
--- a/patzilla/access/generic/pdf.py
+++ b/patzilla/access/generic/pdf.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 # (c) 2013-2022 The PatZilla Developers
 import logging
-from StringIO import StringIO
+from io import StringIO
 from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
 
 import attr
@@ -56,7 +56,7 @@ def pdf_universal_real(patent, response):
     if document is None:
         log.error('Locating a document at the domestic office requires '
                   'a decoded document number for "{}"'.format(patent))
-        raise ValueError(u'Unable to decode document number {}'.format(patent))
+        raise ValueError('Unable to decode document number {}'.format(patent))
 
     # 1. If it's an EP document, try European publication server first.
     if response.pdf is None and document.country == 'EP':
@@ -92,7 +92,7 @@ def pdf_universal_real(patent, response):
         try:
             # Skip requests for documents w/o kindcode
             if not document.kind:
-                raise ValueError(u'No kindcode for patent: {}'.format(patent))
+                raise ValueError('No kindcode for patent: {}'.format(patent))
 
             response.pdf = depatisconnect_fetch_pdf(number_normalized)
             response.datasource = 'dpma'
diff --git a/patzilla/access/generic/search.py b/patzilla/access/generic/search.py
index 3077c8be..57065e95 100644
--- a/patzilla/access/generic/search.py
+++ b/patzilla/access/generic/search.py
@@ -13,27 +13,27 @@ class GenericSearchClient(object):
 
     def lm(self, message):
-        message = u'{backend_name}: {message}'.format(message=message, **self.__dict__)
+        message = '{backend_name}: {message}'.format(message=message, **self.__dict__)
         return message
 
     def search_failed(self, message=None, response=None, user_info=None, ex=None, meta=None):
 
         # Compute user info
-        user_info = user_info or u'Search failed with unknown reason, please report this error to us.'
+        user_info = user_info or 'Search failed with unknown reason, please report this error to us.'
 
         meta = meta or {}
 
         # Compute reason and status
-        message = message or u'unknown'
+        message = message or 'unknown'
         if ex:
-            message = u'{}: {}'.format(ex.__class__.__name__, ex.message)
+            message = '{}: {}'.format(ex.__class__.__name__, ex.message)
 
         # Compute and emit log message
-        log_message = u'{backend_name}: Search failed. message={message}'.format(message=message, **self.__dict__)
+        log_message = '{backend_name}: Search failed. 
message={message}'.format(message=message, **self.__dict__) if meta: - log_message += u', meta=' + unicode(meta) + log_message += ', meta=' + str(meta) if response: - status = unicode(response.status_code) + u' ' + response.reason - log_message += u', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8')) + status = str(response.status_code) + ' ' + response.reason + log_message += ', status={status}, response=\n{response}'.format(status=status, response=response.content.decode('utf-8')) log.error(log_message) # Return exception object @@ -177,8 +177,8 @@ def read_documents(self): if number_normalized: number = number_normalized - document[u'publication_number'] = number - document[u'upstream_provider'] = self.meta.upstream.name + document['publication_number'] = number + document['upstream_provider'] = self.meta.upstream.name def render(self): @@ -216,7 +216,7 @@ def family_remover(item): # Sanity checks on family id # Do not remove documents without valid family id - if not fam or fam in [u'0', u'-1']: + if not fam or fam in ['0', '-1']: return True # "Seen" filtering logic @@ -233,7 +233,7 @@ def family_remover(item): # Update metadata and content # 1. Apply family cleansing filter to main documents response - self.documents = filter(family_remover, self.documents) + self.documents = list(filter(family_remover, self.documents)) #print 'removed_map:'; pprint(removed_map) # 2. Add list of removed family members to output diff --git a/patzilla/access/google/search.py b/patzilla/access/google/search.py index a55b81c5..b8888e0c 100644 --- a/patzilla/access/google/search.py +++ b/patzilla/access/google/search.py @@ -2,11 +2,13 @@ # (c) 2014 Andreas Motl, Elmyra UG import json from pyramid.encode import urlencode -import re +# py27 import re import sys import logging import requests -from BeautifulSoup import BeautifulSoup +# py27 from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup + from patzilla.util.expression.keywords import keywords_from_boolean_expression from patzilla.util.numbers.normalize import normalize_patent @@ -88,7 +90,7 @@ def tweak_captcha_response(self, body): captcha_form['action'] = baseurl + '/' + captcha_form['action'] newbody = str(soup) - print newbody + print(newbody) return newbody def parse_response(self, body): @@ -163,7 +165,7 @@ def parse_response(self, body): 'message': message, } - print payload + print(payload) return payload @@ -226,7 +228,7 @@ def pair_to_term(cls, key, value): value_normalized = normalize_patent(value) if value_normalized: value = value_normalized - term = u'{0}:{1}'.format(fieldname, value) + term = '{0}:{1}'.format(fieldname, value) else: term = value @@ -243,7 +245,7 @@ def serialize(self): """ query_params = [] tbs_params = [] - for key, value in self.criteria.iteritems(): + for key, value in self.criteria.items(): term = self.pair_to_term(key, value) if term['parameter'] == 'q': query_params.append(term['term']) @@ -265,7 +267,7 @@ def serialize(self): def get_keywords(self): keywords = [] - for key, value in self.criteria.iteritems(): + for key, value in self.criteria.items(): keywords += keywords_from_boolean_expression(key, value) return keywords @@ -282,4 +284,4 @@ def get_keywords(self): #data = google.search('matrix', 19900) data = google.search('intitle:matrix', 19900) - print data + print(data) diff --git a/patzilla/access/ificlaims/api.py b/patzilla/access/ificlaims/api.py index 7680403f..2c30de9e 100644 --- a/patzilla/access/ificlaims/api.py +++ 
b/patzilla/access/ificlaims/api.py @@ -157,7 +157,7 @@ def ificlaims_download_multi(numberlist, formats): for format in formats: - format_parts = format.split(u':') + format_parts = format.split(':') # decode modifiers if len(format_parts) == 1: @@ -235,7 +235,7 @@ def ificlaims_download_single(number, format, options=None): try: response = ificlaims_download(number, format, options) - except IFIClaimsException, ex: + except IFIClaimsException as ex: logger.warn('IFI: IFIClaimsException for number={number}, format={format}, options={options}: {ex}'.format(**locals())) if response.payload: diff --git a/patzilla/access/ificlaims/client.py b/patzilla/access/ificlaims/client.py index db9a7f2a..4f1393e6 100644 --- a/patzilla/access/ificlaims/client.py +++ b/patzilla/access/ificlaims/client.py @@ -141,36 +141,36 @@ def search_real(self, query, options=None): if 'msg' not in upstream_error: upstream_error['msg'] = 'Reason unknown' - message = u'Response status code: {code}\n\n{msg}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{msg}'.format(**upstream_error) # Enrich "maxClauseCount" message, e.g. raised by {!complexphrase}text:"auto* AND leucht*"~5 - if upstream_error["code"] == 500 and u'maxClauseCount is set to' in upstream_error["msg"]: + if upstream_error["code"] == 500 and 'maxClauseCount is set to' in upstream_error["msg"]: raise self.search_failed( - user_info=u'Too many terms in phrase expression, wildcard term prefixes might by too short.', + user_info='Too many terms in phrase expression, wildcard term prefixes might by too short.', message=message, response=response) # Enrich "no servers hosting shard" message elif upstream_error["code"] == 503 and \ ( - u'no servers hosting shard' in upstream_error["msg"] or \ - u'No server is available' in upstream_error["msg"] + 'no servers hosting shard' in upstream_error["msg"] or \ + 'No server is available' in upstream_error["msg"] ): raise self.search_failed( - user_info=u'Error while connecting to upstream database. Database might be offline.', + user_info='Error while connecting to upstream database. Database might be offline.', message=message, response=response) # Regular traceback elif upstream_error["code"] == 500 and 'trace' in upstream_error: - message = u'Response status code: {code}\n\n{trace}'.format(**upstream_error) + message = 'Response status code: {code}\n\n{trace}'.format(**upstream_error) raise self.search_failed( - user_info=u'Unknown exception at search backend', + user_info='Unknown exception at search backend', message=message, response=response) # Enrich "SyntaxError" exception - elif upstream_error["code"] == 400 and u'ParseException' in upstream_error["msg"]: + elif upstream_error["code"] == 400 and 'ParseException' in upstream_error["msg"]: user_info = re.sub( r'.*(Encountered.*at line.*?\.).*', r'SyntaxError, can not parse query expression: \1', @@ -207,7 +207,7 @@ def search_real(self, query, options=None): user_info = None if response_data['message'] == 'JSON error: failed to read response object': - user_info = u'Error while connecting to upstream database. Database might be offline.' + user_info = 'Error while connecting to upstream database. Database might be offline.' raise self.search_failed( user_info=user_info, @@ -237,7 +237,7 @@ def search_real(self, query, options=None): message = json.dumps(upstream_error) raise self.search_failed( - user_info=u'Error while connecting to upstream database. Database might be offline.', + user_info='Error while connecting to upstream database. 
Database might be offline.', message=message, response=response) @@ -252,9 +252,10 @@ def text_fetch(self, ucid, format='xml'): EP666666A2 => EP0666666A2 (EP0666666A3, EP0666666B1) """ - log.info(u"{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format( + log.info("{backend_name}: text_fetch, ucid={ucid}, format={format}; user={username}".format( ucid=ucid, format=format, **self.__dict__)) + starttime = timeit.default_timer() if not self.token or self.stale: @@ -288,7 +289,7 @@ def text_fetch(self, ucid, format='xml'): @cache_region('longer') def attachment_list(self, ucid): - log.info(u"{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) + log.info("{backend_name}: attachment_list, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) if not self.token or self.stale: self.login() @@ -310,14 +311,14 @@ def attachment_list(self, ucid): data = json.loads(response.content) return data else: - log.error(u"{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format( + log.error("{backend_name}: attachment_list, ucid={ucid}, status={status}, response={response}".format( ucid=ucid, status=response.status_code, response=response.content , **self.__dict__)) @cache_region('longer') def attachment_fetch(self, path): - log.info(u"{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__)) + log.info("{backend_name}: attachment_fetch, path={path}; user={username}".format(path=path, **self.__dict__)) if not self.token or self.stale: self.login() @@ -341,18 +342,19 @@ def attachment_fetch(self, path): return response.content else: - log.error(u"{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format( + log.error("{backend_name}: attachment_fetch, path={path}, status={status}, response={response}".format( path=path, status=response.status_code, response=response.content , **self.__dict__)) def pdf_fetch(self, ucid): - log.info(u"{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) + log.info("{backend_name}: pdf_fetch, ucid={ucid}; user={username}".format(ucid=ucid, **self.__dict__)) attachments_response = self.attachment_list(ucid) if not attachments_response: return + #print 'attachments_response:' #pprint(attachments_response) @@ -435,7 +437,7 @@ def tif_attachments(self, ucid): """ # filter tif references only - tif_attachments = filter(lambda attachment: attachment['media'] in ['image/tiff', 'image/jpeg'], attachments) + tif_attachments = [attachment for attachment in attachments if attachment['media'] in ['image/tiff', 'image/jpeg']] #print 'tif_attachments:' #pprint(tif_attachments) return tif_attachments @@ -443,7 +445,7 @@ def tif_attachments(self, ucid): def tif_fetch(self, ucid, seq=1): - log.info(u"{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) + log.info("{backend_name}: tif_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) tif_attachments = self.tif_attachments(ucid) @@ -464,7 +466,7 @@ def tif_fetch(self, ucid, seq=1): @cache_region('longer') def png_fetch(self, ucid, seq=1): - log.info(u"{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) + log.info("{backend_name}: png_fetch, ucid={ucid}, seq={seq}; user={username}".format(ucid=ucid, seq=seq, **self.__dict__)) tif = self.tif_fetch(ucid, seq) 
if tif: png = to_png(tif) @@ -535,7 +537,7 @@ def read(self): self.read_documents() def document_to_number(self, document): - ucid = document[u'ucid'] + ucid = document['ucid'] cc, docno, kindcode = ucid.split('-') number = cc + docno + kindcode number_normalized = normalize_patent(number) diff --git a/patzilla/access/ificlaims/clientpool.py b/patzilla/access/ificlaims/clientpool.py index 1e0fc64e..66b0e4b9 100644 --- a/patzilla/access/ificlaims/clientpool.py +++ b/patzilla/access/ificlaims/clientpool.py @@ -86,7 +86,7 @@ class IFIClaimsClientPool(object): IFI CLAIMS client pool as Pyramid utility implementation. """ - implements(IIFIClaimsClientPool) +# py27 implements(IIFIClaimsClientPool) def __init__(self, api_uri, api_uri_json): logger.info("Creating upstream client pool for IFI CLAIMS") diff --git a/patzilla/access/ificlaims/commands.py b/patzilla/access/ificlaims/commands.py index d44f2d56..e002033c 100644 --- a/patzilla/access/ificlaims/commands.py +++ b/patzilla/access/ificlaims/commands.py @@ -101,6 +101,7 @@ def make_request(client): #results = client.search(u'text:放射線を照射する放射線源と', 0, 10) #results = client.search(SmartBunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartBunch({'offset': 0, 'limit': 50})) + #results = client.text_fetch('US-20100077592-A1') #results = client.text_fetch('CN-1055497-A') #results = client.text_fetch('PL-2543232-T3') diff --git a/patzilla/access/ificlaims/expression.py b/patzilla/access/ificlaims/expression.py index 9496b698..cda4d062 100644 --- a/patzilla/access/ificlaims/expression.py +++ b/patzilla/access/ificlaims/expression.py @@ -22,7 +22,7 @@ class IFIClaimsGrammar(CQLGrammar): def preconfigure(self): CQLGrammar.preconfigure(self) - self.cmp_single = u':'.split() + self.cmp_single = ':'.split() class IFIClaimsParser(object): @@ -60,8 +60,8 @@ def trim_complexphrase(self): after: text:((parallel* AND schalt*) AND (antrieb* AND stufe*)) """ #print >>sys.stderr, 'expression-before:', self.expression - self.expression = re.sub(u'"(.+?)"~\d+', u'(\\1)', self.expression) - self.expression = self.expression.replace(u'{!complexphrase}', '') + self.expression = re.sub('"(.+?)"~\d+', '(\\1)', self.expression) + self.expression = self.expression.replace('{!complexphrase}', '') #print >>sys.stderr, 'expression-after :', self.expression @property @@ -192,7 +192,7 @@ def pair_to_solr(cls, key, value, modifiers=None): return expression = None - format = u'{0}:{1}' + format = '{0}:{1}' # ------------------------------------------ @@ -230,7 +230,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # within 2009-08-20,2011-03-03 if 'within' in value: within_dates = parse_date_within(value) - elements_are_years = all([len(value) == 4 and value.isdigit() for value in within_dates.values()]) + elements_are_years = all([len(value) == 4 and value.isdigit() for value in list(within_dates.values())]) if elements_are_years: fieldname = 'pdyear' @@ -263,7 +263,7 @@ def pair_to_solr(cls, key, value, modifiers=None): elif key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) elif key == 'class': @@ -277,7 +277,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # Put value into parenthesis, to properly capture expressions if value: - value = u'({value})'.format(value=value) + value = '({value})'.format(value=value) # Parse value as simple query expression query_object = CQL(cql=value) @@ 
-297,7 +297,7 @@ def pair_to_solr(cls, key, value, modifiers=None): # ------------------------------------------ if key in ['fulltext', 'inventor', 'applicant', 'country', 'citation']: if has_booleans(value) and not should_be_quoted(value) and not '{!complexphrase' in value: - value = u'({0})'.format(value) + value = '({0})'.format(value) # ------------------------------------------ # expression formatter @@ -368,15 +368,15 @@ def triple_callback(token, index, binop, term): def format_expression(format, fieldname, value): expression = None - if type(fieldname) in types.StringTypes: + if type(fieldname) in (str,): expression = format.format(fieldname, value) - elif type(fieldname) is types.ListType: + elif type(fieldname) is list: subexpressions = [] for fieldname in fieldname: subexpressions.append(format.format(fieldname, value)) expression = ' or '.join(subexpressions) # surround with parentheses - expression = u'({0})'.format(expression) + expression = '({0})'.format(expression) return expression def ifi_convert_class(value): @@ -406,5 +406,5 @@ def should_be_quoted(value): if __name__ == '__main__': - print IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords - print IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords + print(IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords) + print(IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords) diff --git a/patzilla/access/office.py b/patzilla/access/office.py index 43531d75..da08fb70 100644 --- a/patzilla/access/office.py +++ b/patzilla/access/office.py @@ -76,5 +76,5 @@ def jump_office(request): else: return url - return HTTPNotFound(u'Could not locate document "{document_number}" at {office}/{service}.'.format( + return HTTPNotFound('Could not locate document "{document_number}" at {office}/{service}.'.format( document_number=document_number, office=office, service=service)) diff --git a/patzilla/access/sip/client.py b/patzilla/access/sip/client.py index 9eba41bb..ad8b4a69 100644 --- a/patzilla/access/sip/client.py +++ b/patzilla/access/sip/client.py @@ -27,9 +27,9 @@ class SipException(GenericAdapterException): def __init__(self, *args, **kwargs): self.sip_info = '' super(SipException, self).__init__(*args) - if kwargs.has_key('sip_info'): + if 'sip_info' in kwargs: self.sip_info = kwargs['sip_info'] - if kwargs.has_key('sip_response'): + if 'sip_response' in kwargs: self.sip_info = kwargs['sip_response'].get_childvalue('Info') if self.sip_info: self.user_info = self.sip_info @@ -106,7 +106,7 @@ def search(self, expression, options=None): offset = options.offset limit = options.limit - log.info(u"{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}".format( + log.info("{backend_name}: searching documents, expression='{0}', offset={1}, limit={2}".format( expression, offset, limit, **self.__dict__)) if not self.sessionid or self.stale: @@ -116,11 +116,11 @@ def search(self, expression, options=None): try: response = requests.post(self.uri + '/search/new', data={'session': self.sessionid, 'searchtree': expression}) except (ConnectionError, ConnectTimeout) as ex: - log.error(u'SIP search for user "{username}" at "{uri}" failed. Reason: {0} {1}.'.format( + log.error('SIP search for user "{username}" at "{uri}" failed. 
Reason: {0} {1}.'.format( ex.__class__, ex.message, username=self.username, uri=self.uri)) self.logout() raise SearchException(ex.message, - sip_info=u'Error or timeout while connecting to upstream database. Database might be offline.') + sip_info='Error or timeout while connecting to upstream database. Database might be offline.') # Process search response if response.status_code == 200: @@ -129,7 +129,7 @@ def search(self, expression, options=None): search_response = self._search_parse_xml(response.content) if search_response['success'] == 'false': - raise SearchException(u'Search failed', sip_response=search_response['response']) + raise SearchException('Search failed', sip_response=search_response['response']) if 'ResultSetId' in search_response['data']: @@ -145,7 +145,7 @@ def search(self, expression, options=None): #print "SIP search results:", search_results duration = timeit.default_timer() - starttime - log.info(u'Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info)) + log.info('Search succeeded. duration={0}s, search_info={1}'.format(round(duration, 1), search_info)) upstream_response = { 'info': search_info, @@ -159,33 +159,33 @@ def search(self, expression, options=None): duration = round(duration, 1) # TODO: Unify between SIP and IFI CLAIMS - log.info(u'{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format( + log.info('{backend_name}: Search succeeded. duration={duration}s, meta=\n{meta}'.format( duration=duration, meta=result['meta'].prettify(), **self.__dict__)) if not result['numbers']: - log.warn(u'{backend_name} search from "{user}" for "{expression}" had empty results.'.format( + log.warn('{backend_name} search from "{user}" for "{expression}" had empty results.'.format( user=self.username, expression=expression, **self.__dict__ )) return result else: - message = u'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text) - raise SearchException(message, sip_info=u'Search failed. Search response could not be parsed.') + message = 'Search failed. Reason: Upstream response lacks valid ResultSetId. content={0}'.format(response.text) + raise SearchException(message, sip_info='Search failed. Search response could not be parsed.') except Exception as ex: - log.error(u'Search failed. {name}: {message}. expression={expression}, response={response}'.format( + log.error('Search failed. {name}: {message}. expression={expression}, response={response}'.format( name=ex.__class__.__name__, message=ex.message, response=response.text, expression=expression)) raise else: response_status = str(response.status_code) + ' ' + response.reason - message = u'SIP search failed. Reason: response status != 200. status={0}, content={1}'.format( + message = 'SIP search failed. Reason: response status != 200. status={0}, content={1}'.format( response_status, response.text) log.error(message) raise SearchException(message, - sip_info=u'HTTP error "{status}" while searching upstream database'.format(status=response_status)) + sip_info='HTTP error "{status}" while searching upstream database'.format(status=response_status)) def getresults(self, resultid, options): @@ -207,23 +207,23 @@ def getresults(self, resultid, options): raise SearchException(message) duration = timeit.default_timer() - starttime - log.info(u'SIP getresults succeeded. duration={0}s'.format(round(duration, 1))) + log.info('SIP getresults succeeded. 
duration={0}s'.format(round(duration, 1))) return results except SearchException: raise except Exception as ex: - message = u'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format( + message = 'SIP getresults failed. Unknown exception. Reason: {0} {1}'.format( ex.__class__, ex.message) - logmessage = u'{}. response={}'.format(message, response.text) + logmessage = '{}. response={}'.format(message, response.text) log.error(logmessage) raise SearchException(message) else: - message = u'SIP getresults failed. status_code={0}'.format( + message = 'SIP getresults failed. status_code={0}'.format( str(response.status_code) + ' ' + response.reason) - logmessage = u'{}. response={}'.format(message, response.text) + logmessage = '{}. response={}'.format(message, response.text) log.error(logmessage) raise SearchException(message) @@ -243,8 +243,8 @@ def _login_parse_xml(self, xml): 'this happens regularly on Wednesday evenings at 17:00 hours UTC (19:00 hours CEST)
' \ 'and usually does not take longer than one hour.' - if error.sip_info == u'i': - error.sip_info = u'Login failed' + if error.sip_info == 'i': + error.sip_info = 'Login failed' raise error def _search_parse_xml(self, xml): diff --git a/patzilla/access/sip/clientpool.py b/patzilla/access/sip/clientpool.py index f28a8c3c..78ef162e 100644 --- a/patzilla/access/sip/clientpool.py +++ b/patzilla/access/sip/clientpool.py @@ -6,6 +6,7 @@ from pyramid.httpexceptions import HTTPUnauthorized from zope.interface.declarations import implements from zope.interface.interface import Interface +from zope.interface import implementer from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager from patzilla.access.sip.client import SipClient @@ -85,7 +86,7 @@ class SipClientPool(object): SIP client pool as Pyramid utility implementation. """ - implements(ISipClientPool) +# py27 implements(ISipClientPool) def __init__(self, api_uri): logger.info("Creating upstream client pool for SIP") @@ -103,3 +104,4 @@ def get(self, identifier, credentials=None, debug=False): uri=self.api_uri, username=credentials['api_username'], password=credentials['api_password']) return self.clients.get(identifier) + diff --git a/patzilla/access/sip/concordance.py b/patzilla/access/sip/concordance.py index 210371ab..ac9f44f8 100644 --- a/patzilla/access/sip/concordance.py +++ b/patzilla/access/sip/concordance.py @@ -202,7 +202,7 @@ def decode_row(row): try: stream = DictReader(csvfile) - print stream.fieldnames + print(stream.fieldnames) except Exception as ex: log.error('SIP CPC class map: Reading CSV file {} failed: {}'.format(filename, ex.message)) return @@ -225,7 +225,7 @@ def decode_row(row): return ws = wb.active - print 'XLSX row 1:', [cell.value for cell in ws.rows[0]] + print('XLSX row 1:', [cell.value for cell in ws.rows[0]]) stream = ws.rows[1:20] #sys.exit(1) diff --git a/patzilla/access/sip/expression.py b/patzilla/access/sip/expression.py index b5254f38..cc0e1583 100644 --- a/patzilla/access/sip/expression.py +++ b/patzilla/access/sip/expression.py @@ -49,16 +49,16 @@ class SipExpression(object): } sip_xml_expression_templates = { - 'patentnumber': u'{value}', - 'fulltext': u'{value}', + 'patentnumber': '{value}', + 'fulltext': '{value}', #'applicant': u'{value}', #'inventor': u'{value}', - 'applicant': u'{value}', - 'inventor': u'{value}', + 'applicant': '{value}', + 'inventor': '{value}', 'pubdate': { - 'both': u'', - 'startdate': u'', - 'enddate': u'', + 'both': '', + 'startdate': '', + 'enddate': '', } } @@ -83,11 +83,11 @@ def pair_to_sip_xml(cls, key, value, modifiers): # {u'fulltext': {u'claim': True, u'abstract': True, u'description': True, u'title': True} # -> # {u'fulltext': {u'claim': 'true', u'abstract': 'true', u'description': 'true', u'title': 'true'} - for modifier_field, modifier_values in modifiers.iteritems(): - if type(modifiers[modifier_field]) is types.DictionaryType: - for modifier_name, modifier_value in modifiers[modifier_field].iteritems(): + for modifier_field, modifier_values in modifiers.items(): + if type(modifiers[modifier_field]) is dict: + for modifier_name, modifier_value in modifiers[modifier_field].items(): modifiers[modifier_field][modifier_name] = str(modifier_value).lower() - elif type(modifiers[modifier_field]) is types.BooleanType: + elif type(modifiers[modifier_field]) is bool: modifiers[modifier_field] = str(modifiers[modifier_field]).lower() xml_part = None @@ -99,7 +99,7 @@ def pair_to_sip_xml(cls, key, value, modifiers): if 
len(value) == 4 and value.isdigit(): # e.g. 1978 - value = u'within {year}-01-01,{year}-12-31'.format(year=value) + value = 'within {year}-01-01,{year}-12-31'.format(year=value) # e.g. 1990-2014, 1990 - 2014 value = year_range_to_within(value) @@ -198,13 +198,13 @@ def pair_to_sip_xml(cls, key, value, modifiers): #print pretty_print(xml_part) except FulltextDecodingError as ex: - return {'error': True, 'message': unicode(ex)} + return {'error': True, 'message': str(ex)} except pyparsing.ParseException as ex: - return {'error': True, 'message': u'
<pre>' + ex.explanation + '</pre>'}
+            return {'error': True, 'message': '<pre>' + ex.explanation + '</pre>'}

        except SyntaxError as ex:
-            return {'error': True, 'message': u'<pre>' + unicode(ex) + '</pre>'}
+            return {'error': True, 'message': '<pre>' + str(ex) + '</pre>
'} elif key in cls.sip_xml_expression_templates: template = cls.sip_xml_expression_templates[key] @@ -232,7 +232,7 @@ def pair_to_sip_xml(cls, key, value, modifiers): def compute_modifiers(cls, modifiers): # prefer defaults (all True), but mixin modifiers from query - for modifier_field, modifier_values in cls.modifier_defaults.iteritems(): + for modifier_field, modifier_values in cls.modifier_defaults.items(): if modifier_field in cls.modifier_defaults: backup = deepcopy(modifiers.get(modifier_field, {})) modifiers[modifier_field] = cls.modifier_defaults[modifier_field] @@ -313,8 +313,8 @@ def to_etree(self, expression): result = self.parser._parser(expression, parseAll=True) except pyparsing.ParseException as ex: - ex.explanation = u'%s\n%s\n%s' % (expression, u' ' * ex.loc + u'^\n', ex) - logger.error(u'\n%s', ex.explanation) + ex.explanation = '%s\n%s\n%s' % (expression, ' ' * ex.loc + '^\n', ex) + logger.error('\n%s', ex.explanation) raise #print 'result:', result, type(result), dir(result) @@ -487,16 +487,16 @@ def parse(self): def eexists(element, name): return element.find(name) is not None child_constraints =\ - all(map(lambda x: eexists(root, x), ['index', 'binop'])) and \ - any(map(lambda x: eexists(root, x), ['value', 'quotes'])) + all([eexists(root, x) for x in ['index', 'binop']]) and \ + any([eexists(root, x) for x in ['value', 'quotes']]) if root.tag == 'parenthesis' and child_constraints: root.tag = 'term' # also rewrite all other parenthesis looking like terms for parens in root.iter('parenthesis'): child_constraints =\ - all(map(lambda x: eexists(parens, x), ['index', 'binop'])) and\ - any(map(lambda x: eexists(parens, x), ['value', 'quotes', 'or', 'and', 'not'])) + all([eexists(parens, x) for x in ['index', 'binop']]) and\ + any([eexists(parens, x) for x in ['value', 'quotes', 'or', 'and', 'not']]) if child_constraints: parens.tag = 'term' @@ -522,7 +522,7 @@ def eexists(element, name): elif boolean_content: value = self.convert_boolean_nodes(term) - value = value.replace(u'and not', u'not') + value = value.replace('and not', 'not') # 2. expand triple @@ -600,7 +600,7 @@ def convert_elements(self, root, element, tags): # skip elements without a valid representation on this level, e.g. "(ab=fahrzeug or ab=pkw)" if not value: return root - value = value.replace(u'and not', u'not') + value = value.replace('and not', 'not') elif tag in ['near', 'span']: value = self.convert_proximity_nodes(element_nested) @@ -628,13 +628,13 @@ def _get_index_binop(self, element): if index_node is not None: index = index_node.text else: - index = u'bi' + index = 'bi' # 2. 
binop if binop_node is not None: binop = binop_node.text else: - binop = u'=' + binop = '=' return index, binop @@ -667,14 +667,14 @@ def convert_proximity_nodes(self, container): # fall back to using already translated "text" nodes if value: - expression = map(lambda x: x.text, value) - map(lambda x: self.keyword_add(x), expression) + expression = [x.text for x in value] + list(map(lambda x: self.keyword_add(x), expression)) elif text: - expression = map(lambda x: '({0})'.format(x.text), text) + expression = ['({0})'.format(x.text) for x in text] - expression = u' '.join(expression) + expression = ' '.join(expression) distance = distance[0].text - value = u'{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance) + value = '{operator}({expression}, {distance})'.format(operator=container.tag, expression=expression, distance=distance) return value def convert_boolean_nodes(self, node): @@ -693,7 +693,7 @@ def convert_boolean_nodes(self, node): elif element.tag == 'parenthesis': result = self.convert_boolean_nodes(element) if result: - result = u'(' + result + u')' + result = '(' + result + ')' child_values.append(result) elif element.tag in ['near', 'span']: @@ -706,9 +706,9 @@ def convert_boolean_nodes(self, node): pass if len(child_values) == 1 and node.tag == 'not': - child_values = [u'not ' + child_values[0]] + child_values = ['not ' + child_values[0]] - return u' {0} '.format(node.tag).join(child_values) + return ' {0} '.format(node.tag).join(child_values) def decode_quoted_value(self, element): """ @@ -731,15 +731,15 @@ def decode_quoted_value(self, element): value = element.text elif element.tag == 'quotes': - values = map(lambda x: x.text, element.iter('value')) - value = u'"{0}"'.format(u' '.join(values)) + values = [x.text for x in element.iter('value')] + value = '"{0}"'.format(' '.join(values)) return value def expand_fulltext(self, value, origin=None, modifiers=None): triple = value - origin = origin or u'{0}{1}{2}'.format(*triple) + origin = origin or '{0}{1}{2}'.format(*triple) ft_field, ft_op, ft_value = triple @@ -753,15 +753,15 @@ def expand_fulltext(self, value, origin=None, modifiers=None): try: ft_modifier = SipExpression.fulltext_field_modifier_map[ft_field] except KeyError: - message = u'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field) + message = 'SIP expression "{0}" contains unknown index "{1}".'.format(origin, ft_field) logger.warn(message) raise FulltextDecodingError(message) ft_modifiers = SipExpression.fulltext_modifiers_off.copy() - if type(ft_modifier) in types.StringTypes: + if type(ft_modifier) in (str,): ft_modifiers.update({ft_modifier: 'true'}) - elif type(ft_modifier) is types.ListType: + elif type(ft_modifier) is list: for ft_mod_item in ft_modifier: ft_modifiers.update({ft_mod_item: 'true'}) @@ -776,10 +776,10 @@ def strip_accents(s): #return ''.join((c for c in unicodedata.normalize('NFD', unicode(s)) if unicodedata.category(c) != 'Mn')) result = [] for char in s: - if char.lower() in u'äöüß': + if char.lower() in 'äöüß': result.append(char) else: - char_decomposed = unicodedata.normalize('NFD', unicode(char)) + char_decomposed = unicodedata.normalize('NFD', str(char)) for cd in char_decomposed: if unicodedata.category(cd) != 'Mn': result.append(cd) diff --git a/patzilla/access/sip/pyramid_service.py b/patzilla/access/sip/pyramid_service.py index 51fb44b5..b2c34a14 100644 --- a/patzilla/access/sip/pyramid_service.py +++ b/patzilla/access/sip/pyramid_service.py @@ -94,8 
+94,8 @@ def sip_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) - message = re.sub(u'namespace: .*', u'', message) + message = str(ex) + message = re.sub('namespace: .*', '', message) request.errors.add('sip-search', 'internals', message) log.error(request.errors) @@ -126,7 +126,7 @@ def sip_published_data_crawl_handler(request): if hasattr(ex, 'user_info'): message = ex.user_info else: - message = unicode(ex) + message = str(ex) request.errors.add('sip-crawl', 'crawl', message) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/navigator/export.py b/patzilla/navigator/export.py index 8328001e..88909d79 100644 --- a/patzilla/navigator/export.py +++ b/patzilla/navigator/export.py @@ -34,7 +34,7 @@ class Dossier(object): - summary_template = dedent(u""" + summary_template = dedent(""" Summary The research about »{project_name}« @@ -61,7 +61,7 @@ def make_metadata(self): self.metadata = ReportMetadata() - self.metadata.set('producer', u'IP Navigator') + self.metadata.set('producer', 'IP Navigator') # Project metadata self.metadata.set('project_name', self.data.project.name) @@ -120,7 +120,7 @@ def prepare_dataframes(self): # Queries - queries = map(self.query_criteria_smoother, self.data.get('queries', [])) + queries = list(map(self.query_criteria_smoother, self.data.get('queries', []))) self.df_queries = pandas.DataFrame(queries, columns=['criteria', 'query_expression', 'result_count', 'datasource', 'created']) self.df_queries.rename(columns={'query_expression': 'expression', 'result_count': 'hits', 'created': 'timestamp'}, inplace=True) @@ -155,10 +155,10 @@ def get_summary(self): def get_metadata(self): return self.format_with_metadata( - u'Author: {author_name} <{author_email}>\n' - u'Created: {project_created}\n' - u'Updated: {project_modified}\n' - u'Producer: {producer}') + 'Author: {author_name} <{author_email}>\n' + 'Created: {project_created}\n' + 'Updated: {project_modified}\n' + 'Producer: {producer}') @staticmethod def to_csv(dataframe): @@ -203,7 +203,7 @@ def to_zip(self, request=None, options=None): with ZipFile(buffer, 'w', ZIP_DEFLATED) as zipfile: # FIXME: Add TERMS (liability waiver) and more... - zipfile.writestr('@readme.txt', u'Zip archive created by IP Navigator.') + zipfile.writestr('@readme.txt', 'Zip archive created by IP Navigator.') # Add text summary zipfile.writestr('@metadata.txt', self.get_metadata().encode('utf-8')) @@ -224,8 +224,8 @@ def to_zip(self, request=None, options=None): try: zipfile.writestr('report/@dossier.pdf', DossierXlsx(self.data).to_pdf(payload=workbook_payload)) except Exception as ex: - log.error(u'Rendering dossier to PDF failed. ' \ - u'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback())) + log.error('Rendering dossier to PDF failed. 
' \ + 'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback())) # Add CSV if options.report.csv: @@ -263,7 +263,7 @@ def to_zip(self, request=None, options=None): if not document or not document.strip(): continue - log.info(u'Data acquisition for document {document}'.format(document=document)) + log.info('Data acquisition for document {document}'.format(document=document)) status.setdefault(document, OrderedDict()) patent = decode_patent_number(document) @@ -272,7 +272,7 @@ def to_zip(self, request=None, options=None): if options.media.biblio: try: biblio_payload = get_ops_biblio_data('publication', document, xml=True) - zipfile.writestr(u'media/xml/{document}.biblio.xml'.format(document=document), biblio_payload) + zipfile.writestr('media/xml/{document}.biblio.xml'.format(document=document), biblio_payload) status[document]['biblio'] = True except Exception as ex: @@ -290,14 +290,14 @@ def to_zip(self, request=None, options=None): # Write XML document_number = encode_epodoc_number(patent) description_payload = ops_description(document_number, xml=True) - zipfile.writestr(u'media/xml/{document}.description.xml'.format(document=document), description_payload) + zipfile.writestr('media/xml/{document}.description.xml'.format(document=document), description_payload) status[document]['description'] = True # Write TEXT with ignored(): text_payload = self.get_fulltext(description_payload, 'description') if text_payload: - zipfile.writestr(u'media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8')) + zipfile.writestr('media/txt/{document}.description.txt'.format(document=document), text_payload.encode('utf-8')) except Exception as ex: self.handle_exception(ex, 'description', document) @@ -313,14 +313,14 @@ def to_zip(self, request=None, options=None): # Write XML document_number = encode_epodoc_number(patent) claims_payload = ops_claims(document_number, xml=True) - zipfile.writestr(u'media/xml/{document}.claims.xml'.format(document=document), claims_payload) + zipfile.writestr('media/xml/{document}.claims.xml'.format(document=document), claims_payload) status[document]['claims'] = True # Write TEXT with ignored(): text_payload = self.get_fulltext(claims_payload.replace('', '

').replace('', '

'), 'claims') if text_payload: - zipfile.writestr(u'media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8')) + zipfile.writestr('media/txt/{document}.claims.txt'.format(document=document), text_payload.encode('utf-8')) except Exception as ex: self.handle_exception(ex, 'claims', document) @@ -332,7 +332,7 @@ def to_zip(self, request=None, options=None): try: register_payload = ops_register('publication', document, xml=True) - zipfile.writestr(u'media/xml/{document}.register.xml'.format(document=document), register_payload) + zipfile.writestr('media/xml/{document}.register.xml'.format(document=document), register_payload) status[document]['register'] = True except Exception as ex: @@ -346,7 +346,7 @@ def to_zip(self, request=None, options=None): try: document_number = encode_epodoc_number(patent, options={'nokind': True}) family_payload = ops_family_inpadoc('publication', document_number, 'biblio', xml=True) - zipfile.writestr(u'media/xml/{document}.family.xml'.format(document=document), family_payload) + zipfile.writestr('media/xml/{document}.family.xml'.format(document=document), family_payload) status[document]['family'] = True except Exception as ex: @@ -368,20 +368,20 @@ def to_zip(self, request=None, options=None): delivered_items = [] missing_items = [] - for document, kinds in status.iteritems(): + for document, kinds in status.items(): delivered = [] missing = [] - for kind, ok in kinds.iteritems(): + for kind, ok in kinds.items(): if ok: delivered.append(kind) else: missing.append(kind) if delivered: - item = u'{document:20}{delivered}'.format(document=document, delivered=u', '.join(delivered)) + item = '{document:20}{delivered}'.format(document=document, delivered=', '.join(delivered)) delivered_items.append(item) if missing: - item = u'{document:20}{missing}'.format(document=document, missing=u', '.join(missing)) + item = '{document:20}{missing}'.format(document=document, missing=', '.join(missing)) missing_items.append(item) if delivered_items or missing_items: @@ -409,13 +409,13 @@ def to_zip(self, request=None, options=None): def handle_exception(self, ex, service_name, document): if isinstance(ex, (_JSONError, HTTPError)) and hasattr(ex, 'status_int') and ex.status_int == 404: - log.warning(u'XML({service_name}, {document}) not found'.format(service_name=service_name, document=document)) + log.warning('XML({service_name}, {document}) not found'.format(service_name=service_name, document=document)) # Signal exception has been handled (ignored) return True else: - log.warning(u'XML({service_name}, {document}) failed. ' \ - u'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback())) + log.warning('XML({service_name}, {document}) failed. 
' \ + 'Exception:\n{trace}'.format(service_name=service_name, document=document, trace=exception_traceback())) # Signal exception should be re-raised, maybe return False @@ -464,7 +464,7 @@ def default(self, o): return JSONEncoder.default(self, o) """ - if isinstance(o, (numpy.bool_,)): + if isinstance(o, numpy.bool_): return bool(o) raise TypeError(repr(o) + " is not JSON serializable") @@ -512,9 +512,9 @@ def create(self): def set_header_footer(self, worksheet): # http://xlsxwriter.readthedocs.io/example_headers_footers.html - header = u'&LIP Navigator&RSearch report' + header = '&LIP Navigator&RSearch report' worksheet.set_header(header) - footer = u'&L&L&D &T&C&A&RPage &P of &N' + footer = '&L&L&D &T&C&A&RPage &P of &N' worksheet.set_footer(footer) def write_cover_sheet(self): @@ -529,7 +529,7 @@ def write_cover_sheet(self): cover_sheet = self.workbook.add_worksheet('cover') self.set_header_footer(cover_sheet) - title = u'Dossier »{name}«'.format(name=self.data.project.name) + title = 'Dossier »{name}«'.format(name=self.data.project.name) title_format = self.workbook.add_format({'align': 'center', 'valign': 'vcenter', 'font_size': 17, 'bold': True}) cover_sheet.merge_range('A1:I2', title, title_format) @@ -545,7 +545,7 @@ def write_cover_sheet(self): footnote_format = self.workbook.add_format({'font_size': 9}) - footnote = dedent(u""" + footnote = dedent(""" Please have a look at the other worksheets in this workbook for more detailed information about all queries, comments and document numbers @@ -554,7 +554,7 @@ def write_cover_sheet(self): summary = self.generate_with_metadata(self.summary_template, emphasis=blue) - args = list(summary) + ['\n'] + [footnote_format, u'\n\n' + footnote] + args = list(summary) + ['\n'] + [footnote_format, '\n\n' + footnote] args.append(cell_format) cover_sheet.write_rich_string('B10', *args) @@ -571,7 +571,7 @@ def write_numberlist_sheets(self): sheets['rated'] = self.data.get('collections', {}).get('rated') sheets['dismissed'] = self.data.get('collections', {}).get('dismissed') sheets['seen'] = self.data.get('collections', {}).get('seen') - for sheet_name, entries in sheets.iteritems(): + for sheet_name, entries in sheets.items(): #print 'entries:'; pprint(entries) @@ -581,10 +581,10 @@ def write_numberlist_sheets(self): first = {} # Create pandas DataFrame - if type(first) in types.StringTypes: + if type(first) in (str,): df = pandas.DataFrame(entries, columns=['PN']) - elif isinstance(first, (types.DictionaryType, Bunch)): + elif isinstance(first, (dict, Bunch)): df = pandas.DataFrame(entries, columns=['number', 'score', 'timestamp', 'url']) df.rename(columns={'number': 'document', 'url': 'display'}, inplace=True) @@ -750,7 +750,7 @@ def set(self, key, value): # https://stackoverflow.com/questions/17215400/python-format-string-unused-named-arguments/17215533#17215533 def __missing__(self, key): - return u'n/a' + return 'n/a' # Machinery for monkeypatching XlsxWriter's Worksheet's ``write_url`` method @@ -763,7 +763,7 @@ def write_url_deduce_title(self, row, col, url, cell_format=None, string=None, t if string is None: string = os.path.basename(url) if tip is None: - tip = u'Open "{name}" in Patent Navigator'.format(name=string) + tip = 'Open "{name}" in Patent Navigator'.format(name=string) return self.write_url_dist(row, col, url, cell_format=cell_format, string=string, tip=tip) def workbook_add_sheet_hook(self, name=None): diff --git a/patzilla/navigator/services/__init__.py b/patzilla/navigator/services/__init__.py index be31f855..4a52ae3d 100644 
--- a/patzilla/navigator/services/__init__.py +++ b/patzilla/navigator/services/__init__.py @@ -35,12 +35,12 @@ def handle_generic_exception(request, ex, backend_name, query): module_name = ex.__class__.__module__ class_name = ex.__class__.__name__ - reason = u'{}.{}: {}'.format(module_name, class_name, ex.message) + reason = '{}.{}: {}'.format(module_name, class_name, ex.message) - logger.critical(u'{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format( + logger.critical('{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format( exception=_exception_traceback(), **locals())) - message = u'An exception occurred while processing your query.
<br/>\nReason: {}<br/><br/>\n'.format(reason)
+    message = 'An exception occurred while processing your query.<br/>\nReason: {}<br/><br/>
\n'.format(reason) if module_name == 'pymongo.errors': message += 'Error connecting to cache database. Please report this problem to us.' diff --git a/patzilla/navigator/services/admin.py b/patzilla/navigator/services/admin.py index 28aef93b..62db0094 100644 --- a/patzilla/navigator/services/admin.py +++ b/patzilla/navigator/services/admin.py @@ -30,7 +30,7 @@ def admin_users_emails_handler(request): continue user_emails.append(user.username.lower()) - payload = u'\n'.join(user_emails) + payload = '\n'.join(user_emails) return Response(payload, content_type='text/plain', charset='utf-8') diff --git a/patzilla/navigator/services/analytics.py b/patzilla/navigator/services/analytics.py index 3bce27ce..c73923da 100644 --- a/patzilla/navigator/services/analytics.py +++ b/patzilla/navigator/services/analytics.py @@ -3,7 +3,7 @@ import logging import datetime import operator -import HTMLParser +import html.parser from arrow.arrow import Arrow from cornice.service import Service from dateutil.relativedelta import relativedelta @@ -63,7 +63,7 @@ def _decode_expression_from_query(request): # decode query parameters into datasource and criteria decoded = {} params = dict(request.params) - if params.has_key('datasource'): + if 'datasource' in params: decoded['datasource'] = params['datasource'].lower() del params['datasource'] decoded.update({'criteria': params}) @@ -96,7 +96,7 @@ def __init__(self, datasource, criteria, kind): if self.kind == self.OLDEST: - self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 01, 01)) + self.date_from = Arrow.fromdatetime(datetime.datetime(1800, 0o1, 0o1)) self.date_to = Arrow.fromdatetime(datetime.datetime(1899, 12, 31)) self.factor = +1 @@ -106,7 +106,7 @@ def __init__(self, datasource, criteria, kind): self.machine.add_transition('step', 'right', 'whole', unless='is_ready', after=['range_shrink']) elif self.kind == self.NEWEST: - self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 01, 01)) + self.date_from = Arrow.fromdatetime(datetime.datetime(2000, 0o1, 0o1)) self.date_to = Arrow.utcnow() self.date_to += relativedelta(months=12-self.date_to.month, days=31-self.date_to.day) self.factor = -1 @@ -124,7 +124,7 @@ def __init__(self, datasource, criteria, kind): def runquery(self): criteria = self.criteria.copy() - criteria['pubdate'] = u'within {date_from},{date_to}'.format( + criteria['pubdate'] = 'within {date_from},{date_to}'.format( date_from=self.date_from.format('YYYY-MM-DD'), date_to=self.date_to.format('YYYY-MM-DD')) query = make_expression_filter({ @@ -199,10 +199,10 @@ def work(self): debug = False while True: if debug: - print '-' * 42 - print 'state:', self.state - print 'delta:', self.delta - print 'querycount:', self.querycount + print('-' * 42) + print('state:', self.state) + print('delta:', self.delta) + print('querycount:', self.querycount) if self.state == 'finished' or self.querycount > 15: break self.step() @@ -294,7 +294,7 @@ def analytics_applicants_distinct_handler(request): #print 'results:', results applicants = {} - htmlparser = HTMLParser.HTMLParser() + htmlparser = html.parser.HTMLParser() for item in results['details']: applicant = item.get('applicant') if applicant: diff --git a/patzilla/navigator/services/depatech.py b/patzilla/navigator/services/depatech.py index f852a81d..19a4e3db 100644 --- a/patzilla/navigator/services/depatech.py +++ b/patzilla/navigator/services/depatech.py @@ -103,7 +103,7 @@ def depatech_published_data_search_handler(request): log.warn(request.errors) except SyntaxError as ex: - 
request.errors.add('depatech-search', 'expression', unicode(ex.msg)) + request.errors.add('depatech-search', 'expression', str(ex.msg)) log.warn(request.errors) except SearchException as ex: @@ -117,7 +117,7 @@ def depatech_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) + message = str(ex) request.errors.add('depatech-search', 'internals', message) log.error(request.errors) @@ -151,6 +151,6 @@ def depatech_published_data_crawl_handler(request): return result except Exception as ex: - request.errors.add('depatech-crawl', 'crawl', unicode(ex)) + request.errors.add('depatech-crawl', 'crawl', str(ex)) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/navigator/services/dpma.py b/patzilla/navigator/services/dpma.py index a83c99bb..9bc363e0 100644 --- a/patzilla/navigator/services/dpma.py +++ b/patzilla/navigator/services/dpma.py @@ -94,7 +94,7 @@ def prepare_search(request): expression = expression.replace('ikofax:', '') syntax = 'ikofax' - log.info(u'DEPATISnet query: {}, syntax: {}'.format(expression, syntax)) + log.info('DEPATISnet query: {}, syntax: {}'.format(expression, syntax)) # Compute query options, like # - limit @@ -112,7 +112,7 @@ def prepare_search(request): elif syntax == 'ikofax': search = ikofax_prepare_query(expression) else: - request.errors.add('depatisnet-search', 'expression', u'Unknown syntax {}'.format(syntax)) + request.errors.add('depatisnet-search', 'expression', 'Unknown syntax {}'.format(syntax)) # Propagate keywords to highlighting component keywords_to_response(request, search=search) @@ -165,10 +165,10 @@ def depatisnet_published_data_crawl_handler(request): http_response = None if hasattr(ex, 'http_response'): http_response = ex.http_response - log.error(u'DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format( + log.error('DEPATISnet crawler error: query="{0}", reason={1}\nresponse:\n{2}\nexception:\n{3}'.format( query, ex, http_response, _exception_traceback())) - message = u'An exception occurred while processing your query
<br/>Reason: {}'.format(ex)
+    message = 'An exception occurred while processing your query<br/>
Reason: {}'.format(ex) request.errors.add('depatisnet-search', 'crawl', message) diff --git a/patzilla/navigator/services/ificlaims.py b/patzilla/navigator/services/ificlaims.py index 2897a109..f347defe 100644 --- a/patzilla/navigator/services/ificlaims.py +++ b/patzilla/navigator/services/ificlaims.py @@ -71,7 +71,7 @@ def ificlaims_download_handler(request): try: response = ificlaims_download(resource, format, options) - except IFIClaimsException, ex: + except IFIClaimsException as ex: if type(ex) is IFIClaimsFormatException: raise HTTPNotFound(ex) else: @@ -102,16 +102,16 @@ def ificlaims_deliver_handler(request): """Deliver resources from IFI CLAIMS Direct in bulk""" kind = request.matchdict['kind'] - formats = map(unicode.strip, request.params.get('formats', u'').lower().split(u',')) - numberlist = filter(lambda item: bool(item), map(unicode.strip, re.split('[\n,]', request.params.get('numberlist', u'')))) + formats = list(map(str.strip, request.params.get('formats', '').lower().split(','))) + numberlist = [item for item in map(str.strip, re.split('[\n,]', request.params.get('numberlist', ''))) if bool(item)] if kind == 'zip': multi = ificlaims_download_multi(numberlist, formats) #for entry in multi['results']: # print 'entry:', entry - print 'report:' - print json.dumps(multi['report'], indent=4) + print('report:') + print(json.dumps(multi['report'], indent=4)) payload = zip_multi(multi) @@ -181,7 +181,7 @@ def ificlaims_published_data_search_handler(request): log.warn(request.errors) except SyntaxError as ex: - request.errors.add('ificlaims-search', 'expression', unicode(ex.msg)) + request.errors.add('ificlaims-search', 'expression', str(ex.msg)) log.warn(request.errors) except SearchException as ex: @@ -195,7 +195,7 @@ def ificlaims_published_data_search_handler(request): return ex.data except OperationFailure as ex: - message = unicode(ex) + message = str(ex) request.errors.add('ificlaims-search', 'internals', message) log.error(request.errors) @@ -229,6 +229,6 @@ def ificlaims_published_data_crawl_handler(request): return result except Exception as ex: - request.errors.add('ificlaims-crawl', 'crawl', unicode(ex)) + request.errors.add('ificlaims-crawl', 'crawl', str(ex)) log.error(request.errors) - log.error(u'query="{0}", exception:\n{1}'.format(query, _exception_traceback())) + log.error('query="{0}", exception:\n{1}'.format(query, _exception_traceback())) diff --git a/patzilla/navigator/services/ops.py b/patzilla/navigator/services/ops.py index 08316486..ef9eaf74 100644 --- a/patzilla/navigator/services/ops.py +++ b/patzilla/navigator/services/ops.py @@ -89,12 +89,12 @@ def ops_published_data_search_handler(request): # CQL query string query = request.params.get('expression', '') - log.info(u'query raw: %s', query) + log.info('query raw: %s', query) # Transcode CQL query expression search = cql_prepare_query(query) - log.info(u'query cql: %s', search.expression) + log.info('query cql: %s', search.expression) # range: x-y, maximum delta is 100, default is 25 range = request.params.get('range') @@ -136,7 +136,7 @@ def ops_published_data_crawl_handler(request): # CQL query string query = request.params.get('expression', '') - log.info(u'query raw: ' + query) + log.info('query raw: ' + query) # Transcode CQL query expression search = cql_prepare_query(query) @@ -144,7 +144,7 @@ def ops_published_data_crawl_handler(request): # Propagate keywords to highlighting component keywords_to_response(request, search=search) - log.info(u'query cql: ' + search.expression) + log.info('query 
cql: ' + search.expression) chunksize = int(request.params.get('chunksize', '100')) @@ -154,7 +154,7 @@ def ops_published_data_crawl_handler(request): return result except Exception as ex: - log.error(u'OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback())) + log.error('OPS crawler error: query="{0}", reason={1}, Exception was:\n{2}'.format(query, ex, _exception_traceback())) request.errors.add('ops-published-data-crawl', 'query', str(ex)) diff --git a/patzilla/navigator/services/util.py b/patzilla/navigator/services/util.py index 652a78d0..ccb23017 100644 --- a/patzilla/navigator/services/util.py +++ b/patzilla/navigator/services/util.py @@ -55,9 +55,9 @@ def query_expression_util_handler(request): # TODO: improve error handling data = request.json - log.info(u'[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data)) + log.info('[{userid}] Expression data: {data}'.format(userid=request.user.userid, data=data)) expression_data = make_expression_filter(data) - log.info(u'[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, expression_data=expression_data)) + log.info('[{userid}] Expression query: {expression_data}'.format(userid=request.user.userid, expression_data=expression_data)) return expression_data @@ -100,7 +100,7 @@ def make_expression_filter(data): else: # Bring criteria in order: Process "fulltext" first - keys = criteria.keys() + keys = list(criteria.keys()) if 'fulltext' in keys: keys.remove('fulltext') keys.insert(0, 'fulltext') @@ -132,7 +132,7 @@ def make_expression_filter(data): elif datasource == 'sip': expression_part = SipExpression.pair_to_sip_xml(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) @@ -147,7 +147,7 @@ def make_expression_filter(data): else: expression_part = IFIClaimsExpression.pair_to_solr(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) @@ -157,13 +157,13 @@ def make_expression_filter(data): expression_part = DepaTechExpression.pair_to_elasticsearch(key, value, modifiers) if expression_part: - if expression_part.has_key('keywords'): + if 'keywords' in expression_part: keywords += expression_part['keywords'] else: keywords += keywords_from_boolean_expression(key, value) # Accumulate expression part - error_tpl = u'Criteria "{0}: {1}" has invalid format, datasource={2}.' + error_tpl = 'Criteria "{0}: {1}" has invalid format, datasource={2}.' if not expression_part: message = error_tpl.format(key, value, datasource) log.warn(message) @@ -171,7 +171,7 @@ def make_expression_filter(data): elif 'error' in expression_part: message = error_tpl.format(key, value, datasource) - message += u'
<br/>' + expression_part['message']
+            message += '<br/>
' + expression_part['message'] log.warn(message) request.errors.add('query-expression-utility-service', 'comfort-form', message) @@ -181,12 +181,12 @@ def make_expression_filter(data): expression_parts.append(query) # Accumulate filter part - error_tpl = u'Filter "{0}: {1}" has invalid format, datasource={2}.' + error_tpl = 'Filter "{0}: {1}" has invalid format, datasource={2}.' if filter_part: if 'error' in filter_part: message = error_tpl.format(key, value, datasource) - message += u'
<br/>' + filter_part['message']
+            message += '<br/>
' + filter_part['message'] log.warn(message) request.errors.add('query-expression-utility-service', 'comfort-form', message) @@ -251,8 +251,8 @@ def request_to_options(request, options): options.update({'feature_family_replace': True}) # this is awful, switch to JSON POST - for key, value in request.params.iteritems(): - if key.startswith(u'query_data[sorting]'): + for key, value in request.params.items(): + if key.startswith('query_data[sorting]'): key = key.replace('query_data[sorting]', '').replace('[', '').replace(']', '') options.setdefault('sorting', {}) options['sorting'][key] = value @@ -314,10 +314,10 @@ def export_util_handler(request): payload = dossier.to_zip(request=request, options=data.get('options')) else: - return HTTPBadRequest(u'Export format "{format}" is unknown.'.format(format=output_format)) + return HTTPBadRequest('Export format "{format}" is unknown.'.format(format=output_format)) except Exception as ex: - message = u'Exporting format "{format}" failed.'.format(format=output_format) + message = 'Exporting format "{format}" failed.'.format(format=output_format) log.error('{message}. Exception:\n{trace}'.format(message=message, trace=exception_traceback())) return HTTPServerError(message) diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py index 7c809e8a..4a1410d6 100644 --- a/patzilla/navigator/settings.py +++ b/patzilla/navigator/settings.py @@ -13,7 +13,11 @@ from patzilla.util.config import read_list, asbool, get_configuration from patzilla.util.date import datetime_isoformat, unixtime_to_datetime from patzilla.util.python import _exception_traceback -from patzilla.util.data.container import SmartBunch + +from patzilla.util.data.munch import Munch, munchify + +#py27 +#from patzilla.util.data.container import Bunch log = logging.getLogger(__name__) @@ -52,7 +56,6 @@ def get_application_settings(self): # FIXME: Maybe do the same what `attach_ops_client` does? # `if '/static' in event.request.url: return`. settings = get_configuration(self.configfile, kind=SmartBunch) - # Add some global settings settings['software_version'] = __version__ @@ -68,8 +71,8 @@ def get_datasource_settings(self, vendor=None): # Container for datasource settings. datasource_settings = SmartBunch({ 'datasources': [], - 'datasource': SmartBunch(), - 'total': SmartBunch.bunchify({'fulltext_countries': [], 'details_countries': []}), + 'datasource': Munch(), + 'total': munchify({'fulltext_countries': [], 'details_countries': []}), }) # Read datasource settings from configuration. 
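Besides replacing iteritems() and has_key() with their Python 3 spellings, the settings hunks above drop the value.decode('utf-8') calls. That is safe because configparser (switched from ConfigParser elsewhere in this patch) hands back str values under Python 3, which carry no .decode() method. A minimal, self-contained sketch of the behaviour this relies on; the [vendor] section and page_title key are made-up examples, not actual PatZilla configuration:

    # Python 3's configparser already returns text, so no .decode('utf-8') step is needed.
    from configparser import ConfigParser
    from io import StringIO

    config = ConfigParser()
    config.read_file(StringIO('[vendor]\npage_title = Müller Patent Research\n'))

    value = config.get('vendor', 'page_title')
    assert isinstance(value, str)   # already text on Python 3
    print(value)                    # Müller Patent Research
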
@@ -101,9 +104,9 @@ def get_datasource_settings(self, vendor=None): def get_vendor_settings(self): # Container for vendor settings - vendor_settings = SmartBunch({ + vendor_settings = Munch({ 'vendors': [], - 'vendor': SmartBunch(), + 'vendor': Munch(), }) # Read vendor settings from configuration @@ -122,8 +125,8 @@ def get_vendor_settings(self): vendor=vendor, configfile=self.configfile)) vendor_info = self.application_settings.get(settings_key, {}) - for key, value in vendor_info.iteritems(): - vendor_info[key] = value.decode('utf-8') + for key, value in vendor_info.items(): + vendor_info[key] = value if 'hostname_matches' in vendor_info: vendor_info.hostname_matches = read_list(vendor_info.hostname_matches) @@ -146,9 +149,9 @@ def get_email_settings(self, vendor): """ # Container for email settings - email_settings = SmartBunch({ + email_settings = Munch({ 'addressbook': [], - 'content': SmartBunch(), + 'content': Munch(), }) for setting_name in ['addressbook', 'content']: @@ -160,8 +163,8 @@ def get_email_settings(self, vendor): if defaults and specific: thing.update(deepcopy(specific)) - for key, value in thing.items(): - thing[key] = value.decode('utf-8') + for key, value in list(thing.items()): + thing[key] = value email_settings[setting_name] = thing @@ -281,12 +284,12 @@ def theme_settings(self): 'ui.version': software_version_link, 'ui.page.title': vendor.get('page_title', ''), # + '   ' + self.beta_badge, 'ui.page.subtitle': '', - 'ui.page.footer': 'Data sources: ' + u', '.join(data_source_list), + 'ui.page.footer': 'Data sources: ' + ', '.join(data_source_list), } # Transfer all properties having designated prefixes 1:1 prefixes = ['ui.', 'feature.'] - for key, value in vendor.iteritems(): + for key, value in vendor.items(): for prefix in prefixes: if key.startswith(prefix): if key.endswith('.enabled'): @@ -304,10 +307,10 @@ def datasource_settings(self): Return datasource settings while accounting for sensible settings like API URI and credentials. """ request = get_current_request() - datasource_settings = SmartBunch.bunchify(request.registry.datasource_settings) + datasource_settings = munchify(request.registry.datasource_settings) if 'protected_fields' in datasource_settings: for fieldname in datasource_settings.protected_fields: - for name, settings in datasource_settings.datasource.iteritems(): + for name, settings in datasource_settings.datasource.items(): if fieldname in settings: del settings[fieldname] del datasource_settings['protected_fields'] @@ -363,7 +366,7 @@ def config_parameters(self): isviewer = 'patentview' in host or 'viewer' in host or 'patview' in host # 1. don't allow "query" from outside on view-only domains - if request_params.has_key('query') and isviewer: + if 'query' in request_params and isviewer: log.warning('Parameter "query=%s" not allowed on host "%s", purging it', request_params['query'], host) del request_params['query'] @@ -388,7 +391,7 @@ def config_parameters(self): # C. parameter firewall, OUTPUT # remove "opaque parameter" - if params.has_key('op'): + if 'op' in params: del params['op'] @@ -409,7 +412,7 @@ def config_parameters(self): params['datasources_enabled'].append(datasource) # E. 
backward-compat amendments - for key, value in params.iteritems(): + for key, value in params.items(): if key.startswith('ship_'): newkey = key.replace('ship_', 'ship-') params[newkey] = value diff --git a/patzilla/navigator/tools/browser_database_tool.py b/patzilla/navigator/tools/browser_database_tool.py index 98e4c8f8..ec1bc5ab 100755 --- a/patzilla/navigator/tools/browser_database_tool.py +++ b/patzilla/navigator/tools/browser_database_tool.py @@ -12,17 +12,17 @@ def purge_titles(data): # Purge "title" attributes from BasketEntry objects - for name, entity in data['database'].iteritems(): + for name, entity in data['database'].items(): if name.startswith('BasketEntry'): if 'title' in entity: del entity['title'] if 'number' in entity: - entity['number'] = entity['number'].strip(u'★ ') + entity['number'] = entity['number'].strip('★ ') def purge_numbers_seen(data): # Purge all BasketEntry objects with "seen==true" keys = [] - for name, item in data['database'].iteritems(): + for name, item in data['database'].items(): if name.startswith('BasketEntry/'): if 'seen' in item and item['seen'] == True: keys.append(name) @@ -32,7 +32,7 @@ def purge_numbers_seen(data): def purge_projects(data): # Purge "project" attributes from all "Query/..." objects - for name, item in data['database'].iteritems(): + for name, item in data['database'].items(): if name.startswith('Query/'): if 'project' in item: del item['project'] @@ -51,7 +51,7 @@ def main(): #purge_projects(data) # Save database file - print json.dumps(data, indent=4) + print(json.dumps(data, indent=4)) if __name__ == '__main__': diff --git a/patzilla/navigator/util.py b/patzilla/navigator/util.py index ad77fc2f..df5314a4 100644 --- a/patzilla/navigator/util.py +++ b/patzilla/navigator/util.py @@ -6,7 +6,7 @@ def get_exception_message(ex, add_traceback=False): name = ex.__class__.__name__ - description = '%s: %s' % (name, unicode(ex.message)) + description = '%s: %s' % (name, str(ex.message)) if add_traceback: description += '\n' + get_safe_traceback(ex) return description @@ -24,7 +24,7 @@ def safe_value(value): e.g. 
CaseInsensitiveDict to dict """ if hasattr(value, 'items') and callable(value.items): - return dict(value.items()) + return dict(list(value.items())) else: return value @@ -35,7 +35,7 @@ def dict_subset(bigdict, *wanted_keys): def dict_prefix_key(d, prefix): # prefix keys in dictionary new = {} - for key, value in d.iteritems(): + for key, value in d.items(): key = prefix + key new[key] = value return new @@ -53,7 +53,7 @@ def dict_merge(dct, merge_dct): :param merge_dct: dct merged into dct :return: None """ - for k, v in merge_dct.iteritems(): + for k, v in merge_dct.items(): if (k in dct and isinstance(dct[k], dict) and isinstance(merge_dct[k], collections.Mapping)): dict_merge(dct[k], merge_dct[k]) diff --git a/patzilla/navigator/views.py b/patzilla/navigator/views.py index 36050c9b..0389bb7e 100644 --- a/patzilla/navigator/views.py +++ b/patzilla/navigator/views.py @@ -137,7 +137,7 @@ def navigator_quick(request): # Compute query expression expression = compute_expression(field, value, value2, parameters=request.params) - print 'quick expression:', expression + print('quick expression:', expression) #return get_redirect_query(request, expression, query_args=query_args) return get_redirect_query(request, expression) @@ -150,7 +150,7 @@ def compute_expression(field, value, value2=None, **kwargs): field = 'pn' if field in ['cl', 'ipc', 'ic', 'cpc', 'cpci', 'cpca']: - value = value.replace(u'-', u'/') + value = value.replace('-', '/') quotable = True if field in ['pa', 'applicant']: @@ -159,38 +159,38 @@ def compute_expression(field, value, value2=None, **kwargs): # apply blacklist blacklist = [ - u'GmbH & Co. KG', - u'GmbH', - u' KG', - u' AG', - u'& Co.', + 'GmbH & Co. KG', + 'GmbH', + ' KG', + ' AG', + '& Co.', ] replacements = { - u' and ': u' ', - u' or ': u' ', - u' not ': u' ', + ' and ': ' ', + ' or ': ' ', + ' not ': ' ', } for black in blacklist: pattern = re.compile(re.escape(black), re.IGNORECASE) - value = pattern.sub(u'', value).strip() - for replacement_key, replacement_value in replacements.iteritems(): + value = pattern.sub('', value).strip() + for replacement_key, replacement_value in replacements.items(): #value = value.replace(replacement_key, replacement_value) pattern = re.compile(replacement_key, re.IGNORECASE) value = pattern.sub(replacement_value, value).strip() # make query expression - parts_raw = re.split(u'[ -]*', value) + parts_raw = re.split('[ -]*', value) umlaut_map = { - u'ä': u'ae', - u'ö': u'oe', - u'ü': u'ue', - u'Ä': u'Ae', - u'Ö': u'Oe', - u'Ü': u'Ue', - u'ß': u'ss', + 'ä': 'ae', + 'ö': 'oe', + 'ü': 'ue', + 'Ä': 'Ae', + 'Ö': 'Oe', + 'Ü': 'Ue', + 'ß': 'ss', } def replace_parts(thing): - for umlaut, replacement in umlaut_map.iteritems(): + for umlaut, replacement in umlaut_map.items(): thing = thing.replace(umlaut, replacement) return thing @@ -198,22 +198,22 @@ def replace_parts(thing): for part in parts_raw: # "Alfred H. 
Schütte" => Alfred Schütte - if re.match(u'^(\w\.)+$', part): + if re.match('^(\w\.)+$', part): continue part_normalized = replace_parts(part) if part != part_normalized: - part = u'({} or {})'.format(part, part_normalized) + part = '({} or {})'.format(part, part_normalized) parts.append(part) - value = u' and '.join(parts) + value = ' and '.join(parts) #value = u'({})'.format(value) - if quotable and u' ' in value: - value = u'"{0}"'.format(value) + if quotable and ' ' in value: + value = '"{0}"'.format(value) - query = u'{field}={value}'.format(**locals()) + query = '{field}={value}'.format(**locals()) if field in ['pd', 'publicationdate']: if 'W' in value: diff --git a/patzilla/util/config/__init__.py b/patzilla/util/config/__init__.py index 0c94ff3a..e332bccb 100644 --- a/patzilla/util/config/__init__.py +++ b/patzilla/util/config/__init__.py @@ -4,7 +4,7 @@ import logging import sys from glob import glob -from ConfigParser import ConfigParser +from configparser import ConfigParser logger = logging.getLogger(__name__) @@ -29,7 +29,7 @@ def get_configuration(*args, **kwargs): logger.info('Effective configuration files: {}'.format(make_list(used))) return config else: - msg = u'Could not read settings from configuration files: {}'.format(config_files) + msg = 'Could not read settings from configuration files: {}'.format(config_files) logger.critical(msg) raise ValueError(msg) @@ -76,22 +76,22 @@ def asbool(s): s = str(s).strip() return s.lower() in truthy -def read_list(string, separator=u','): +def read_list(string, separator=','): if string is None: return [] elif isinstance(string, list): return string - result = map(unicode.strip, string.split(separator)) + result = list(map(str.strip, string.split(separator))) if len(result) == 1 and not result[0]: result = [] return result -def make_list(items, separator=u', '): +def make_list(items, separator=', '): return separator.join(items) def normalize_docopt_options(options): normalized = {} - for key, value in options.items(): + for key, value in list(options.items()): key = key.strip('--<>') normalized[key] = value return normalized diff --git a/patzilla/util/cql/cheshire3/__init__.py b/patzilla/util/cql/cheshire3/__init__.py index 35e47286..aad5781f 100644 --- a/patzilla/util/cql/cheshire3/__init__.py +++ b/patzilla/util/cql/cheshire3/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # (c) 2014 Andreas Motl, Elmyra UG -import parser as cheshire3_parser -from parser import SearchClause, CQLParser, Diagnostic +from . 
import parser as cheshire3_parser +from .parser import SearchClause, CQLParser, Diagnostic from patzilla.util.numbers.normalize import normalize_patent @@ -14,7 +14,7 @@ class SmartSearchClause(SearchClause): def toCQL(self): text = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if (p != ''): text.append('>%s="%s"' % (p, self.prefixes[p])) else: diff --git a/patzilla/util/cql/cheshire3/parser.py b/patzilla/util/cql/cheshire3/parser.py index 15504717..b5db8763 100644 --- a/patzilla/util/cql/cheshire3/parser.py +++ b/patzilla/util/cql/cheshire3/parser.py @@ -19,8 +19,8 @@ from shlex import shlex from xml.sax.saxutils import escape -from StringIO import StringIO -from __builtin__ import isinstance +from io import StringIO +from builtins import isinstance serverChoiceRelation = "=" serverChoiceIndex = "cql.serverchoice" @@ -75,7 +75,7 @@ def __init__(self): def toXCQL(self, depth=0): space = " " * depth xml = ['{s}\n'] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): xml.extend(["{s} \n", "{s} {name}\n", "{s} {ident}\n", @@ -221,7 +221,7 @@ def toCQL(self): txt = [] if (self.prefixes): ptxt = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if p != '': ptxt.append('>%s="%s"' % (p, self.prefixes[p])) else: @@ -236,7 +236,7 @@ def toCQL(self): txt.append("sortBy") for sk in self.sortKeys: txt.append(sk.toCQL()) - return u"({0})".format(u" ".join(txt)) + return "({0})".format(" ".join(txt)) def getResultSetId(self, top=None): if ( @@ -315,7 +315,7 @@ def toXCQL(self, depth=0): def toCQL(self): text = [] - for p in self.prefixes.keys(): + for p in list(self.prefixes.keys()): if p != '': text.append('>%s="%s"' % (p, self.prefixes[p])) else: @@ -406,7 +406,7 @@ def toXCQL(self, depth=0): def toCQL(self): txt = [self.value] - txt.extend(map(str, self.modifiers)) + txt.extend(list(map(str, self.modifiers))) return '/'.join(txt) @@ -774,7 +774,7 @@ def query(self): left.sortKeys = self.sortQuery() else: break - for p in prefs.keys(): + for p in list(prefs.keys()): left.addPrefix(p, prefs[p]) return left @@ -812,7 +812,7 @@ def subQuery(self): prefs = self.prefixes() if (prefs): object = self.query() - for p in prefs.keys(): + for p in list(prefs.keys()): object.addPrefix(p, prefs[p]) else: object = self.clause() @@ -847,7 +847,7 @@ def clause(self): elif self.currentToken == ">": prefs = self.prefixes() object = self.clause() - for p in prefs.keys(): + for p in list(prefs.keys()): object.addPrefix(p, prefs[p]) return object @@ -918,7 +918,7 @@ def parse(query): if type(query) == str: try: query = query.decode("utf-8") - except Exception, e: + except Exception as e: raise q = StringIO(query) diff --git a/patzilla/util/cql/cheshire3/test_cheshire3.py b/patzilla/util/cql/cheshire3/test_cheshire3.py index daab7bb1..8df9519d 100644 --- a/patzilla/util/cql/cheshire3/test_cheshire3.py +++ b/patzilla/util/cql/cheshire3/test_cheshire3.py @@ -60,7 +60,7 @@ def test_boolean_german(self): self.assertEqual(self.do_parse('bi=foo und bi=bar'), '(bi = "foo" und bi = "bar")') def test_utf8(self): - self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), u'(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")') + self.assertEqual(self.do_parse('ab=radaufstandskraft or ab=radaufstandskräfte?'), '(ab = "radaufstandskraft" or ab = "radaufstandskr\xe4fte?")') if __name__ == '__main__': unittest.main() diff --git a/patzilla/util/cql/pyparsing/__init__.py b/patzilla/util/cql/pyparsing/__init__.py index 
f916d5aa..e7af9101 100644 --- a/patzilla/util/cql/pyparsing/__init__.py +++ b/patzilla/util/cql/pyparsing/__init__.py @@ -61,12 +61,12 @@ def parse(self): # log.info(u'tokens: %s', tokens.pformat()) except pyparsing.ParseException as ex: - ex.explanation = u'%s\n%s\n%s' % (ex.pstr, u' ' * ex.loc + u'^\n', ex) + ex.explanation = '%s\n%s\n%s' % (ex.pstr, ' ' * ex.loc + '^\n', ex) #if self.logging: # log.error('\n%s', ex.explanation) - log.warning(u'Query expression "{query}" is invalid. ' \ - u'Reason: {reason}\n{location}'.format( - query=self.cql, reason=unicode(ex), location=ex.explanation)) + log.warning('Query expression "{query}" is invalid. ' \ + 'Reason: {reason}\n{location}'.format( + query=self.cql, reason=str(ex), location=ex.explanation)) raise return tokens diff --git a/patzilla/util/cql/pyparsing/demo.py b/patzilla/util/cql/pyparsing/demo.py index 47573044..157773c6 100644 --- a/patzilla/util/cql/pyparsing/demo.py +++ b/patzilla/util/cql/pyparsing/demo.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # (c) 2014 Andreas Motl, Elmyra UG from . import CQL -from serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers +from .serializer import tokens_to_cql, expand_shortcut_notation, get_triples, get_keywords, normalize_patentnumbers def parse_cql(cql): c = CQL(cql) @@ -24,9 +24,9 @@ def enrich_cql(cql): def dump_results(tokens): cql = tokens_to_cql(tokens) - print "=" * 42 - print "tokens:", tokens - print "cql:", cql + print("=" * 42) + print("tokens:", tokens) + print("cql:", cql) def rundemo(): @@ -80,11 +80,11 @@ def rundemo(): # B.3. dump all triples triples = [] get_triples(tokens, triples) - print "triples:", triples + print("triples:", triples) # B.4. dump all keywords keywords = get_keywords(triples) - print "keywords:", keywords + print("keywords:", keywords) if __name__ == '__main__': diff --git a/patzilla/util/cql/pyparsing/parser.py b/patzilla/util/cql/pyparsing/parser.py index 12c6cfb2..a0d7955d 100644 --- a/patzilla/util/cql/pyparsing/parser.py +++ b/patzilla/util/cql/pyparsing/parser.py @@ -54,19 +54,19 @@ TODO: maybe extract this to a different place, since ..services is also using it """ -wildcards = u'*?#!' +wildcards = '*?#!' # - classification terms (IPC, CPC) may contain forward slashes and dashes, e.g. H04L12/433, F17D5-00 # - numeric terms may contain punctuation (,.), e.g. 2.45 # - dates may contain dashes, e.g. M11-2009 -separators = u'/,.-' +separators = '/,.-' # limited set of unicode characters #umlauts = u'äöüÄÖÜß' # all unicode characters # http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659 -unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace()) +unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace()) # indexchars indexchars = alphanums + '{}!' 
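The unicode_printables hunk above swaps unichr()/xrange() for the Python 3 built-ins chr()/range(), keeping the scan capped at U+FFFF so the character class stays BMP-only, exactly as before. A standalone sketch of the rewritten construction, assuming the BMP-only limit remains intentional:

    # chr() replaces unichr(), range() replaces xrange(); range(0x10000) preserves
    # the old BMP-only behaviour (narrow Python 2 builds could not go higher anyway).
    unicode_printables = ''.join(
        chr(c) for c in range(0x10000)
        if chr(c).isalnum() and not chr(c).isspace()
    )
    assert 'ä' in unicode_printables
    assert ' ' not in unicode_printables
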
@@ -92,9 +92,9 @@ def __init__(self): def preconfigure(self): # Binary comparison operators - self.cmp_single = u'= != < > <= >='.split() - self.cmp_perl = u'eq ne lt gt le ge'.split() - self.cmp_cql = u'exact within encloses all any any/relevant any/rel.lr'.split() + self.cmp_single = '= != < > <= >='.split() + self.cmp_perl = 'eq ne lt gt le ge'.split() + self.cmp_cql = 'exact within encloses all any any/relevant any/rel.lr'.split() # Boolean operators # TODO: Configure german operators with DPMAGrammar only diff --git a/patzilla/util/cql/pyparsing/searchparser.py b/patzilla/util/cql/pyparsing/searchparser.py index 880ebad1..e4b26ae1 100644 --- a/patzilla/util/cql/pyparsing/searchparser.py +++ b/patzilla/util/cql/pyparsing/searchparser.py @@ -65,18 +65,27 @@ Set = set except NameError: from sets import Set + from patzilla.util.cql.pyparsing.parser import separators, wildcards +import sys +_IS_PYTHON_3 = (sys.version_info[0] >= 3) +if _IS_PYTHON_3: + Set = set +else: + from sets import Set + # define characters comprising a word #wordchars = alphanums + separators + wildcards # all unicode characters # http://stackoverflow.com/questions/2339386/python-pyparsing-unicode-characters/2340659#2340659 -unicode_printables = u''.join(unichr(c) for c in xrange(65536) if unichr(c).isalnum() and not unichr(c).isspace()) -more_chars = u'°' +unicode_printables = ''.join(chr(c) for c in range(65536) if chr(c).isalnum() and not chr(c).isspace()) +more_chars = '°' wordchars = unicode_printables + more_chars + separators + wildcards + class SearchQueryParser: def __init__(self): @@ -272,14 +281,14 @@ class ParserTest(SearchQueryParser): } def GetWord(self, word): - if (self.index.has_key(word)): + if (word in self.index): return self.index[word] else: return Set() def GetWordWildcard(self, word): result = Set() - for item in self.index.keys(): + for item in list(self.index.keys()): if word == item[0:len(word)]: result = result.union(self.index[item]) return result @@ -292,27 +301,27 @@ def GetQuotes(self, search_string, tmp_result): return result def GetNot(self, not_set): - all = Set(self.docs.keys()) + all = Set(list(self.docs.keys())) return all.difference(not_set) def Test(self): all_ok = True - for item in self.tests.keys(): - print item + for item in list(self.tests.keys()): + print(item) r = self.Parse(item) e = self.tests[item] - print 'Result: %s' % r - print 'Expect: %s' % e + print('Result: %s' % r) + print('Expect: %s' % e) if e == r: - print 'Test OK' + print('Test OK') else: all_ok = False - print '>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<' - print '' + print('>>>>>>>>>>>>>>>>>>>>>>Test ERROR<<<<<<<<<<<<<<<<<<<<<') + print('') return all_ok if __name__=='__main__': if ParserTest().Test(): - print 'All tests OK' + print('All tests OK') else: - print 'One or more tests FAILED' \ No newline at end of file + print('One or more tests FAILED') diff --git a/patzilla/util/cql/pyparsing/serializer.py b/patzilla/util/cql/pyparsing/serializer.py index a078b5f8..c0ac6268 100644 --- a/patzilla/util/cql/pyparsing/serializer.py +++ b/patzilla/util/cql/pyparsing/serializer.py @@ -3,7 +3,7 @@ import re import types import logging -import StringIO +import io from pyparsing import ParseResults from patzilla.util.cql.pyparsing.parser import CQLGrammar from patzilla.util.cql.pyparsing.util import walk_token_results, token_to_triple @@ -32,7 +32,7 @@ def tokens_to_cql(tokens): u'foo=bar and baz=(qux or quux)' """ - buffer = StringIO.StringIO() + buffer = io.StringIO() tokens_to_cql_buffer(tokens, 
buffer) buffer.seek(0) return buffer.read() @@ -51,23 +51,23 @@ def tokens_to_cql_buffer(tokens, buffer): # surround binop with spaces for all operators but equality (=) if binop != '=': - triple[1] = u' {0} '.format(binop) + triple[1] = ' {0} '.format(binop) - payload = u''.join(triple) + payload = ''.join(triple) else: - payload = u''.join(token) + payload = ''.join(token) buffer.write(payload) elif name.startswith('subquery'): tokens_to_cql_buffer(token, buffer) - elif tokentype in types.StringTypes: + elif tokentype in (str,): out = token # surround all boolean operators with whitespace if token in grammar.booleans: - out = u' {0} '.format(token) + out = ' {0} '.format(token) buffer.write(out) def normalize_patentnumbers(tokens): @@ -200,7 +200,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None): # If it does, put term inside parenthesis, which got lost while performing shortcut expansion. if token: if re.match('.*(?:' + grammar.termop.pattern + ').*', token[0], flags=grammar.termop.flags): - token[0] = u'({0})'.format(token[0]) + token[0] = '({0})'.format(token[0]) # Process triple in value shortcut notation (contains only the single term). # Take action: Insert index and binop from subquery context. diff --git a/patzilla/util/cql/pyparsing/util.py b/patzilla/util/cql/pyparsing/util.py index a202a99b..e36785f6 100644 --- a/patzilla/util/cql/pyparsing/util.py +++ b/patzilla/util/cql/pyparsing/util.py @@ -6,7 +6,7 @@ def get_literals(*elements): literals = [] for element in elements: for literal in element: - literal = unicode(literal).strip('"').strip("'") + literal = str(literal).strip('"').strip("'") literals.append(literal) return literals diff --git a/patzilla/util/cql/util.py b/patzilla/util/cql/util.py index 07787f47..d117bf5f 100644 --- a/patzilla/util/cql/util.py +++ b/patzilla/util/cql/util.py @@ -15,7 +15,7 @@ def pair_to_cql(datasource, key, value): return cql_part = None - format = u'{0}=({1})' + format = '{0}=({1})' # Special processing rules for depatisnet if datasource == 'depatisnet': @@ -94,7 +94,7 @@ def pair_to_cql(datasource, key, value): if key == 'inventor' or key == 'applicant': if not has_booleans(value) and should_be_quoted(value): - value = u'"{0}"'.format(value) + value = '"{0}"'.format(value) if key == 'pubdate': diff --git a/patzilla/util/crypto/jwt.py b/patzilla/util/crypto/jwt.py index 204727ba..aeae850d 100644 --- a/patzilla/util/crypto/jwt.py +++ b/patzilla/util/crypto/jwt.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- -# (c) 2014-2022 Andreas Motl -from __future__ import absolute_import + +# (c) 2014 Andreas Motl, Elmyra UG + import logging from datetime import datetime, timedelta import python_jwt from jwcrypto import jwk from zope.interface.interface import Interface -from zope.interface.declarations import implements +#from zope.interface.declarations import implements +#from zope.interface import implementer log = logging.getLogger(__name__) @@ -26,7 +28,7 @@ class JwtSigner(object): - https://jwcrypto.readthedocs.io/ """ - implements(ISigner) +# py27 implements(ISigner) def __init__(self, key=None, ttl=None): self.key = key @@ -86,7 +88,7 @@ def unsign(self, token): iat_skew=timedelta(minutes=5), ) - if not payload.has_key('data'): + if 'data' not in payload: error_payload = { 'location': 'JSON Web Token', 'name': self.__class__.__name__, diff --git a/patzilla/util/data/container.py b/patzilla/util/data/container.py index d9c06532..8fd79b61 100644 --- a/patzilla/util/data/container.py +++ b/patzilla/util/data/container.py @@ -24,7 
+24,7 @@ def bunchify(cls, x): Generic "bunchify", also works with descendants of Bunch. """ if isinstance(x, dict): - return cls( (k, cls.bunchify(v)) for k,v in x.iteritems() ) + return cls( (k, cls.bunchify(v)) for k,v in x.items() ) elif isinstance(x, (list, tuple)): return type(x)( cls.bunchify(v) for v in x ) else: @@ -35,7 +35,7 @@ def unique_sequence(seq): # https://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-python-whilst-preserving-order/480227#480227 seen = set() seen_add = seen.add - unhashable_types = (types.ListType, types.DictionaryType) + unhashable_types = (list, dict) return [x for x in seq if type(x) in unhashable_types or not (x in seen or seen_add(x))] diff --git a/patzilla/util/data/orderedset.py b/patzilla/util/data/orderedset.py index 5ba05be5..01b21d09 100644 --- a/patzilla/util/data/orderedset.py +++ b/patzilla/util/data/orderedset.py @@ -64,6 +64,6 @@ def __eq__(self, other): if __name__ == '__main__': s = OrderedSet('abracadaba') t = OrderedSet('simsalabim') - print(s | t) - print(s & t) - print(s - t) + print((s | t)) + print((s & t)) + print((s - t)) diff --git a/patzilla/util/data/zip.py b/patzilla/util/data/zip.py index 9c9b472b..9879aa9c 100644 --- a/patzilla/util/data/zip.py +++ b/patzilla/util/data/zip.py @@ -17,7 +17,7 @@ def zip_multi(multi): now = time.localtime(time.time())[:6] # http://stackoverflow.com/questions/434641/how-do-i-set-permissions-attributes-on-a-file-in-a-zip-file-using-pythons-zip/434689#434689 - unix_permissions = 0644 << 16L + unix_permissions = 0o644 << 16 # add index file for drawings """ diff --git a/patzilla/util/database/beaker_mongodb.py b/patzilla/util/database/beaker_mongodb.py index 4ccab80c..26da6a49 100644 --- a/patzilla/util/database/beaker_mongodb.py +++ b/patzilla/util/database/beaker_mongodb.py @@ -194,9 +194,9 @@ from beaker.synchronization import null_synchronizer from beaker.util import verify_directory, SyncDict -from StringIO import StringIO +from io import StringIO try: - import cPickle as pickle + import pickle as pickle except ImportError: import pickle @@ -499,6 +499,6 @@ def _parse_uri(uri, default_port=27017): def _depickle(value): try: return pickle.loads(value) - except Exception, e: + except Exception as e: log.exception("Failed to unpickle value '{0}'.".format(e)) return None diff --git a/patzilla/util/database/beaker_mongodb_gridfs.py b/patzilla/util/database/beaker_mongodb_gridfs.py index 605a0c70..7c40e2d4 100644 --- a/patzilla/util/database/beaker_mongodb_gridfs.py +++ b/patzilla/util/database/beaker_mongodb_gridfs.py @@ -1,9 +1,11 @@ -from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle +import pickle +import logging as log +# py27 from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle def includeme(config): # Monkey patch 3rd party class to fix runtime error - MongoDBGridFSNamespaceManager.lock_dir = None +# py27 MongoDBGridFSNamespaceManager.lock_dir = None # Monkey patch "set_value" method after upgrade to Beaker-1.9.0 to accept the "expiretime" argument. 
def set_value(self, key, value, expiretime=None): @@ -20,4 +22,4 @@ def set_value(self, key, value, expiretime=None): self.__delitem__(key) gridfs.put(value, **query) - MongoDBGridFSNamespaceManager.set_value = set_value +# py27 MongoDBGridFSNamespaceManager.set_value = set_value diff --git a/patzilla/util/date/__init__.py b/patzilla/util/date/__init__.py index a683524f..4be8b63a 100644 --- a/patzilla/util/date/__init__.py +++ b/patzilla/util/date/__init__.py @@ -111,7 +111,7 @@ def parse_date_within(value): """ value = value.replace('within', '').strip().strip('"') parts = value.split(',') - parts = map(unicode.strip, parts) + parts = list(map(str.strip, parts)) result = { 'startdate': parts[0], 'enddate': parts[1], @@ -123,12 +123,12 @@ def year_range_to_within(value): Parse year ranges like "1990-2014" or "1990 - 2014" and convert into "within 1990,2014" expression """ - if value.count(u'-') == 1: - parts = value.split(u'-') + if value.count('-') == 1: + parts = value.split('-') parts = [part.strip() for part in parts] year_from, year_to = parts if len(year_from) == 4 and len(year_to) == 4: - value = u'within {year_from},{year_to}'.format(**locals()) + value = 'within {year_from},{year_to}'.format(**locals()) return value def week_range(date): diff --git a/patzilla/util/email/core.py b/patzilla/util/email/core.py index 899b3c72..8e14d2cd 100644 --- a/patzilla/util/email/core.py +++ b/patzilla/util/email/core.py @@ -17,7 +17,7 @@ log = logging.getLogger(__name__) -def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', reply_to=None, attachments=None, mime_headers=None): +def build_email(mail_to, subject, body_text, mail_from='test@example.org', reply_to=None, attachments=None, mime_headers=None): """ Flexible Multipart MIME message builder. @@ -53,11 +53,11 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl } # Subject header - mime_headers.update({u'Subject': Header(s=subject, charset='utf-8')}) + mime_headers.update({'Subject': Header(s=subject, charset='utf-8')}) # Add address headers - for key, item in address_headers.iteritems(): + for key, item in address_headers.items(): if isinstance(item, AddressList): # v1 @@ -70,7 +70,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl message[key] = value # Add more headers - for key, value in mime_headers.iteritems(): + for key, value in mime_headers.items(): #message.add_header(key, value) if value: message[key] = value @@ -97,7 +97,7 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl # multipart attachments # ------------------------------------------ # from https://docs.python.org/2/library/email-examples.html - for filename, payload in attachments.iteritems(): + for filename, payload in attachments.items(): # Guess the content type based on the file's extension. 
Encoding # will be ignored, although we should check for simple things like @@ -149,10 +149,10 @@ def build_email(mail_to, subject, body_text, mail_from=u'test@example.org', repl return payload -def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.org'): +def send_email(mail_to, message, smtp_settings=None, mail_from='test@example.org'): smtp_settings = smtp_settings or {} - smtp_settings.setdefault('hostname', u'localhost') + smtp_settings.setdefault('hostname', 'localhost') smtp_settings.setdefault('port', 25) # sanity checks @@ -191,7 +191,7 @@ def send_email(mail_to, message, smtp_settings=None, mail_from=u'test@example.or def format_addresslist(addresslist): #print 'addresslist:', addresslist.addresslist - return map(formataddr, addresslist.addresslist) + return list(map(formataddr, addresslist.addresslist)) def fix_addresslist(addresslist): diff --git a/patzilla/util/email/message.py b/patzilla/util/email/message.py index 7baca1ee..37f7ea62 100644 --- a/patzilla/util/email/message.py +++ b/patzilla/util/email/message.py @@ -6,7 +6,7 @@ import logging import textwrap from copy import deepcopy -from core import build_email, send_email +from .core import build_email, send_email from patzilla.util.config import read_config, to_list log = logging.getLogger(__name__) @@ -38,25 +38,25 @@ def add_reply(self, address): def send(self, subject='', message='', files=None): - recipients = u', '.join(self.recipients) - reply_to = u', '.join(self.reply_to) + recipients = ', '.join(self.recipients) + reply_to = ', '.join(self.reply_to) files = files or {} # get smtp addressing information from settings - smtp_host = self.smtp_settings.get('hostname', u'localhost') - mail_from = self.email_settings['addressbook'].get('from', u'test@example.org') + smtp_host = self.smtp_settings.get('hostname', 'localhost') + mail_from = self.email_settings['addressbook'].get('from', 'test@example.org') # log smtp settings smtp_settings_log = deepcopy(self.smtp_settings) if 'password' in smtp_settings_log: del smtp_settings_log['password'] - log.info(u'Sending email to "{recipients}". smtp settings: {smtp_settings}'.format( + log.info('Sending email to "{recipients}". 
smtp settings: {smtp_settings}'.format( recipients=recipients, smtp_settings=smtp_settings_log)) # build subject event_date = time.strftime('%Y-%m-%d') event_time = time.strftime('%H:%M:%S') - subject_real = u'' + subject_real = '' if 'subject_prefix' in self.email_settings['content']: prefix = self.email_settings['content'].get('subject_prefix') if not prefix.endswith(' '): @@ -64,14 +64,14 @@ def send(self, subject='', message='', files=None): subject_real += prefix #subject_real += u'{subject} on {event_date} at {event_time}'.format(**locals()) - subject_real += u'{}'.format(subject) + subject_real += '{}'.format(subject) - filenames = u'\n'.join([u'- ' + entry for entry in files.keys()]) + filenames = '\n'.join(['- ' + entry for entry in list(files.keys())]) body_template = textwrap.dedent(self.email_settings['content'].get('body', '')).strip() if 'signature' in self.email_settings['content']: - body_template += u'\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip() + body_template += '\n\n--\n' + textwrap.dedent(self.email_settings['content']['signature']).strip() body_template = body_template.replace('\\n', '\r') @@ -96,11 +96,11 @@ def send(self, subject='', message='', files=None): # smtplib.SMTPServerDisconnected: Connection unexpectedly closed # send_email(recipients, message, smtp_settings=self.smtp_settings, mail_from=mail_from) - log.info(u'Email to recipients "{recipients}" sent successfully'.format(recipients=recipients)) + log.info('Email to recipients "{recipients}" sent successfully'.format(recipients=recipients)) except Exception as ex: # TODO: catch traceback when running in commandline mode - log.error(u'Error sending email: {failure}'.format(failure=ex)) + log.error('Error sending email: {failure}'.format(failure=ex)) raise @@ -123,10 +123,10 @@ def send(self, subject='', message='', files=None): message = EmailMessage(settings['smtp'], settings['email'], {'subject_prefix': 'acme-product'}) message.add_recipient('test@example.org') message.send( - subject = u'Self-test email from Räuber Hotzenplotz', - message = u'Self-test email from Räuber Hotzenplotz', + subject = 'Self-test email from Räuber Hotzenplotz', + message = 'Self-test email from Räuber Hotzenplotz', files = { - u'test.txt': u'☠☠☠ SKULL AND CROSSBONES ☠☠☠', - u'test.json': json.dumps(u'☠☠☠ SKULL AND CROSSBONES ☠☠☠'), + 'test.txt': '☠☠☠ SKULL AND CROSSBONES ☠☠☠', + 'test.json': json.dumps('☠☠☠ SKULL AND CROSSBONES ☠☠☠'), } ) diff --git a/patzilla/util/expression/__init__.py b/patzilla/util/expression/__init__.py index e96ed902..43dbceac 100644 --- a/patzilla/util/expression/__init__.py +++ b/patzilla/util/expression/__init__.py @@ -28,8 +28,8 @@ class SearchExpression(object): def parse_expression(self, query): - logger.info(u'Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format( - query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or u'default')) + logger.info('Parsing search expression "{query}" with syntax "{syntax}" and grammar "{grammar}"'.format( + query=query, syntax=self.syntax, grammar=self.grammar and self.grammar.__name__ or 'default')) if self.syntax == 'cql': self.parse_expression_cql(query) @@ -40,8 +40,8 @@ def parse_expression(self, query): def parse_expression_cql(self, expression): # Fixup query: Wrap into quotes if CQL expression is a) unspecific, b) contains spaces and c) is still unquoted - if should_be_quoted(expression) and u'within' not in expression: - expression = u'"%s"' % expression + 
if should_be_quoted(expression) and 'within' not in expression: + expression = '"%s"' % expression # Parse and recompile CQL query string to apply number normalization query_object = None @@ -59,11 +59,11 @@ def parse_expression_cql(self, expression): expression = query_recompiled if query_recompiled != expression: - logger.info(u'Recompiled search expression to "{query}"'.format(query=expression)) + logger.info('Recompiled search expression to "{query}"'.format(query=expression)) except Exception as ex: # TODO: Can we get more details from diagnostic information to just stop here w/o propagating obviously wrong query to OPS? - logger.warn(u'CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback())) + logger.warn('CQL parse error: query="{0}", reason={1}, Exception was:\n{2}'.format(expression, ex, _exception_traceback())) self.cql_parser = query_object self.expression = expression diff --git a/patzilla/util/expression/keywords.py b/patzilla/util/expression/keywords.py index e5bd7b3b..bfb3f422 100644 --- a/patzilla/util/expression/keywords.py +++ b/patzilla/util/expression/keywords.py @@ -64,7 +64,7 @@ def scan_keywords(op, keywords): #print "op.index:", op.index #print "op.term:", op.term if str(op.index) in keyword_fields: - keyword = clean_keyword(unicode(op.term)) + keyword = clean_keyword(str(op.term)) keywords.append(keyword) hasattr(op, 'leftOperand') and scan_keywords(op.leftOperand, keywords) @@ -76,7 +76,7 @@ def keywords_to_response(request, search): Propagate keywords to client for highlighting """ - logger.info(u'Propagating keywords from "{origin}": {keywords}'.format( + logger.info('Propagating keywords from "{origin}": {keywords}'.format( origin=search.keywords_origin, keywords=search.keywords)) request.response.headers['X-PatZilla-Query-Keywords'] = json.dumps(search.keywords) diff --git a/patzilla/util/image/convert.py b/patzilla/util/image/convert.py index 63e53dee..2bdf92e2 100644 --- a/patzilla/util/image/convert.py +++ b/patzilla/util/image/convert.py @@ -9,7 +9,7 @@ import where import logging import datetime -import StringIO +import io import subprocess from six import BytesIO from tempfile import NamedTemporaryFile @@ -195,7 +195,7 @@ def run_imagemagick(command, input=None): def png_resize(png_payload, width): - image = Image.open(StringIO.StringIO(png_payload)).convert('RGB') + image = Image.open(io.StringIO(png_payload)).convert('RGB') image_width = image.size[0] image_height = image.size[1] @@ -209,13 +209,13 @@ def png_resize(png_payload, width): #size = (int(width), int(image_height * aspect)) size = (int(width), int(image_height / scale_factor)) #print "size:", size - print "Resizing image from %s to %s" % (image.size, size) + print("Resizing image from %s to %s" % (image.size, size)) image.thumbnail(size, Image.ANTIALIAS) #image.resize(size, Image.ANTIALIAS) #print "thumbnail done" - png = StringIO.StringIO() + png = io.StringIO() image.save(png, 'PNG') #print "image saved to memory" diff --git a/patzilla/util/ipc/parser.py b/patzilla/util/ipc/parser.py index e8701e54..e94d54a9 100644 --- a/patzilla/util/ipc/parser.py +++ b/patzilla/util/ipc/parser.py @@ -10,7 +10,7 @@ def decodeMatchToDict(match, key_suffix): if match: # transfer data from match groups to instance variable, # making all values uppercase - for key, value in match.groupdict().iteritems(): + for key, value in match.groupdict().items(): if key.endswith(key_suffix): key = key.replace(key_suffix, '') if value: @@ -56,7 +56,7 @@ def 
decode(self): m = self.r.match(self.raw) self.ipc = decodeMatchToDict(m, '__1') if not self.ipc: - raise ValueError, "IPCR class '%s' could not be decoded" % self.raw + raise ValueError("IPCR class '%s' could not be decoded" % self.raw) def fix(self): @@ -82,7 +82,7 @@ def asDict(self): def formatFlexible(self, class_padding='', group_subgroup_delimiter='', group_padding='', subgroup_padding=''): if not self.ipc['section']: - raise ValueError, "IPCR class '%s' could not be formatted" % self.raw + raise ValueError("IPCR class '%s' could not be formatted" % self.raw) ipc_serialized = self.ipc['section'] diff --git a/patzilla/util/network/browser.py b/patzilla/util/network/browser.py index 51f61cc1..3545dbff 100644 --- a/patzilla/util/network/browser.py +++ b/patzilla/util/network/browser.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- # (c) 2017-2019 Andreas Motl -regular_user_agent = u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0' +regular_user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0' diff --git a/patzilla/util/network/requests_xmlrpclib.py b/patzilla/util/network/requests_xmlrpclib.py index 18dbbc2b..efcd83c7 100644 --- a/patzilla/util/network/requests_xmlrpclib.py +++ b/patzilla/util/network/requests_xmlrpclib.py @@ -15,7 +15,7 @@ try: import xmlrpc.client as xmlrpc except ImportError: - import xmlrpclib as xmlrpc + import xmlrpc.client as xmlrpc import requests diff --git a/patzilla/util/numbers/common.py b/patzilla/util/numbers/common.py index ac5af45a..9f4d8359 100644 --- a/patzilla/util/numbers/common.py +++ b/patzilla/util/numbers/common.py @@ -29,12 +29,12 @@ def join_patent(patent): return number def decode_patent_number(patent): - if isinstance(patent, types.StringTypes): + if isinstance(patent, (str,)): decoded = split_patent_number(patent) - elif isinstance(patent, types.DictionaryType): + elif isinstance(patent, dict): decoded = patent else: - raise TypeError(u'Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent))) + raise TypeError('Document number "{patent}" of type "{type}" could not be decoded'.format(patent=patent, type=type(patent))) return decoded def split_patent_number(patent_number): @@ -154,7 +154,7 @@ def split_patent_number(patent_number): return dib else: - log.error(u'Unable to parse patent number "{0}"'.format(patent_number)) + log.error('Unable to parse patent number "{0}"'.format(patent_number)) def split_patent_number_more(patent): diff --git a/patzilla/util/numbers/denormalize.py b/patzilla/util/numbers/denormalize.py index 6395e2a6..9b4deee2 100644 --- a/patzilla/util/numbers/denormalize.py +++ b/patzilla/util/numbers/denormalize.py @@ -121,16 +121,16 @@ def test_denormalization(): WO1990004917 """ - print "-" * 30 - print "original\tdenormalized" - print "-" * 30 + print("-" * 30) + print("original\tdenormalized") + print("-" * 30) for number in payload.split("\n"): if not number or number == "\n": continue if number.startswith('---'): - print number + print(number) continue number_denormalized = join_patent(denormalize_patent(split_patent_number(number))) - print "%s\t%s" % (number, number_denormalized) + print("%s\t%s" % (number, number_denormalized)) if __name__ == "__main__": diff --git a/patzilla/util/numbers/helper.py b/patzilla/util/numbers/helper.py index 044ba815..6a498bb4 100644 --- a/patzilla/util/numbers/helper.py +++ b/patzilla/util/numbers/helper.py @@ -26,7 +26,7 @@ def read_numbersfile(file): fh = 
open(file, 'r') numbers_raw = fh.readlines() fh.close() - numbers = map(lambda number: number.strip(" ;\"'\t\n\r"), numbers_raw) + numbers = [number.replace("\n", '').replace(' ', '') for number in numbers_raw] numbers = [number for number in numbers if number and not number.startswith('#')] return numbers diff --git a/patzilla/util/numbers/normalize.py b/patzilla/util/numbers/normalize.py index 1dd5da49..0b60f9d9 100644 --- a/patzilla/util/numbers/normalize.py +++ b/patzilla/util/numbers/normalize.py @@ -194,7 +194,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False, provider = 'ops' # 1. handle patent dicts or convert (split) from string - if isinstance(number, types.DictionaryType): + if isinstance(number, dict): patent = number else: patent = split_patent_number(number) @@ -209,7 +209,7 @@ def normalize_patent(number, as_dict=False, as_string=False, fix_kindcode=False, # 3. result handling # 3.a) default mechanism: return what we've got - if isinstance(number, types.DictionaryType): + if isinstance(number, dict): result = patent_normalized else: result = join_patent(patent_normalized) @@ -622,7 +622,7 @@ def normalize_patent_it(patent): # filter: special document handling (with alphanumeric prefixes) # trim and pad sequential number with zeros to get total length of 7 characters for patent number - if patched.has_key('number-type') and patched.has_key('number-real'): + if 'number-type' in patched and 'number-real' in patched: subtype = patched['number-type'] seqnumber = patched['number-real'] patched['number'] = subtype + seqnumber.lstrip('0') @@ -671,16 +671,16 @@ def normalization_example(): # pragma: nocover 'JP3657641B2', ] - print "-" * 30 - print '{0}{1}'.format("original".ljust(20), "normalized") - print "-" * 30 + print("-" * 30) + print('{0}{1}'.format("original".ljust(20), "normalized")) + print("-" * 30) for number in numbers: if number.find('---') != -1: - print number + print(number) continue result = normalize_patent(number) #result = join_patent(patch_patent_old_archive(patent)) - print "{0}{1}".format(number.ljust(20), result) + print("{0}{1}".format(number.ljust(20), result)) if __name__ == "__main__": # pragma: nocover diff --git a/patzilla/util/numbers/numberlists.py b/patzilla/util/numbers/numberlists.py index d6341e32..22ceb2ad 100644 --- a/patzilla/util/numbers/numberlists.py +++ b/patzilla/util/numbers/numberlists.py @@ -4,13 +4,13 @@ from patzilla.util.numbers.normalize import normalize_patent def parse_numberlist(rawdata): - pattern = re.compile(u'[,\n]') + pattern = re.compile('[,\n]') entries = pattern.split(rawdata) - entries = map(unicode.strip, entries) + entries = list(map(str.strip, entries)) return entries def normalize_numbers(entries): - entries = map(lambda s: s.replace(u' ', u''), entries) + entries = [s.replace(' ', '') for s in entries] response = {'valid': [], 'invalid': [], 'all': []} for entry in entries: entry_normalized = normalize_patent(entry, fix_kindcode=True) diff --git a/patzilla/util/python/__init__.py b/patzilla/util/python/__init__.py index 4974efcc..5a69667c 100644 --- a/patzilla/util/python/__init__.py +++ b/patzilla/util/python/__init__.py @@ -2,7 +2,7 @@ # (c) 2014 Andreas Motl, Elmyra UG import sys import traceback -from StringIO import StringIO +from io import StringIO def exception_traceback(exc_info=None): """ diff --git a/patzilla/util/text/format.py b/patzilla/util/text/format.py index ae59c647..225c86b2 100644 --- a/patzilla/util/text/format.py +++ b/patzilla/util/text/format.py @@ -15,14 
+15,14 @@ def slugify(value, strip_equals=True, lowercase=True): Via http://code.activestate.com/recipes/577257-slugify-make-a-string-usable-in-a-url-or-filename/ """ import unicodedata - if not isinstance(value, unicode): - value = unicode(value) + if not isinstance(value, str): + value = str(value) value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') _strip_re = _slugify_strip_re if not strip_equals: _strip_re = _slugify_strip_wo_equals_re - value = unicode(_strip_re.sub('', value).strip()) + value = str(_strip_re.sub('', value).strip()) if lowercase: value = value.lower() diff --git a/patzilla/util/web/email/submit.py b/patzilla/util/web/email/submit.py index 2504401e..33b33adf 100644 --- a/patzilla/util/web/email/submit.py +++ b/patzilla/util/web/email/submit.py @@ -72,19 +72,19 @@ def email_issue_report(report, recipients): identifier = report.meta.id # Build reasonable subject - subject = u'Product issue' + subject = 'Product issue' if 'dialog' in report and 'what' in report.dialog: - subject = u'[{}] '.format(report.dialog.what) + subject + subject = '[{}] '.format(report.dialog.what) + subject if identifier: - subject += u' #' + identifier + subject += ' #' + identifier # Build reasonable message - message = u'' + message = '' if 'dialog' in report and 'remark' in report.dialog: message = report.dialog.remark # Add JSON report as attachment - files = {u'report.json': report.pretty()} + files = {'report.json': report.pretty()} email = message_factory(recipients=recipients) email.send( diff --git a/patzilla/util/web/identity/store.py b/patzilla/util/web/identity/store.py index 92a69d45..1f8df810 100644 --- a/patzilla/util/web/identity/store.py +++ b/patzilla/util/web/identity/store.py @@ -14,6 +14,7 @@ from pyramid.threadlocal import get_current_request from zope.interface.declarations import implements from zope.interface.interface import Interface +from zope.interface import implementer log = logging.getLogger(__name__) @@ -133,9 +134,10 @@ class UserMetrics(Document): class IUserMetricsManager(Interface): pass +@implementer(IUserMetricsManager) class UserMetricsManager(object): - implements(IUserMetricsManager) +# py27 implements(IUserMetricsManager) def measure_upstream(self, upstream, volume): diff --git a/patzilla/util/web/pyramid/cornice.py b/patzilla/util/web/pyramid/cornice.py index b7dadcae..89a6ee9b 100644 --- a/patzilla/util/web/pyramid/cornice.py +++ b/patzilla/util/web/pyramid/cornice.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # (c) 2017 Andreas Motl, Elmyra UG -from __future__ import absolute_import + from cornice.errors import Errors def add_location_whitelisted(self, location, name=None, description=None, **kw): diff --git a/patzilla/util/web/pyramid/renderer.py b/patzilla/util/web/pyramid/renderer.py index 78a06af4..d941add6 100644 --- a/patzilla/util/web/pyramid/renderer.py +++ b/patzilla/util/web/pyramid/renderer.py @@ -18,7 +18,7 @@ def __call__(self, data, context): content_type = (context['request'].accept.best_match(acceptable) or acceptable[0]) response.content_type = content_type - print "data:", data + print("data:", data) return 'hello' #return json.dumps(data, use_decimal=True) diff --git a/patzilla/util/web/util/xmlrpclib.py b/patzilla/util/web/util/xmlrpclib.py index 50c5f6de..df3353b6 100644 --- a/patzilla/util/web/util/xmlrpclib.py +++ b/patzilla/util/web/util/xmlrpclib.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- # (c) 2014-2015 Andreas Motl, Elmyra UG -from __future__ import absolute_import + import sys import socket -import 
xmlrpclib +import xmlrpc.client import ssl # https://stackoverflow.com/questions/372365/set-timeout-for-xmlrpclib-serverproxy/14397619#14397619 @@ -24,7 +24,7 @@ def __enter__(self): if self.__timeout: self.__prevDefaultTimeout = socket.getdefaulttimeout() socket.setdefaulttimeout(self.__timeout) - proxy = xmlrpclib.Server(self.__url, allow_none=True) + proxy = xmlrpc.client.Server(self.__url, allow_none=True) except Exception as ex: raise Exception("Unable create XMLRPC-proxy for url '%s': %s" % (self.__url, ex)) diff --git a/patzilla/util/web/uwsgi/uwsgidecorators.py b/patzilla/util/web/uwsgi/uwsgidecorators.py index 79c08ea1..29b20c36 100644 --- a/patzilla/util/web/uwsgi/uwsgidecorators.py +++ b/patzilla/util/web/uwsgi/uwsgidecorators.py @@ -4,7 +4,7 @@ from threading import Thread try: - import cPickle as pickle + import pickle as pickle except: import pickle diff --git a/patzilla/util/xml/format.py b/patzilla/util/xml/format.py index 49d32120..1ee0b738 100644 --- a/patzilla/util/xml/format.py +++ b/patzilla/util/xml/format.py @@ -69,5 +69,5 @@ def data(self, root): return super(BadgerFishNoNamespace, self).data(root) def clean_tag(self, node): - if isinstance(node.tag, basestring): + if isinstance(node.tag, str): node.tag = re.sub('{.*}', '', node.tag) diff --git a/pserve.py b/pserve.py new file mode 100644 index 00000000..4ddeca3f --- /dev/null +++ b/pserve.py @@ -0,0 +1,10 @@ +#!/home/frank/DATA/Envs/env1/bin/python3 +# -*- coding: utf-8 -*- +import regex as re +import sys + +from pyramid.scripts.pserve import main + +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/setup.py b/setup.py index e75bff7d..9128ac3a 100644 --- a/setup.py +++ b/setup.py @@ -73,8 +73,8 @@ 'ndg-httpsclient<1', # HTML - 'BeautifulSoup<4', - 'html2text==2016.9.19', # 2020.1.16 + 'beautifulsoup4', + 'html2text', # XML # Remark: Both lxml 3.8.0 and 4.0.0 will segfault on Debian Wheezy (7.11) From 00bd74b13c3c419ff24dcc276ddf1e46c9944a7d Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sat, 11 May 2019 20:21:00 +0200 Subject: [PATCH 02/23] More steps towards Python 3 --- patzilla/access/dpma/depatisnet.py | 12 ++++++------ patzilla/access/dpma/dpmaregister.py | 1 - patzilla/navigator/services/__init__.py | 5 ++--- patzilla/navigator/settings.py | 5 +---- patzilla/util/database/beaker_mongodb_gridfs.py | 7 ++++--- setup.py | 8 +++++--- 6 files changed, 18 insertions(+), 20 deletions(-) diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index a93bb99c..e1dd4edb 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -1,15 +1,13 @@ # -*- coding: utf-8 -*- # (c) 2014-2015 Andreas Motl, Elmyra UG -# py27 import re -import regex as re import sys import json import types import logging import urllib.request, urllib.error, urllib.parse import mechanize +import regex as re import http.cookiejar -# py27 from BeautifulSoup import BeautifulSoup from bs4 import BeautifulSoup from xlrd import open_workbook from patzilla.access.generic.search import GenericSearchResponse @@ -227,7 +225,7 @@ def find_errors(self, body): 'otherwise don\'t hesitate to report this problem to us.') # Check for error messages - soup = BeautifulSoup(body) + soup = BeautifulSoup(body, 'lxml') error_message = soup.find('div', {'id': 'errormsg'}) if error_message: parts = [] @@ -238,11 +236,13 @@ def find_errors(self, body): else: error_message = '' + # Compute error message. 
+ prefix = 'Upstream service: ' if 'An error has occurred' in body: - error_message = error_message.replace('\t', '').replace('\r\n', '\n').strip() + error_message = prefix + error_message.replace('\t', '').replace('\r\n', '\n').strip() raise SyntaxError(error_message) - return error_message + return prefix + error_message def read_xls_response(self, xls_response): data = excel_to_dict(xls_response.read()) diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py index b314b89c..2acb850a 100644 --- a/patzilla/access/dpma/dpmaregister.py +++ b/patzilla/access/dpma/dpmaregister.py @@ -16,7 +16,6 @@ from pprint import pformat from jsonpointer import JsonPointer, JsonPointerException from xml.etree.ElementTree import fromstring -# py27 from BeautifulSoup import BeautifulSoup from bs4 import BeautifulSoup from collections import namedtuple, OrderedDict from patzilla.access.dpma.util import dpma_file_number diff --git a/patzilla/navigator/services/__init__.py b/patzilla/navigator/services/__init__.py index 4a52ae3d..8355b43e 100644 --- a/patzilla/navigator/services/__init__.py +++ b/patzilla/navigator/services/__init__.py @@ -35,10 +35,9 @@ def handle_generic_exception(request, ex, backend_name, query): module_name = ex.__class__.__module__ class_name = ex.__class__.__name__ - reason = '{}.{}: {}'.format(module_name, class_name, ex.message) + reason = '{}.{}: {}'.format(module_name, class_name, str(ex)) - logger.critical('{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}\nexception:\n{exception}'.format( - exception=_exception_traceback(), **locals())) + logger.exception('{backend_name} error: query="{query}", reason={reason}\nresponse:\n{http_response}'.format(**locals())) message = 'An exception occurred while processing your query.
<br/>\nReason: {}<br/><br/>
\n'.format(reason) if module_name == 'pymongo.errors': diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py index 4a1410d6..1c8ecd23 100644 --- a/patzilla/navigator/settings.py +++ b/patzilla/navigator/settings.py @@ -4,6 +4,7 @@ import logging from copy import deepcopy from email.utils import parseaddr +from munch import Munch, munchify from pyramid.exceptions import ConfigurationError from pyramid.threadlocal import get_current_request, get_current_registry @@ -14,10 +15,6 @@ from patzilla.util.date import datetime_isoformat, unixtime_to_datetime from patzilla.util.python import _exception_traceback -from patzilla.util.data.munch import Munch, munchify - -#py27 -#from patzilla.util.data.container import Bunch log = logging.getLogger(__name__) diff --git a/patzilla/util/database/beaker_mongodb_gridfs.py b/patzilla/util/database/beaker_mongodb_gridfs.py index 7c40e2d4..e9aff35f 100644 --- a/patzilla/util/database/beaker_mongodb_gridfs.py +++ b/patzilla/util/database/beaker_mongodb_gridfs.py @@ -1,11 +1,12 @@ import pickle import logging as log -# py27 from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle +from mongodb_gridfs_beaker import MongoDBGridFSNamespaceManager, log, pickle + def includeme(config): # Monkey patch 3rd party class to fix runtime error -# py27 MongoDBGridFSNamespaceManager.lock_dir = None + MongoDBGridFSNamespaceManager.lock_dir = None # Monkey patch "set_value" method after upgrade to Beaker-1.9.0 to accept the "expiretime" argument. def set_value(self, key, value, expiretime=None): @@ -22,4 +23,4 @@ def set_value(self, key, value, expiretime=None): self.__delitem__(key) gridfs.put(value, **query) -# py27 MongoDBGridFSNamespaceManager.set_value = set_value + MongoDBGridFSNamespaceManager.set_value = set_value diff --git a/setup.py b/setup.py index 9128ac3a..582d22de 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ # Can't upgrade to pymongo-3.5.1 due to "from pymongo.connection import Connection" # usage in "mongodb_gridfs_beaker" module. 'pymongo<3', # 3.13.0, 4.3.3 - 'mongodb_gridfs_beaker==0.5.4', + 'mongodb_gridfs_beaker==0.6.0dev1', 'mongoengine==0.13.0', # 0.24.1 'python-magic<1', @@ -99,8 +99,8 @@ 'arrow==0.10.0', # 0.12.1 'validate_email<2', 'numpy==1.16.6', # 1.22.3 - 'pandas==0.18.1', # 0.22.0, 0.25.3, 1.4.2 - 'pathlib2<3', + 'pandas', # 0.22.0, 0.25.3, 1.4.2 + 'pathlib', # Data formatting 'openpyxl>=2.4.2,<3', @@ -216,6 +216,8 @@ 'test': test_requires, }, dependency_links=[ + 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker-0.6.0dev1', + 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev1.tar.gz#egg=mechanize-0.4.3dev1', ], entry_points={ From 025559a4463e5612940ffe5d3ad135df26c39e26 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sun, 12 May 2019 14:12:17 +0200 Subject: [PATCH 03/23] No need to manually encode form fields with "mechanize" anymore --- CHANGES.rst | 2 ++ patzilla/access/dpma/depatisnet.py | 2 +- setup.py | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 349304fb..06c14c37 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -142,6 +142,8 @@ Development - [mw] Improve settings for having per-vendor OPS credentials - [ui] More flexbox for header layout - [ui] Improve comment editing usability +- [mw] No need to manually encode form fields with "mechanize" anymore. + Thanks, `Kovid `_! 
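For context: the patzilla/access/dpma/depatisnet.py hunk below is where this mechanize change lands. A hedged before/after sketch, assuming the Python 3 fork of mechanize (>= 0.4) whose form controls accept str directly; "browser" and "query" are the names used in that hunk.

    browser.select_form(nr=0)

    # Python 2 era: mechanize expected bytes, so the query was encoded by hand.
    # browser['query'] = query.encode('iso-8859-1')

    # Python 3, mechanize >= 0.4: assign the text as-is; the library is expected
    # to apply the form's declared charset on submission.
    browser['query'] = query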
2019-05-08 0.165.0 diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index e1dd4edb..44a2b9a5 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -107,7 +107,7 @@ def search_patents(self, query, options=None): self.browser.select_form(nr=0) #self.browser.select_form(name='form') - self.browser['query'] = query.encode('iso-8859-1') + self.browser['query'] = query self.browser['hitsPerPage'] = [str(limit)] self.browser['maxHitsUser'] = [str(max_hits)] diff --git a/setup.py b/setup.py index 582d22de..3e509c63 100644 --- a/setup.py +++ b/setup.py @@ -217,7 +217,12 @@ }, dependency_links=[ 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker-0.6.0dev1', +<<<<<<< HEAD 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev1.tar.gz#egg=mechanize-0.4.3dev1', +======= + 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev2.tar.gz#egg=mechanize-0.4.3dev2', + #'https://github.com/dagwieers/unoconv/archive/master.tar.gz#egg=unoconv-0.8.2', +>>>>>>> No need to manually encode form fields with "mechanize" anymore ], entry_points={ From a95d0a8e1f3dc20385229369c46c3612335fc236 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sun, 19 Mar 2023 17:21:04 +0100 Subject: [PATCH 04/23] Use python3 only in Makefile --- Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 444dae82..8cc4599a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ #VERSION := $(shell cat patzilla/version.py | awk '{ print $$3 }' | tr -d "'") #$(error VERSION=$(VERSION)) -$(eval venvpath := .venv2) +$(eval venvpath := .venv3) $(eval pip := $(venvpath)/bin/pip) $(eval twine := $(venvpath)/bin/twine) $(eval python := $(venvpath)/bin/python) @@ -9,10 +9,8 @@ $(eval pserve := $(venvpath)/bin/pserve) $(eval pytest := $(venvpath)/bin/pytest) $(eval bumpversion := $(venvpath)/bin/bumpversion) $(eval fab := $(venvpath)/bin/fab) - -$(eval venv3path := .venv) -$(eval yarn := $(venv3path)/bin/yarn) -$(eval npx := $(venv3path)/bin/npx) +$(eval yarn := $(venvpath)/bin/yarn) +$(eval npx := $(venvpath)/bin/npx) setup: setup-py @@ -65,7 +63,7 @@ upload-pypi: # Setup Python virtualenv. 
setup-virtualenv: - @test -e $(python) || virtualenv --python=python2 $(venvpath) + @test -e $(python) || virtualenv --python=python3 $(venvpath) setup-py: setup-virtualenv $(pip) install --editable=.[test] From 414b991c7e71bc015d5350cc58627745c6e6ed88 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Thu, 23 Mar 2023 12:08:03 +0100 Subject: [PATCH 05/23] Switch to xlrd3 --- patzilla/access/dpma/depatisnet.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index 44a2b9a5..374e5c31 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -9,7 +9,7 @@ import regex as re import http.cookiejar from bs4 import BeautifulSoup -from xlrd import open_workbook +from xlrd3 import open_workbook from patzilla.access.generic.search import GenericSearchResponse from patzilla.util.date import from_german, date_iso from patzilla.util.network.browser import regular_user_agent @@ -197,7 +197,7 @@ def search_patents(self, query, options=None): results = self.read_xls_response(xls_response) except Exception as ex: logger.error('Problem downloading results in XLS format: {}'.format(ex)) - ex.http_response = ex.read() + #ex.http_response = ex.read() raise # debugging diff --git a/setup.py b/setup.py index 3e509c63..00d5b2e1 100644 --- a/setup.py +++ b/setup.py @@ -104,7 +104,7 @@ # Data formatting 'openpyxl>=2.4.2,<3', - 'xlrd==0.9.3', # 0.9.4, 1.2.0, 2.0.1 + 'xlrd3', 'XlsxWriter==0.9.3', # 1.4.5, 2.0.0, 3.0.3 # Data conversion From 9cbddfc37f59160b9e87afbed66196741363d2d3 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Thu, 23 Mar 2023 12:22:58 +0100 Subject: [PATCH 06/23] fix usage of iterator not working on element --- patzilla/util/cql/pyparsing/parser.py | 15 +++++++-------- patzilla/util/cql/pyparsing/util.py | 8 -------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/patzilla/util/cql/pyparsing/parser.py b/patzilla/util/cql/pyparsing/parser.py index a0d7955d..5ab35363 100644 --- a/patzilla/util/cql/pyparsing/parser.py +++ b/patzilla/util/cql/pyparsing/parser.py @@ -21,11 +21,10 @@ Keyword, CaselessKeyword, \ Regex, \ alphas, nums, alphanums, quotedString, \ - oneOf, upcaseTokens, delimitedList, restOfLine, \ + oneOf, common, delimitedList, restOfLine, \ Forward, Group, Combine, Optional, ZeroOrMore, OneOrMore, \ NotAny, Suppress, FollowedBy, StringEnd, \ ParseResults, ParseException, removeQuotes -from patzilla.util.cql.pyparsing.util import get_literals log = logging.getLogger(__name__) @@ -98,10 +97,11 @@ def preconfigure(self): # Boolean operators # TODO: Configure german operators with DPMAGrammar only - self.and_ = CaselessKeyword("and") | CaselessKeyword("UND") - self.or_ = CaselessKeyword("or") | CaselessKeyword("ODER") - self.not_ = CaselessKeyword("not") | CaselessKeyword("NICHT") - self.prox_ = CaselessKeyword("prox") | CaselessKeyword("NAHE") + self.booleans = ("and", "UND", "or", "ODER", "not", "NICHT", "prox", "NAHE") + self.and_ = CaselessKeyword(self.booleans[0]) | CaselessKeyword(self.booleans[1]) + self.or_ = CaselessKeyword(self.booleans[2]) | CaselessKeyword(self.booleans[3]) + self.not_ = CaselessKeyword(self.booleans[4]) | CaselessKeyword(self.booleans[5]) + self.prox_ = CaselessKeyword(self.booleans[6]) | CaselessKeyword(self.booleans[7]) # Neighbourhood term operators self.neighbourhood_symbols = '(W) (NOTW) (#W) (A) (#A) (P) (L)'.split() @@ -112,7 +112,6 @@ def configure(self): self.binop_symbols = self.cmp_single + 
self.cmp_perl + self.cmp_cql # Boolean operators - self.booleans = get_literals(self.and_, self.or_, self.not_, self.prox_) self.booleans_or = ( self.and_ | self.or_ | self.not_ | self.prox_ ) # Neighbourhood term operators @@ -134,7 +133,7 @@ def build(self): # ------------------------------------------ # C. building blocks # ------------------------------------------ - self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( upcaseTokens ).setName("termop") + self.termop = Regex( "|".join(self.neighbourhood_symbols), re.IGNORECASE ).setParseAction( common.upcase_tokens ).setName("termop") termword = Word(self.unicode_printables + self.separators + self.wildcards).setName("term") termword_termop = (termword + OneOrMore( self.termop + termword )) diff --git a/patzilla/util/cql/pyparsing/util.py b/patzilla/util/cql/pyparsing/util.py index e36785f6..199a804c 100644 --- a/patzilla/util/cql/pyparsing/util.py +++ b/patzilla/util/cql/pyparsing/util.py @@ -2,14 +2,6 @@ # (c) 2014-2016 Andreas Motl, Elmyra UG from pyparsing import ParseResults -def get_literals(*elements): - literals = [] - for element in elements: - for literal in element: - literal = str(literal).strip('"').strip("'") - literals.append(literal) - return literals - def walk_token_results(tokens, *args, **kwargs): for token in tokens: From 93086803e789407b96094a5ee309ba0dd5807051 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Thu, 23 Mar 2023 13:20:32 +0100 Subject: [PATCH 07/23] Sync beaker_mongodb.py with its source --- patzilla/util/database/beaker_mongodb.py | 339 ++++++++++------------- 1 file changed, 149 insertions(+), 190 deletions(-) diff --git a/patzilla/util/database/beaker_mongodb.py b/patzilla/util/database/beaker_mongodb.py index 26da6a49..21938c99 100644 --- a/patzilla/util/database/beaker_mongodb.py +++ b/patzilla/util/database/beaker_mongodb.py @@ -184,235 +184,194 @@ before upgrading to 0.5+ and be aware that it will generate new caches. 
- +A part of this code is a copy of https://raw.githubusercontent.com/bbangert/beaker/master/beaker/ext/mongodb.py 2023-03-22 """ -import logging -from beaker.container import NamespaceManager, Container -from beaker.exceptions import InvalidCacheBackendError, MissingCacheParameter -from beaker.synchronization import null_synchronizer -from beaker.util import verify_directory, SyncDict - -from io import StringIO -try: - import pickle as pickle -except ImportError: - import pickle +import datetime +import os +import threading +import time +import pickle try: - from pymongo.connection import Connection + import pymongo + import pymongo.errors import bson - import bson.errors except ImportError: - raise InvalidCacheBackendError("Unable to load the pymongo driver.") - -log = logging.getLogger(__name__) -#log.setLevel(logging.DEBUG) - -class MongoDBNamespaceManager(NamespaceManager): - clients = SyncDict() - _pickle = True - _sparse = False + pymongo = None + bson = None - # TODO _- support write concern / safe - def __init__(self, namespace, url=None, data_dir=None, skip_pickle=False, - sparse_collection=False, **params): - NamespaceManager.__init__(self, namespace) +from beaker.container import NamespaceManager +from beaker.synchronization import SynchronizerImpl +from beaker.util import SyncDict, machine_identifier +from beaker.crypto.util import sha1 +from beaker._compat import string_type, PY2 - if not url: - raise MissingCacheParameter("MongoDB url is required") - if skip_pickle: - log.info("Disabling pickling for namespace: %s" % self.namespace) - self._pickle = False +class MongoNamespaceManager(NamespaceManager): + """Provides the :class:`.NamespaceManager` API over MongoDB. - if sparse_collection: - log.info("Separating data to one row per key (sparse collection) for ns %s ." % self.namespace) - self._sparse = True + Provided ``url`` can be both a mongodb connection string or + an already existing MongoClient instance. - # Temporarily uses a local copy of the functions until pymongo upgrades to new parser code - (host_list, database, username, password, collection, options) = _parse_uri(url) - - if database and host_list: - data_key = "mongodb:%s" % (database) - else: - raise MissingCacheParameter("Invalid Cache URL. Cannot parse.") - - def _create_mongo_conn(): - host_uri = 'mongodb://' - for x in host_list: - host_uri += '%s:%s' % x - log.info("Host URI: %s" % host_uri) - conn = Connection(host_uri, slave_okay=options.get('slaveok', False)) + The data will be stored into ``beaker_cache`` collection of the + *default database*, so make sure your connection string or + MongoClient point to a default database. + """ + MAX_KEY_LENGTH = 1024 - db = conn[database] + clients = SyncDict() - if username: - log.info("Attempting to authenticate %s/%s " % (username, password)) - if not db.authenticate(username, password): - raise InvalidCacheBackendError('Cannot authenticate to ' - ' MongoDB.') - return db[collection] + def __init__(self, namespace, url, **kw): + super(MongoNamespaceManager, self).__init__(namespace) + self.lock_dir = None # MongoDB uses mongo itself for locking. - self.mongo = MongoDBNamespaceManager.clients.get(data_key, - _create_mongo_conn) + if pymongo is None: + raise RuntimeError('pymongo3 is not available') - def get_creation_lock(self, key): - """@TODO - stop hitting filesystem for this... - I think mongo can properly avoid dog piling for us. 
- """ - return null_synchronizer() - - def do_remove(self): - """Clears the entire filesystem (drops the collection)""" - log.debug("[MongoDB] Remove namespace: %s" % self.namespace) - q = {} - if self._sparse: - q = {'_id.namespace': self.namespace} + if isinstance(url, string_type): + self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url) else: - q = {'_id': self.namespace} - - log.debug("[MongoDB] Remove Query: %s" % q) - self.mongo.remove(q) + self.client = url + self.db = self.client.get_default_database() + + def _format_key(self, key): + if not isinstance(key, str): + key = key.decode('ascii') + if len(key) > (self.MAX_KEY_LENGTH - len(self.namespace) - 1): + if not PY2: + key = key.encode('utf-8') + key = sha1(key).hexdigest() + return '%s:%s' % (self.namespace, key) + def get_creation_lock(self, key): + return MongoSynchronizer(self._format_key(key), self.client) def __getitem__(self, key): - log.debug("[MongoDB %s] Get Key: %s" % (self.mongo, - key)) - - _id = {} - fields = {} - if self._sparse: - _id = { - 'namespace': self.namespace, - 'key': key - } - fields['data'] = True - else: - _id = self.namespace - fields['data.' + key] = True - - log.debug("[MongoDB] Get Query: id == %s Fields: %s", _id, fields) - result = self.mongo.find_one({'_id': _id}, fields=fields) - log.debug("[MongoDB] Get Result: %s", result) - - if result: - """Running into instances in which mongo is returning - -1, which causes an error as __len__ should return 0 - or positive integers, hence the check of size explicit""" - log.debug("Result: %s", result) - data = result.get('data', None) - log.debug("Data: %s", data) - if self._sparse: - value = data - else: - value = data.get(key, None) - - if not value: - return None - - if self._pickle or key == 'session': - value = _depickle(value) - else: - if value['pickled']: - value = (value['stored'], value['expires'], _depickle(value['value'])) - else: - value = (value['stored'], value['expires'], value['value']) - - log.debug("[key: %s] Value: %s" % (key, value)) - - return value - else: - return None - + self._clear_expired() + entry = self.db.backer_cache.find_one({'_id': self._format_key(key)}) + if entry is None: + raise KeyError(key) + return pickle.loads(entry['value']) def __contains__(self, key): - def _has(): - result = self.__getitem__(key) - if result: - log.debug("[MongoDB] %s == %s" % (key, result)) - return result is not None - else: - return False - - log.debug("[MongoDB] Has '%s'? " % key) - ret = _has() - - - return ret + self._clear_expired() + entry = self.db.backer_cache.find_one({'_id': self._format_key(key)}) + return entry is not None def has_key(self, key): return key in self def set_value(self, key, value, expiretime=None): - log.debug("[MongoDB %s] Set Key: %s (Expiry: %s) ... 
" % - (self.mongo, key, expiretime)) + self._clear_expired() - _id = {} - doc = {} + expiration = None + if expiretime is not None: + expiration = time.time() + expiretime - if self._pickle or key == 'session': - try: - value = pickle.dumps(value) - except: - log.exception("Failed to pickle value.") - else: - value = { - 'stored': value[0], - 'expires': value[1], - 'value': value[2], - 'pickled': False - } - try: - bson.BSON.encode(value) - except: - log.warning("Value is not bson serializable, pickling inner value.") - value['value'] = pickle.dumps(value['value']) - value['pickled'] = True + value = pickle.dumps(value) + self.db.backer_cache.update_one({'_id': self._format_key(key)}, + {'$set': {'value': bson.Binary(value), + 'expiration': expiration}}, + upsert=True) + + def __setitem__(self, key, value): + self.set_value(key, value) + + def __delitem__(self, key): + self._clear_expired() + self.db.backer_cache.delete_many({'_id': self._format_key(key)}) + def do_remove(self): + self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace}}) + def keys(self): + return [e['key'].split(':', 1)[-1] for e in self.db.backer_cache.find_all( + {'_id': {'$regex': '^%s' % self.namespace}} + )] - if self._sparse: - _id = { - 'namespace': self.namespace, - 'key': key - } + def _clear_expired(self): + now = time.time() + self.db.backer_cache.delete_many({'_id': {'$regex': '^%s' % self.namespace}, + 'expiration': {'$ne': None, '$lte': now}}) - doc['data'] = bson.Binary(value) - doc['_id'] = _id - if expiretime: - # TODO - What is the datatype of this? it should be instantiated as a datetime instance - doc['valid_until'] = expiretime - else: - _id = self.namespace - doc['$set'] = {'data.' + key: bson.Binary(value)} - if expiretime: - # TODO - What is the datatype of this? it should be instantiated as a datetime instance - doc['$set']['valid_until'] = expiretime - log.debug("Upserting Doc '%s' to _id '%s'" % (doc, _id)) - self.mongo.update({"_id": _id}, doc, upsert=True, safe=True) +class MongoSynchronizer(SynchronizerImpl): + """Provides a Writer/Reader lock based on MongoDB. - def __setitem__(self, key, value): - self.set_value(key, value) + Provided ``url`` can be both a mongodb connection string or + an already existing MongoClient instance. - def __delitem__(self, key): - """Delete JUST the key, by setting it to None.""" - if self._sparse: - self.mongo.remove({'_id.namespace': self.namespace}) - else: - self.mongo.update({'_id': self.namespace}, - {'$unset': {'data.' + key: True}}, upsert=False) + The data will be stored into ``beaker_locks`` collection of the + *default database*, so make sure your connection string or + MongoClient point to a default database. - def keys(self): - if self._sparse: - return [row['_id']['field'] for row in self.mongo.find({'_id.namespace': self.namespace}, {'_id': True})] + Locks are identified by local machine, PID and threadid, so + are suitable for use in both local and distributed environments. + """ + # If a cache entry generation function can take a lot, + # but 15 minutes is more than a reasonable time. 
+ LOCK_EXPIRATION = 900 + MACHINE_ID = machine_identifier() + + def __init__(self, identifier, url): + super(MongoSynchronizer, self).__init__() + self.identifier = identifier + if isinstance(url, string_type): + self.client = MongoNamespaceManager.clients.get(url, pymongo.MongoClient, url) else: - return self.mongo.find_one({'_id': self.namespace}, {'data': True}).get('data', {}) + self.client = url + self.db = self.client.get_default_database() + + def _clear_expired_locks(self): + now = datetime.datetime.utcnow() + expired = now - datetime.timedelta(seconds=self.LOCK_EXPIRATION) + self.db.beaker_locks.delete_many({'_id': self.identifier, 'timestamp': {'$lte': expired}}) + return now + + def _get_owner_id(self): + return '%s-%s-%s' % (self.MACHINE_ID, os.getpid(), threading.current_thread().ident) + + def do_release_read_lock(self): + owner_id = self._get_owner_id() + self.db.beaker_locks.update_one({'_id': self.identifier, 'readers': owner_id}, + {'$pull': {'readers': owner_id}}) + + def do_acquire_read_lock(self, wait): + now = self._clear_expired_locks() + owner_id = self._get_owner_id() + while True: + try: + self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None}, + {'$set': {'timestamp': now}, + '$push': {'readers': owner_id}}, + upsert=True) + return True + except pymongo.errors.DuplicateKeyError: + if not wait: + return False + time.sleep(0.2) + + def do_release_write_lock(self): + self.db.beaker_locks.delete_one({'_id': self.identifier, 'owner': self._get_owner_id()}) + + def do_acquire_write_lock(self, wait): + now = self._clear_expired_locks() + owner_id = self._get_owner_id() + while True: + try: + self.db.beaker_locks.update_one({'_id': self.identifier, 'owner': None, + 'readers': []}, + {'$set': {'owner': owner_id, + 'timestamp': now}}, + upsert=True) + return True + except pymongo.errors.DuplicateKeyError: + if not wait: + return False + time.sleep(0.2) -class MongoDBContainer(Container): - namespace_class = MongoDBNamespaceManager def _partition(source, sub): """Our own string partitioning method. 
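A usage sketch for the namespace manager vendored in this patch; the connection URL and cache keys are made up, but the calls match the code above. The URL must name a default database, since the manager calls get_default_database(); entries land in its "backer_cache" collection (upstream spelling) and locks in "beaker_locks".

    from patzilla.util.database.beaker_mongodb import MongoNamespaceManager

    # Namespace plus a MongoDB URL carrying a default database ("beaker" here).
    ns = MongoNamespaceManager('ops-search', 'mongodb://localhost:27017/beaker')

    # The creation lock is backed by MongoDB itself (MongoSynchronizer),
    # so no lock_dir on the filesystem is involved.
    lock = ns.get_creation_lock('EP0666666B1')

    # Values are pickled transparently; expiretime is in seconds.
    ns.set_value('EP0666666B1', {'title': 'example'}, expiretime=3600)
    assert 'EP0666666B1' in ns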
From fa9d7b37d421ba1de86cb46aa5bd690a93e80b4f Mon Sep 17 00:00:00 2001 From: Papoteur Date: Thu, 23 Mar 2023 13:25:19 +0100 Subject: [PATCH 08/23] fix setup.py --- setup.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/setup.py b/setup.py index 00d5b2e1..d9481303 100644 --- a/setup.py +++ b/setup.py @@ -217,12 +217,8 @@ }, dependency_links=[ 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker-0.6.0dev1', -<<<<<<< HEAD - 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev1.tar.gz#egg=mechanize-0.4.3dev1', -======= 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev2.tar.gz#egg=mechanize-0.4.3dev2', #'https://github.com/dagwieers/unoconv/archive/master.tar.gz#egg=unoconv-0.8.2', ->>>>>>> No need to manually encode form fields with "mechanize" anymore ], entry_points={ From c964d9d18e1a72f2534b60085bbe15354f2d3115 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Fri, 24 Mar 2023 09:26:38 +0100 Subject: [PATCH 09/23] More Python 3 fixes --- patzilla/access/cipo/drawing.py | 4 +--- patzilla/access/depatech/clientpool.py | 4 +--- patzilla/access/dpma/depatisnet.py | 2 +- patzilla/access/epo/ops/api.py | 2 +- patzilla/access/epo/ops/client.py | 6 ++---- patzilla/access/ificlaims/clientpool.py | 4 ++-- patzilla/access/sip/clientpool.py | 5 ++--- patzilla/navigator/settings.py | 18 +++++++++--------- patzilla/util/crypto/jwt.py | 3 ++- patzilla/util/image/convert.py | 2 +- patzilla/util/web/identity/store.py | 1 - setup.py | 10 +++++----- 12 files changed, 27 insertions(+), 34 deletions(-) diff --git a/patzilla/access/cipo/drawing.py b/patzilla/access/cipo/drawing.py index 4adfaff5..3eb22f75 100644 --- a/patzilla/access/cipo/drawing.py +++ b/patzilla/access/cipo/drawing.py @@ -1,10 +1,8 @@ # -*- coding: utf-8 -*- # (c) 2014-2016 Andreas Motl, Elmyra UG -# py27 import re -import regex as re +import re import logging import requests -# py27 from BeautifulSoup import BeautifulSoup from bs4 import BeautifulSoup from patzilla.util.numbers.common import split_patent_number diff --git a/patzilla/access/depatech/clientpool.py b/patzilla/access/depatech/clientpool.py index b47bb979..989247e0 100644 --- a/patzilla/access/depatech/clientpool.py +++ b/patzilla/access/depatech/clientpool.py @@ -3,7 +3,6 @@ import logging import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements from zope.interface.interface import Interface from zope.interface import implementer from patzilla.access.depatech.client import DepaTechClient @@ -79,13 +78,12 @@ class IDepaTechClientPool(Interface): pass +@implementer(IDepaTechClientPool) class DepaTechClientPool(object): """ depa.tech client pool as Pyramid utility implementation. 
""" -# py27 implements(IDepaTechClientPool) - def __init__(self, api_uri): logger.info("Creating upstream client pool for depa.tech") self.api_uri = api_uri diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index 374e5c31..9bba9a33 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -6,7 +6,7 @@ import logging import urllib.request, urllib.error, urllib.parse import mechanize -import regex as re +import re import http.cookiejar from bs4 import BeautifulSoup from xlrd3 import open_workbook diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py index 4056fcbe..cb3ef0fb 100644 --- a/patzilla/access/epo/ops/api.py +++ b/patzilla/access/epo/ops/api.py @@ -885,7 +885,7 @@ def handle_error(response, location): # Compute name name = 'http-response' - body = response_dict['content'] + body = str(response_dict['content'],'UTF-8') if 'CLIENT.CQL' in body: name = 'expression' diff --git a/patzilla/access/epo/ops/client.py b/patzilla/access/epo/ops/client.py index f0826655..3f60f7ee 100644 --- a/patzilla/access/epo/ops/client.py +++ b/patzilla/access/epo/ops/client.py @@ -7,7 +7,7 @@ from mock import mock from pyramid.httpexceptions import HTTPUnauthorized from pyramid.threadlocal import get_current_registry -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface from zope.interface.interfaces import ComponentLookupError @@ -72,14 +72,12 @@ def attach_ops_client(event): class IOpsClientPool(Interface): pass - +@implementer(IOpsClientPool) class OpsClientPool(object): """ EPO/OPS client pool as Pyramid utility implementation. """ -# py27 implements(IOpsClientPool) - def __init__(self): logger.info("Creating upstream client pool for EPO/OPS") self.clients = {} diff --git a/patzilla/access/ificlaims/clientpool.py b/patzilla/access/ificlaims/clientpool.py index 66b0e4b9..2a5fb2b2 100644 --- a/patzilla/access/ificlaims/clientpool.py +++ b/patzilla/access/ificlaims/clientpool.py @@ -4,7 +4,7 @@ import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface from patzilla.access.generic.credentials import AbstractCredentialsGetter, DatasourceCredentialsManager @@ -81,12 +81,12 @@ class IIFIClaimsClientPool(Interface): pass +@implementer(IIFIClaimsClientPool) class IFIClaimsClientPool(object): """ IFI CLAIMS client pool as Pyramid utility implementation. """ -# py27 implements(IIFIClaimsClientPool) def __init__(self, api_uri, api_uri_json): logger.info("Creating upstream client pool for IFI CLAIMS") diff --git a/patzilla/access/sip/clientpool.py b/patzilla/access/sip/clientpool.py index 78ef162e..42a2db22 100644 --- a/patzilla/access/sip/clientpool.py +++ b/patzilla/access/sip/clientpool.py @@ -4,7 +4,7 @@ import os from pyramid.httpexceptions import HTTPUnauthorized -from zope.interface.declarations import implements +from zope.interface import implementer from zope.interface.interface import Interface from zope.interface import implementer @@ -81,13 +81,12 @@ class ISipClientPool(Interface): pass +@implementer(ISipClientPool) class SipClientPool(object): """ SIP client pool as Pyramid utility implementation. 
""" -# py27 implements(ISipClientPool) - def __init__(self, api_uri): logger.info("Creating upstream client pool for SIP") self.api_uri = api_uri diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py index 1c8ecd23..1205e220 100644 --- a/patzilla/navigator/settings.py +++ b/patzilla/navigator/settings.py @@ -4,7 +4,6 @@ import logging from copy import deepcopy from email.utils import parseaddr -from munch import Munch, munchify from pyramid.exceptions import ConfigurationError from pyramid.threadlocal import get_current_request, get_current_registry @@ -14,6 +13,7 @@ from patzilla.util.config import read_list, asbool, get_configuration from patzilla.util.date import datetime_isoformat, unixtime_to_datetime from patzilla.util.python import _exception_traceback +from patzilla.util.data.container import SmartBunch log = logging.getLogger(__name__) @@ -68,8 +68,8 @@ def get_datasource_settings(self, vendor=None): # Container for datasource settings. datasource_settings = SmartBunch({ 'datasources': [], - 'datasource': Munch(), - 'total': munchify({'fulltext_countries': [], 'details_countries': []}), + 'datasource': SmartBunch(), + 'total': SmartBunch.bunchify({'fulltext_countries': [], 'details_countries': []}), }) # Read datasource settings from configuration. @@ -88,7 +88,7 @@ def get_datasource_settings(self, vendor=None): datasource_info.setdefault('fulltext_countries', read_list(ds_settings.get('fulltext_countries', ''))) datasource_info.setdefault('details_enabled', asbool(ds_settings.get('details_enabled', False))) datasource_info.setdefault('details_countries', read_list(ds_settings.get('details_countries', ''))) - for key, value in ds_settings.iteritems(): + for key, value in ds_settings.items(): datasource_info.setdefault(key, value) datasource_settings.datasource[datasource] = SmartBunch.bunchify(datasource_info) @@ -101,9 +101,9 @@ def get_datasource_settings(self, vendor=None): def get_vendor_settings(self): # Container for vendor settings - vendor_settings = Munch({ + vendor_settings = SmartBunch({ 'vendors': [], - 'vendor': Munch(), + 'vendor': SmartBunch(), }) # Read vendor settings from configuration @@ -146,9 +146,9 @@ def get_email_settings(self, vendor): """ # Container for email settings - email_settings = Munch({ + email_settings = SmartBunch({ 'addressbook': [], - 'content': Munch(), + 'content': SmartBunch(), }) for setting_name in ['addressbook', 'content']: @@ -304,7 +304,7 @@ def datasource_settings(self): Return datasource settings while accounting for sensible settings like API URI and credentials. """ request = get_current_request() - datasource_settings = munchify(request.registry.datasource_settings) + datasource_settings = SmartBunch.bunchify(request.registry.datasource_settings) if 'protected_fields' in datasource_settings: for fieldname in datasource_settings.protected_fields: for name, settings in datasource_settings.datasource.items(): diff --git a/patzilla/util/crypto/jwt.py b/patzilla/util/crypto/jwt.py index aeae850d..194728f4 100644 --- a/patzilla/util/crypto/jwt.py +++ b/patzilla/util/crypto/jwt.py @@ -9,7 +9,7 @@ from jwcrypto import jwk from zope.interface.interface import Interface #from zope.interface.declarations import implements -#from zope.interface import implementer +from zope.interface import implementer log = logging.getLogger(__name__) @@ -18,6 +18,7 @@ class ISigner(Interface): pass +@implementer(ISigner) class JwtSigner(object): """ Generate and verify JSON Web Tokens. 
diff --git a/patzilla/util/image/convert.py b/patzilla/util/image/convert.py index 2bdf92e2..2c6ccbee 100644 --- a/patzilla/util/image/convert.py +++ b/patzilla/util/image/convert.py @@ -3,7 +3,7 @@ import os import shutil import tempfile -from pathlib2 import Path +from pathlib import Path import requests import where diff --git a/patzilla/util/web/identity/store.py b/patzilla/util/web/identity/store.py index 1f8df810..982538e0 100644 --- a/patzilla/util/web/identity/store.py +++ b/patzilla/util/web/identity/store.py @@ -12,7 +12,6 @@ from mongoengine.fields import StringField, ListField, DateTimeField, DictField from mongoengine.errors import NotUniqueError from pyramid.threadlocal import get_current_request -from zope.interface.declarations import implements from zope.interface.interface import Interface from zope.interface import implementer diff --git a/setup.py b/setup.py index d9481303..5ee87e89 100644 --- a/setup.py +++ b/setup.py @@ -41,9 +41,9 @@ # Database and storage # Can't upgrade to pymongo-3.5.1 due to "from pymongo.connection import Connection" # usage in "mongodb_gridfs_beaker" module. - 'pymongo<3', # 3.13.0, 4.3.3 + 'pymongo', # 3.13.0, 4.3.3 'mongodb_gridfs_beaker==0.6.0dev1', - 'mongoengine==0.13.0', # 0.24.1 + 'mongoengine', # 0.24.1 'python-magic<1', # Web services @@ -92,8 +92,8 @@ # Data handling 'attrs', - 'Bunch==1.0.1', # Maybe switch to "Munch" - 'pyparsing==2.0.2', # 2.2.2, 2.3.1, 2.4.7, 3.0.8 + 'Bunch', # Maybe switch to "Munch" + 'pyparsing', 'python-dateutil<3', 'ago==0.0.9', # 0.0.93 'arrow==0.10.0', # 0.12.1 @@ -216,7 +216,7 @@ 'test': test_requires, }, dependency_links=[ - 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker-0.6.0dev1', + 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker', 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev2.tar.gz#egg=mechanize-0.4.3dev2', #'https://github.com/dagwieers/unoconv/archive/master.tar.gz#egg=unoconv-0.8.2', ], From 8664e51e3feeb9ad50581e5991151c39d66aae8d Mon Sep 17 00:00:00 2001 From: Papoteur Date: Fri, 24 Mar 2023 11:40:05 +0100 Subject: [PATCH 10/23] Replace Bunch with Munch Fix PDF export --- patzilla/access/depatech/client.py | 12 +++++----- patzilla/access/dpma/depatisnet.py | 6 ++--- patzilla/access/dpma/dpmaregister.py | 4 ++-- patzilla/access/generic/search.py | 14 ++++++------ patzilla/access/ificlaims/client.py | 14 ++++++------ patzilla/access/ificlaims/commands.py | 10 ++++----- patzilla/access/sip/client.py | 10 ++++----- patzilla/access/sip/pyramid_service.py | 4 ++-- patzilla/navigator/export.py | 15 +++++++------ patzilla/navigator/services/depatech.py | 10 ++++----- patzilla/navigator/services/ificlaims.py | 10 ++++----- patzilla/navigator/services/util.py | 10 ++++----- patzilla/navigator/settings.py | 28 ++++++++++++------------ patzilla/util/data/container.py | 14 ++++++------ patzilla/util/numbers/common.py | 4 ++-- patzilla/util/web/email/submit.py | 4 ++-- setup.py | 4 ++-- 17 files changed, 87 insertions(+), 86 deletions(-) diff --git a/patzilla/access/depatech/client.py b/patzilla/access/depatech/client.py index 9253f835..011edff6 100644 --- a/patzilla/access/depatech/client.py +++ b/patzilla/access/depatech/client.py @@ -13,7 +13,7 @@ from patzilla.access.depatech import get_depatech_client from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException from patzilla.access.generic.search import GenericSearchResponse, 
GenericSearchClient -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent log = logging.getLogger(__name__) @@ -55,7 +55,7 @@ def search(self, query, options=None): return self.search_real(query, options=options) def search_real(self, query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 0) options.setdefault('limit', self.pagesize) @@ -298,8 +298,8 @@ def read(self): 'name': 'depatech', 'time': self.input['took'], 'status': 'success', - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.input['hits']['total']) @@ -307,7 +307,7 @@ def read(self): self.meta.navigator.offset = int(self.options.offset) self.meta.navigator.limit = int(self.options.limit) self.meta.navigator.max_hits = int(self.options.max_hits) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content self.documents = self.input['hits']['hits'] @@ -326,7 +326,7 @@ def document_to_family_id(self, document): def depatech_search(query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() client = get_depatech_client() try: diff --git a/patzilla/access/dpma/depatisnet.py b/patzilla/access/dpma/depatisnet.py index 9bba9a33..4b0f42cc 100644 --- a/patzilla/access/dpma/depatisnet.py +++ b/patzilla/access/dpma/depatisnet.py @@ -309,8 +309,8 @@ def read(self): # TODO: Reference from IFI CLAIMS, fill up/unify. #'time': self.input['time'], #'status': self.input['status'], - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.input['hits']) @@ -319,7 +319,7 @@ def read(self): # TODO: Fill up? 
#self.meta.navigator.offset = int(self.meta.upstream.Offset) #self.meta.navigator.limit = int(self.meta.upstream.Limit) - #self.meta.navigator.postprocess = SmartBunch() + #self.meta.navigator.postprocess = SmartMunch() # Propagate user message diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py index 2acb850a..c9381a37 100644 --- a/patzilla/access/dpma/dpmaregister.py +++ b/patzilla/access/dpma/dpmaregister.py @@ -11,7 +11,7 @@ import operator import mechanicalsoup from beaker.cache import cache_region -from bunch import bunchify +from munch import munchify from docopt import docopt from pprint import pformat from jsonpointer import JsonPointer, JsonPointerException @@ -567,7 +567,7 @@ def decode(self): # Citations self.references_cited = list(map( operator.attrgetter('document_id.doc_number'), - bunchify(self.convert_list(self.query_data(self.pointer_references_cited))))) + munchify(self.convert_list(self.query_data(self.pointer_references_cited))))) # office-specific-bib-data self.office_specific_bibdata = self.convert_dict(self.query_data(self.pointer_office_specific_bibdata)) diff --git a/patzilla/access/generic/search.py b/patzilla/access/generic/search.py index 57065e95..1c819e45 100644 --- a/patzilla/access/generic/search.py +++ b/patzilla/access/generic/search.py @@ -4,7 +4,7 @@ import logging from pprint import pprint from collections import defaultdict -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent from patzilla.access.generic.exceptions import SearchException @@ -50,7 +50,7 @@ def crawl(self, constituents, expression, chunksize): # fetch first chunk (1-chunksize) from upstream #first_chunk = self.search(expression, 0, chunksize) - first_chunk = self.search_method(expression, SmartBunch({'offset': 0, 'limit': chunksize})) + first_chunk = self.search_method(expression, SmartMunch({'offset': 0, 'limit': chunksize})) #print first_chunk #total_count = int(first_chunk['meta'].get('pager', {}).get('totalEntries', 0)) @@ -82,7 +82,7 @@ def crawl(self, constituents, expression, chunksize): time.sleep(1) log.info(self.lm('Crawling from offset {offset}'.format(offset=offset))) - chunk = self.search_method(expression, SmartBunch({'offset': offset, 'limit': chunksize})) + chunk = self.search_method(expression, SmartMunch({'offset': offset, 'limit': chunksize})) chunks.append(chunk) @@ -128,7 +128,7 @@ def __init__(self, input, options=None): # Input data and options self.input = input - self.options = options and SmartBunch.bunchify(options) or SmartBunch() + self.options = options and SmartMunch.munchify(options) or SmartMunch() # Setup data structures self.setup() @@ -146,13 +146,13 @@ def setup(self): self.documents = [] # Metadata information, upstream (raw) and downstream (unified) - self.meta = SmartBunch.bunchify({ + self.meta = SmartMunch.munchify({ 'navigator': {}, 'upstream': {}, }) # Output information, upstream (raw) and downstream (unified) - self.output = SmartBunch.bunchify({ + self.output = SmartMunch.munchify({ 'meta': {}, 'numbers': [], 'details': [], @@ -209,7 +209,7 @@ def remove_family_members(self): seen = {} removed = [] removed_map = defaultdict(list) - stats = SmartBunch(removed = 0) + stats = SmartMunch(removed = 0) def family_remover(item): fam = self.document_to_family_id(item) diff --git a/patzilla/access/ificlaims/client.py b/patzilla/access/ificlaims/client.py index 4f1393e6..0febf090 100644 --- 
a/patzilla/access/ificlaims/client.py +++ b/patzilla/access/ificlaims/client.py @@ -16,7 +16,7 @@ from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException, SearchException from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient from patzilla.access.ificlaims import get_ificlaims_client -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.normalize import normalize_patent log = logging.getLogger(__name__) @@ -73,7 +73,7 @@ def search_real(self, query, options=None): query.setdefault('filter', '') - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 0) options.setdefault('limit', self.pagesize) @@ -522,15 +522,15 @@ def read(self): 'name': 'ifi', 'time': self.input['time'], 'status': self.input['status'], - 'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - 'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + 'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + 'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.meta.upstream.pager.totalEntries) self.meta.navigator.count_page = int(self.meta.upstream.pager.entriesOnThisPage) self.meta.navigator.offset = int(self.meta.upstream.params.start) self.meta.navigator.limit = int(self.meta.upstream.params.rows) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content self.documents = self.input['content']['response']['docs'] @@ -550,7 +550,7 @@ def document_to_family_id(self, document): def ificlaims_client(options=None): - options = options or SmartBunch() + options = options or SmartMunch() if 'vendor' in options and options.vendor == 'serviva': client = get_serviva_client() else: @@ -578,7 +578,7 @@ def ificlaims_fetch(resource, format, options=None): @cache_region('search') def ificlaims_search(query, options=None): - options = options or SmartBunch() + options = options or SmartMunch() client = ificlaims_client(options=options) try: diff --git a/patzilla/access/ificlaims/commands.py b/patzilla/access/ificlaims/commands.py index e002033c..1fe7f7fe 100644 --- a/patzilla/access/ificlaims/commands.py +++ b/patzilla/access/ificlaims/commands.py @@ -33,7 +33,7 @@ from patzilla.boot.cache import configure_cache_backend from patzilla.boot.config import BootConfiguration from patzilla.util.config import get_configfile_from_commandline -from patzilla.util.data.container import SmartBunch, jd +from patzilla.util.data.container import SmartMunch, jd from patzilla.boot.framework import pyramid_setup @@ -79,7 +79,7 @@ def search(ctx, expression, request_json): # Invoke API and output result. logger.warning("Only the first 100 hits will be displayed. 
The CLI currently does not employ paging.") - results = client.search(SmartBunch({'expression': expression}), SmartBunch({'offset': 0, 'limit': 100})) + results = client.search(SmartMunch({'expression': expression}), SmartMunch({'offset': 0, 'limit': 100})) print(jd(results)) @@ -95,11 +95,11 @@ def make_request(client): #results = client.search('pa:siemens OR pa:bosch', 0, 10) #results = client.search('pa:(siemens OR bosch)', 0, 10) #results = client.search('text:"solar energy"', 0, 10) - results = client.search(SmartBunch({'expression': 'text:solar energy'}), SmartBunch({'offset': 0, 'limit': 10})) - #results = client.search(SmartBunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartBunch({'offset': 0, 'limit': 10})) + results = client.search(SmartMunch({'expression': 'text:solar energy'}), SmartMunch({'offset': 0, 'limit': 10})) + #results = client.search(SmartMunch({'expression': '{!complexphrase inOrder=true}"siemen* *haus"'}), SmartMunch({'offset': 0, 'limit': 10})) #results = client.search(u'text:抑血管生成素的药物用途', 0, 10) #results = client.search(u'text:放射線を照射する放射線源と', 0, 10) - #results = client.search(SmartBunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartBunch({'offset': 0, 'limit': 50})) + #results = client.search(SmartMunch({'expression': 'pnctry:(de OR ep OR wo OR cn OR jp OR tw) AND pa:"taiwan paiho" AND pd:[20170101 TO 20170731]'}), SmartMunch({'offset': 0, 'limit': 50})) #results = client.text_fetch('US-20100077592-A1') diff --git a/patzilla/access/sip/client.py b/patzilla/access/sip/client.py index ad8b4a69..11de635d 100644 --- a/patzilla/access/sip/client.py +++ b/patzilla/access/sip/client.py @@ -9,7 +9,7 @@ from patzilla.access.generic.exceptions import NoResultsException, GenericAdapterException from patzilla.access.generic.search import GenericSearchResponse, GenericSearchClient from patzilla.access.sip import get_sip_client -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch """ @@ -98,7 +98,7 @@ def logout(self): def search(self, expression, options=None): - options = options or SmartBunch() + options = options or SmartMunch() options.setdefault('offset', 0) options.setdefault('limit', self.pagesize) @@ -329,15 +329,15 @@ def read(self): # TODO: Reference from IFI CLAIMS, fill up/unify. 
#'time': self.input['time'], #'status': self.input['status'], - #'params': SmartBunch.bunchify(self.input['content']['responseHeader']['params']), - #'pager': SmartBunch.bunchify(self.input['content']['responseHeader'].get('pager', {})), + #'params': SmartMunch.munchify(self.input['content']['responseHeader']['params']), + #'pager': SmartMunch.munchify(self.input['content']['responseHeader'].get('pager', {})), }) self.meta.navigator.count_total = int(self.meta.upstream.MemCount) self.meta.navigator.count_page = len(self.input['results']) self.meta.navigator.offset = int(self.meta.upstream.Offset) self.meta.navigator.limit = int(self.meta.upstream.Limit) - self.meta.navigator.postprocess = SmartBunch() + self.meta.navigator.postprocess = SmartMunch() # Read content """ diff --git a/patzilla/access/sip/pyramid_service.py b/patzilla/access/sip/pyramid_service.py index b2c34a14..225a3928 100644 --- a/patzilla/access/sip/pyramid_service.py +++ b/patzilla/access/sip/pyramid_service.py @@ -12,7 +12,7 @@ from patzilla.access.sip.client import sip_published_data_search, sip_published_data_crawl, SearchException from patzilla.access.sip.client import LoginException from patzilla.util.cql.util import should_be_quoted -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.python import _exception_traceback log = logging.getLogger(__name__) @@ -58,7 +58,7 @@ def sip_published_data_search_handler(request): # - sorting # - whether to remove family members # - whether to return all family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, diff --git a/patzilla/navigator/export.py b/patzilla/navigator/export.py index 88909d79..ed9c53f5 100644 --- a/patzilla/navigator/export.py +++ b/patzilla/navigator/export.py @@ -15,7 +15,7 @@ from io import BytesIO from textwrap import dedent from lxml import etree as ET -from bunch import bunchify, Bunch +from munch import munchify, Munch from json.encoder import JSONEncoder from zipfile import ZipFile, ZIP_DEFLATED from collections import OrderedDict @@ -53,7 +53,7 @@ class Dossier(object): """).strip() def __init__(self, data): - self.data = bunchify(data) + self.data = munchify(data) self.prepare_dataframes() self.make_metadata() @@ -189,7 +189,7 @@ def to_zip(self, request=None, options=None): # TODO: Text representations for biblio, register, family # TODO: PDF Extracts - options = options or bunchify({'report': {}, 'media': {}}) + options = options or munchify({'report': {}, 'media': {}}) # Remove entries with empty/undefined document numbers @@ -584,7 +584,7 @@ def write_numberlist_sheets(self): if type(first) in (str,): df = pandas.DataFrame(entries, columns=['PN']) - elif isinstance(first, (dict, Bunch)): + elif isinstance(first, (dict, Munch)): df = pandas.DataFrame(entries, columns=['number', 'score', 'timestamp', 'url']) df.rename(columns={'number': 'document', 'url': 'display'}, inplace=True) @@ -717,10 +717,11 @@ def to_pdf(self, payload=None): #print 'out:', process.std_out #print 'err:', process.std_err log.info('STDERR:\n{}'.format(process.std_err)) + print(f"PDF name: {pdf_path}") if process.status_code == 0: #pdf_name = os.path.join(pdf_path, os.path.basename(xlsx_file.name).replace('.xlsx', '.pdf')) - payload = file(pdf_path, 'r').read() + payload = open(pdf_path, 'rb').read() #shutil.rmtree(pdf_path) os.unlink(pdf_path) return payload @@ -810,8 +811,8 @@ def _vgenerate(self, format_string, args, kwargs, used_args, 
recursion_depth): obj = self.convert_field(obj, conversion) # expand the format spec, if needed - format_spec = self._vformat(format_spec, args, kwargs, - used_args, recursion_depth-1) + #format_spec = self._vformat(format_spec, args, kwargs, + #used_args, recursion_depth-1) # format the object and append to the result if 'emphasis' in kwargs: diff --git a/patzilla/navigator/services/depatech.py b/patzilla/navigator/services/depatech.py index 19a4e3db..bae75f29 100644 --- a/patzilla/navigator/services/depatech.py +++ b/patzilla/navigator/services/depatech.py @@ -14,7 +14,7 @@ from patzilla.util.expression.keywords import keywords_to_response from patzilla.navigator.services.util import request_to_options from patzilla.access.generic.exceptions import NoResultsException, SearchException -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.python import _exception_traceback log = logging.getLogger(__name__) @@ -37,7 +37,7 @@ @status_upstream_depatech.get() def status_upstream_depatech_handler(request): client = get_depatech_client() - query = SmartBunch({ + query = SmartMunch({ 'expression': '(PC:DE AND DE:212016000074 AND KI:U1) OR AN:DE212016000074U1 OR NP:DE212016000074U1', }) data = client.search_real(query) @@ -53,7 +53,7 @@ def depatech_published_data_search_handler(request): # Get hold of query expression and filter expression = request.params.get('expression', '') filter = request.params.get('filter', '') - query = SmartBunch({ + query = SmartMunch({ 'syntax': 'lucene', 'expression': expression, 'filter': filter, @@ -84,7 +84,7 @@ def depatech_published_data_search_handler(request): # - limit # - sorting # - whether to remove family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, @@ -131,7 +131,7 @@ def depatech_published_data_crawl_handler(request): """Crawl published-data at MTC depa.tech""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) diff --git a/patzilla/navigator/services/ificlaims.py b/patzilla/navigator/services/ificlaims.py index f347defe..b7ac45ee 100644 --- a/patzilla/navigator/services/ificlaims.py +++ b/patzilla/navigator/services/ificlaims.py @@ -18,7 +18,7 @@ from patzilla.access.ificlaims.api import ificlaims_download, ificlaims_download_multi from patzilla.access.ificlaims.client import IFIClaimsException, IFIClaimsFormatException, LoginException, ificlaims_search, ificlaims_crawl, ificlaims_client from patzilla.access.ificlaims.expression import should_be_quoted, IFIClaimsParser -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.data.zip import zip_multi from patzilla.util.python import _exception_traceback @@ -51,7 +51,7 @@ @status_upstream_ificlaims.get() def status_upstream_ificlaims_handler(request): client = ificlaims_client() - query = SmartBunch({ + query = SmartMunch({ 'expression': 'pn:EP0666666', }) data = client.search_real(query) @@ -138,7 +138,7 @@ def ificlaims_published_data_search_handler(request): """Search for published-data at IFI CLAIMS Direct""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) @@ -162,7 +162,7 @@ def 
ificlaims_published_data_search_handler(request): # - limit # - sorting # - whether to remove family members - options = SmartBunch() + options = SmartMunch() options.update({ 'limit': limit, 'offset': offset_remote, @@ -209,7 +209,7 @@ def ificlaims_published_data_crawl_handler(request): """Crawl published-data at IFI CLAIMS Direct""" # Get hold of query expression and filter - query = SmartBunch({ + query = SmartMunch({ 'expression': request.params.get('expression', ''), 'filter': request.params.get('filter', ''), }) diff --git a/patzilla/navigator/services/util.py b/patzilla/navigator/services/util.py index ccb23017..1221fe1f 100644 --- a/patzilla/navigator/services/util.py +++ b/patzilla/navigator/services/util.py @@ -5,7 +5,7 @@ import logging import mimetypes from pprint import pprint -from bunch import bunchify +from munch import munchify from cornice.service import Service from pyramid.settings import asbool from pyramid.threadlocal import get_current_request @@ -13,7 +13,7 @@ from patzilla.navigator.export import Dossier, DossierXlsx from patzilla.util.config import read_list from patzilla.util.cql.util import pair_to_cql -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.expression.keywords import keywords_from_boolean_expression from patzilla.util.numbers.numberlists import parse_numberlist, normalize_numbers from patzilla.util.python import exception_traceback @@ -288,7 +288,7 @@ def export_util_handler(request): elif output_kind == 'dossier': log.info('Starting dossier export to format "{format}"'.format(format=output_format)) - data = bunchify(json.loads(request.params.get('json'))) + data = munchify(json.loads(request.params.get('json'))) # Debugging #print 'dossier-data:'; pprint(data.toDict()) @@ -350,7 +350,7 @@ def issue_reporter_handler(request): report_data = request.json report_data.setdefault('application', {}) - report = SmartBunch.bunchify(report_data) + report = SmartMunch.munchify(report_data) # Add user information to issue report user = request.user @@ -361,7 +361,7 @@ def issue_reporter_handler(request): user.upstream_credentials = None # Serialize user object and attach to report - report.application.user = SmartBunch(json.loads(user.to_json())) + report.application.user = SmartMunch(json.loads(user.to_json())) # Send the whole beast to the standard application log log.error('Issue report [{targets}]:\n{report}'.format( diff --git a/patzilla/navigator/settings.py b/patzilla/navigator/settings.py index 1205e220..4f9fc082 100644 --- a/patzilla/navigator/settings.py +++ b/patzilla/navigator/settings.py @@ -13,7 +13,7 @@ from patzilla.util.config import read_list, asbool, get_configuration from patzilla.util.date import datetime_isoformat, unixtime_to_datetime from patzilla.util.python import _exception_traceback -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch log = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def get_application_settings(self): # TODO: Optimize: Only read once, not on each request! # FIXME: Maybe do the same what `attach_ops_client` does? # `if '/static' in event.request.url: return`. 
- settings = get_configuration(self.configfile, kind=SmartBunch) + settings = get_configuration(self.configfile, kind=SmartMunch) # Add some global settings settings['software_version'] = __version__ @@ -66,10 +66,10 @@ def get_application_settings(self): def get_datasource_settings(self, vendor=None): # Container for datasource settings. - datasource_settings = SmartBunch({ + datasource_settings = SmartMunch({ 'datasources': [], - 'datasource': SmartBunch(), - 'total': SmartBunch.bunchify({'fulltext_countries': [], 'details_countries': []}), + 'datasource': SmartMunch(), + 'total': SmartMunch.munchify({'fulltext_countries': [], 'details_countries': []}), }) # Read datasource settings from configuration. @@ -77,7 +77,7 @@ def get_datasource_settings(self, vendor=None): datasource_settings.protected_fields = read_list(self.application_settings.get('ip_navigator', {}).get('datasources_protected_fields')) for datasource in datasource_settings.datasources: - datasource_info = SmartBunch() + datasource_info = SmartMunch() if vendor is None: settings_key = 'datasource:{name}'.format(name=datasource) else: @@ -91,7 +91,7 @@ def get_datasource_settings(self, vendor=None): for key, value in ds_settings.items(): datasource_info.setdefault(key, value) - datasource_settings.datasource[datasource] = SmartBunch.bunchify(datasource_info) + datasource_settings.datasource[datasource] = SmartMunch.munchify(datasource_info) # Aggregate data for all countries. datasource_settings.total.fulltext_countries += datasource_info['fulltext_countries'] @@ -101,9 +101,9 @@ def get_datasource_settings(self, vendor=None): def get_vendor_settings(self): # Container for vendor settings - vendor_settings = SmartBunch({ + vendor_settings = SmartMunch({ 'vendors': [], - 'vendor': SmartBunch(), + 'vendor': SmartMunch(), }) # Read vendor settings from configuration @@ -135,7 +135,7 @@ def get_vendor_settings(self): vendor_info.datasource_settings = self.get_datasource_settings(vendor) # Collect all vendor settings. - vendor_settings.vendor[vendor] = SmartBunch.bunchify(vendor_info) + vendor_settings.vendor[vendor] = SmartMunch.munchify(vendor_info) return vendor_settings @@ -146,9 +146,9 @@ def get_email_settings(self, vendor): """ # Container for email settings - email_settings = SmartBunch({ + email_settings = SmartMunch({ 'addressbook': [], - 'content': SmartBunch(), + 'content': SmartMunch(), }) for setting_name in ['addressbook', 'content']: @@ -227,7 +227,7 @@ def effective_vendor(self): # Skip resolving effective vendor when no vendors are configured at all if self.registry.vendor_settings is None: - return SmartBunch() + return SmartMunch() # Select vendor by matching hostnames vendor_names = self.registry.vendor_settings.vendors @@ -304,7 +304,7 @@ def datasource_settings(self): Return datasource settings while accounting for sensible settings like API URI and credentials. 
""" request = get_current_request() - datasource_settings = SmartBunch.bunchify(request.registry.datasource_settings) + datasource_settings = SmartMunch.munchify(request.registry.datasource_settings) if 'protected_fields' in datasource_settings: for fieldname in datasource_settings.protected_fields: for name, settings in datasource_settings.datasource.items(): diff --git a/patzilla/util/data/container.py b/patzilla/util/data/container.py index 8fd79b61..22b3b05e 100644 --- a/patzilla/util/data/container.py +++ b/patzilla/util/data/container.py @@ -2,11 +2,11 @@ # (c) 2016 Andreas Motl, Elmyra UG import json import types -from bunch import Bunch +from munch import Munch from jsonpointer import JsonPointer -class SmartBunch(Bunch): +class SmartMunch(Munch): def dump(self): return self.toJSON() @@ -18,15 +18,15 @@ def prettify(self): return self.pretty() @classmethod - def bunchify(cls, x): + def munchify(cls, x): """ - Recursively transforms a dictionary into a SmartBunch via copy. - Generic "bunchify", also works with descendants of Bunch. + Recursively transforms a dictionary into a SmartMunch via copy. + Generic "munchify", also works with descendants of Munch. """ if isinstance(x, dict): - return cls( (k, cls.bunchify(v)) for k,v in x.items() ) + return cls( (k, cls.munchify(v)) for k,v in x.items() ) elif isinstance(x, (list, tuple)): - return type(x)( cls.bunchify(v) for v in x ) + return type(x)( cls.munchify(v) for v in x ) else: return x diff --git a/patzilla/util/numbers/common.py b/patzilla/util/numbers/common.py index 9f4d8359..f44f1e25 100644 --- a/patzilla/util/numbers/common.py +++ b/patzilla/util/numbers/common.py @@ -3,7 +3,7 @@ import re import types import logging -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.numbers.helper import strip_spaces """ @@ -13,7 +13,7 @@ log = logging.getLogger(__name__) -class DocumentIdentifierBunch(SmartBunch): +class DocumentIdentifierBunch(SmartMunch): def __str__(self): return self.dump() diff --git a/patzilla/util/web/email/submit.py b/patzilla/util/web/email/submit.py index 33b33adf..e8353e2e 100644 --- a/patzilla/util/web/email/submit.py +++ b/patzilla/util/web/email/submit.py @@ -5,7 +5,7 @@ from validate_email import validate_email from pyramid.threadlocal import get_current_request from patzilla.util.config import read_config, read_list, to_list -from patzilla.util.data.container import SmartBunch +from patzilla.util.data.container import SmartMunch from patzilla.util.email.message import EmailMessage log = logging.getLogger(__name__) @@ -68,7 +68,7 @@ def email_issue_report(report, recipients): recipients = to_list(recipients) identifier = None - if isinstance(report, SmartBunch): + if isinstance(report, SmartMunch): identifier = report.meta.id # Build reasonable subject diff --git a/setup.py b/setup.py index 5ee87e89..dd630b4a 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ # Authorization 'pycryptodome>=3,<4', - 'python-jwt>=3.3.4,<4', + 'python-jwt', 'pbkdf2==1.3', @@ -92,7 +92,7 @@ # Data handling 'attrs', - 'Bunch', # Maybe switch to "Munch" + 'Munch', 'pyparsing', 'python-dateutil<3', 'ago==0.0.9', # 0.0.93 From 07deef1bb1aff4d9a619ac31c8b83d20f2fd07e0 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sat, 25 Mar 2023 07:41:31 +0100 Subject: [PATCH 11/23] Fixes still for Python 3 migration after testing --- patzilla/access/depatech/expression.py | 2 +- patzilla/access/depatech/expression.rst | 78 +++++----- patzilla/access/dpma/dpmaregister.py | 
8 +- patzilla/access/epo/espacenet/client_html.py | 2 +- patzilla/access/ificlaims/expression.py | 2 +- patzilla/access/ificlaims/expression.rst | 88 ++++++------ patzilla/access/uspto/pdf.py | 2 +- patzilla/boot/config.py | 2 +- patzilla/navigator/export.py | 1 - patzilla/util/cql/cheshire3/parser.py | 7 - patzilla/util/cql/cheshire3/test_cheshire3.py | 2 +- patzilla/util/cql/pyparsing/__init__.py | 2 +- patzilla/util/cql/pyparsing/serializer.py | 22 +-- patzilla/util/cql/pyparsing/test/01_spec.rst | 32 ++--- patzilla/util/cql/pyparsing/test/05_misc.rst | 28 ++-- .../util/cql/pyparsing/test/10_extensions.rst | 30 ++-- patzilla/util/cql/pyparsing/test/15_ops.rst | 32 ++--- .../util/cql/pyparsing/test/20_depatisnet.rst | 134 +++++++++--------- .../util/cql/pyparsing/test/30_ificlaims.rst | 10 +- patzilla/util/data/orderedset.py | 2 +- patzilla/util/network/requests_xmlrpclib.py | 9 +- patzilla/util/text/format.py | 14 +- 22 files changed, 251 insertions(+), 258 deletions(-) diff --git a/patzilla/access/depatech/expression.py b/patzilla/access/depatech/expression.py index ca925fec..c6cd8491 100644 --- a/patzilla/access/depatech/expression.py +++ b/patzilla/access/depatech/expression.py @@ -249,7 +249,7 @@ def pair_to_elasticsearch(cls, key, value, modifiers=None): except Exception as ex: message = 'depatech query: Invalid date or range expression "{0}". Reason: {1}.'.format(value, ex) - logger.warn(message + ' Exception was: {0}'.format(_exception_traceback())) + logger.warning(message + ' Exception was: {0}'.format(_exception_traceback())) return {'error': True, 'message': message} elif key == 'inventor' or key == 'applicant': diff --git a/patzilla/access/depatech/expression.rst b/patzilla/access/depatech/expression.rst index eed14b55..78adcac3 100644 --- a/patzilla/access/depatech/expression.rst +++ b/patzilla/access/depatech/expression.rst @@ -20,30 +20,30 @@ Empty query IPC/CPC ======= >>> DepaTechParser('H01F7/00').dumps() -u'H01F7/00' +'H01F7/00' # Rewrite all patent classifications from depa.tech format to OPS format >>> DepaTechParser('IC:G01F000184').parse().rewrite_classes_ops().dumps() -u'IC : G01F1/84' +'IC : G01F1/84' >>> DepaTechParser('IC:G01F000184').keywords -[u'G01F1/84'] +['G01F1/84'] >>> DepaTechExpression.pair_to_elasticsearch('class', 'H04L12/433 or H04L12/24') -{'query': u'((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'} +{'query': '((IC:H04L0012433 OR NC:H04L0012433) OR (IC:H04L001224 OR NC:H04L001224))'} >>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)') -{'query': u'((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'} +{'query': '((IC:H01F000700 OR NC:H01F000700) OR ((IC:H01F000702 OR NC:H01F000702) AND (IC:H02K00071876 OR NC:H02K00071876)))'} >>> DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)') -{'query': u'((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'} +{'query': '((IC:H01F000700 OR NC:H01F000700) NOT ((IC:H01F000702 OR NC:H01F000702) OR (IC:H02K00071876 OR NC:H02K00071876)))'} Publication date ================ >>> DepaTechExpression.pair_to_elasticsearch('pubdate', 'foobar') -{'message': 'depatech query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True} +{'error': True, 'message': 'depatech query: Invalid date or range expression "foobar". 
Reason: foobar.'} ********* @@ -54,39 +54,39 @@ Simple expressions ================== >>> DepaTechParser('GT:bildschirm').keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm') -{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} +{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} >>> DepaTechParser('GT:bildschirm or AB:fahrzeug').keywords -[u'bildschirm', u'fahrzeug'] +['bildschirm', 'fahrzeug'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm or fahrzeug') -{'query': u'(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'} +{'query': '(AB:(bildschirm OR fahrzeug) OR GT:(bildschirm OR fahrzeug) OR ET:(bildschirm OR fahrzeug) OR FT:(bildschirm OR fahrzeug))'} >>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw)') -{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'} +{'query': '(AB:(bildschirm AND (fahrzeug OR pkw)) OR GT:(bildschirm AND (fahrzeug OR pkw)) OR ET:(bildschirm AND (fahrzeug OR pkw)) OR FT:(bildschirm AND (fahrzeug OR pkw)))'} >>> DepaTechParser('GT:bildschirm and AB:(fahrzeug or pkw not lkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw', u'lkw'] +['bildschirm', 'fahrzeug', 'pkw', 'lkw'] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)') -{'query': u'(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'} +{'query': '(AB:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR GT:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR ET:(bildschirm AND (fahrzeug OR pkw NOT lkw)) OR FT:(bildschirm AND (fahrzeug OR pkw NOT lkw)))'} >>> DepaTechParser('AB:fahrzeug or AB:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] >>> DepaTechParser('AB:fahrzeug not GT:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] @@ -97,17 +97,17 @@ Queries without proper fieldnames like AB:, GT:, AB:, etc. 
on the left side of t >>> DepaTechParser('bildschirm').dumps() -u'bildschirm' +'bildschirm' >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm') -{'query': u'(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} +{'query': '(AB:bildschirm OR GT:bildschirm OR ET:bildschirm OR FT:bildschirm)'} >>> DepaTechParser('bildschirm and fahrzeug').dumps() -u'bildschirm and fahrzeug' +'bildschirm and fahrzeug' >>> DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm and fahrzeug') -{'query': u'(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'} +{'query': '(AB:(bildschirm AND fahrzeug) OR GT:(bildschirm AND fahrzeug) OR ET:(bildschirm AND fahrzeug) OR FT:(bildschirm AND fahrzeug))'} @@ -115,22 +115,22 @@ Expressions containing quoted words =================================== >>> DepaTechParser('"bildschirm"').dumps() -u'"bildschirm"' +'"bildschirm"' >>> DepaTechParser('"bildschirm"').keywords [] >>> DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"') -{'query': u'(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'} +{'query': '(AB:"bildschirm" OR GT:"bildschirm" OR ET:"bildschirm" OR FT:"bildschirm")'} >>> DepaTechParser('AB:"bildschirm"').dumps() -u'AB : "bildschirm"' +'AB : "bildschirm"' >>> DepaTechParser('AB:"bildschirm"').keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser('AB:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] @@ -138,19 +138,19 @@ Keyword extraction ================== >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00')['query']).keywords -[u'H01F7/00'] +['H01F7/00'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords -[u'H01F7/00', u'H01F7/02', u'H02K7/1876'] +['H01F7/00', 'H01F7/02', 'H02K7/1876'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'bildschirm')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', '"bildschirm"')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> DepaTechParser(DepaTechExpression.pair_to_elasticsearch('fulltext', 'GT:bildschirm OR AB:(fahrzeug OR pkw)')['query']).keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] @@ -160,18 +160,18 @@ From the wild Umlauts ------- ->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() -u'((AB : *messschieber* or AB : *me\xdfschieber*) and AB : *digital*)' +>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() +'((AB : *messschieber* or AB : *me\xdfschieber*) and AB : *digital*)' ->>> DepaTechParser(u'AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords -[u'messschieber', u'me\xdfschieber', u'digital'] +>>> DepaTechParser('AB:((*messschieber* OR *meßschieber*) AND *digital* )').keywords +['messschieber', 'me\xdfschieber', 'digital'] More ---- ->>> DepaTechParser(u'ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords -[u'energy', u'water', u'waves', u'Tide', u'90\xb0'] +>>> DepaTechParser('ET:(energy and water) or AB:(waves or Tide) and AB:"90°"').keywords +['energy', 'water', 'waves', 'Tide', '90\xb0'] ->>> DepaTechParser(u'AB:(((bremsgefühl* or pedalgefühl) and 
(*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords -[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom'] +>>> DepaTechParser('AB:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords +['bremsgef\xfchl', 'pedalgef\xfchl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenst\xe4ndig', 'independent', 'autonom'] diff --git a/patzilla/access/dpma/dpmaregister.py b/patzilla/access/dpma/dpmaregister.py index c9381a37..cd12451f 100644 --- a/patzilla/access/dpma/dpmaregister.py +++ b/patzilla/access/dpma/dpmaregister.py @@ -247,7 +247,7 @@ def search_patent(self, patent): # has to be adjusted. time.sleep(1.0) - if "/TSPD" in self.response.content: + if b"/TSPD" in self.response.content: raise ValueError("Site is protected by F5 Advanced WAF") # Debugging @@ -283,7 +283,7 @@ def search_patent(self, patent): return [entry] # Sanity checks - if "0 result/s" in response.content: + if b"0 result/s" in response.content: msg = 'No search results for "{}"'.format(patent) logger.warning(msg) raise NoResults(msg) @@ -311,7 +311,7 @@ def parse_reference_link(self, link, patent): msg = "Could not parse document reference from link '%s' (patent='%s')" % (link, patent) logger.error(msg) raise Exception(msg) - label = link.find(text=True) + label = link.find(string=True) return reference, label def fetch_reference(self, result, language): @@ -369,7 +369,7 @@ def html_compact(self): PDF-Download """ - soup = BeautifulSoup(self.html) + soup = BeautifulSoup(self.html, "lxml") soup_content = soup.find('table', {'id': 'verfahrensdaten_tabelle'}) diff --git a/patzilla/access/epo/espacenet/client_html.py b/patzilla/access/epo/espacenet/client_html.py index d5202db4..caa83a7f 100644 --- a/patzilla/access/epo/espacenet/client_html.py +++ b/patzilla/access/epo/espacenet/client_html.py @@ -97,7 +97,7 @@ def espacenet_fetch_html(document_number, section, element_id=None, element_clas else: - if 'Entity not found' in response.content: + if b'Entity not found' in response.content: raise KeyError(message_404) else: raise ValueError(message_fail) diff --git a/patzilla/access/ificlaims/expression.py b/patzilla/access/ificlaims/expression.py index cda4d062..64a8a704 100644 --- a/patzilla/access/ificlaims/expression.py +++ b/patzilla/access/ificlaims/expression.py @@ -258,7 +258,7 @@ def pair_to_solr(cls, key, value, modifiers=None): except Exception as ex: message = 'IFI CLAIMS query: Invalid date or range expression "{0}". 
Reason: {1}.'.format(value, ex) - logger.warn(message + '\nException was:\n{0}'.format(_exception_traceback())) + logger.warning(message + '\nException was:\n{0}'.format(_exception_traceback())) return {'error': True, 'message': message} elif key == 'inventor' or key == 'applicant': diff --git a/patzilla/access/ificlaims/expression.rst b/patzilla/access/ificlaims/expression.rst index 2162ee87..61c78a07 100644 --- a/patzilla/access/ificlaims/expression.rst +++ b/patzilla/access/ificlaims/expression.rst @@ -20,30 +20,30 @@ Empty query IPC/CPC ======= >>> IFIClaimsParser('H01F7/00').dumps() -u'H01F7/00' +'H01F7/00' # Rewrite all patent classifications from IFI format to OPS format >>> IFIClaimsParser('ic:G01F000184').parse().rewrite_classes_ops().dumps() -u'ic : G01F1/84' +'ic : G01F1/84' >>> IFIClaimsParser('ic:G01F000184').keywords -[u'G01F1/84'] +['G01F1/84'] >>> IFIClaimsExpression.pair_to_solr('class', 'H04L12/433 or H04L12/24') -{'query': u'((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'} +{'query': '((ic:H04L0012433 OR cpc:H04L0012433) OR (ic:H04L001224 OR cpc:H04L001224))'} >>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 or (H01F7/02 and H02K7/1876)') -{'query': u'((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'} +{'query': '((ic:H01F000700 OR cpc:H01F000700) OR ((ic:H01F000702 OR cpc:H01F000702) AND (ic:H02K00071876 OR cpc:H02K00071876)))'} >>> IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)') -{'query': u'((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'} +{'query': '((ic:H01F000700 OR cpc:H01F000700) NOT ((ic:H01F000702 OR cpc:H01F000702) OR (ic:H02K00071876 OR cpc:H02K00071876)))'} Publication date ================ >>> IFIClaimsExpression.pair_to_solr('pubdate', 'foobar') -{'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". Reason: foobar.', 'error': True} +{'error': True, 'message': 'IFI CLAIMS query: Invalid date or range expression "foobar". 
Reason: foobar.'} ********* @@ -54,39 +54,39 @@ Simple expressions ================== >>> IFIClaimsParser('ttl:bildschirm').keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm') -{'query': u'text:bildschirm'} +{'query': 'text:bildschirm'} >>> IFIClaimsParser('ttl:bildschirm or ab:fahrzeug').keywords -[u'bildschirm', u'fahrzeug'] +['bildschirm', 'fahrzeug'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm or fahrzeug') -{'query': u'text:(bildschirm OR fahrzeug)'} +{'query': 'text:(bildschirm OR fahrzeug)'} >>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw)') -{'query': u'text:(bildschirm AND (fahrzeug OR pkw))'} +{'query': 'text:(bildschirm AND (fahrzeug OR pkw))'} >>> IFIClaimsParser('ttl:bildschirm and ab:(fahrzeug or pkw not lkw)').keywords -[u'bildschirm', u'fahrzeug', u'pkw', u'lkw'] +['bildschirm', 'fahrzeug', 'pkw', 'lkw'] >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and (fahrzeug or pkw not lkw)') -{'query': u'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'} +{'query': 'text:(bildschirm AND (fahrzeug OR pkw NOT lkw))'} >>> IFIClaimsParser('ab:fahrzeug or ab:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] >>> IFIClaimsParser('ab:fahrzeug not ttl:pkw').keywords -[u'fahrzeug', u'pkw'] +['fahrzeug', 'pkw'] @@ -96,22 +96,22 @@ Expressions with proximity operators Queries based on the proximity of words to each other in a document. >>> IFIClaimsParser('text:((aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] >>> IFIClaimsParser('{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] >>> IFIClaimsExpression.pair_to_solr('fulltext', '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6') {'query': '{!complexphrase}text:"(aussto* OR eject* OR pusher*) AND (verriegel* OR lock* OR sperr*)"~6'} >>> IFIClaimsParser('{!complexphrase}text:"parallel* AND schalt*"~6 AND ((ic:F16H006104 OR cpc:F16H006104))').keywords -[u'parallel', u'schalt', u'F16H61/04'] +['parallel', 'schalt', 'F16H61/04'] >>> IFIClaimsParser('((ic:F16H006104 OR cpc:F16H006104)) AND {!complexphrase}text:"parallel* AND schalt*"~6').keywords -[u'F16H61/04', u'parallel', u'schalt'] +['F16H61/04', 'parallel', 'schalt'] >>> IFIClaimsParser('{!complexphrase}text:("parallel* AND schalt*"~6 AND "antrieb* AND stufe*"~3)').keywords -[u'parallel', u'schalt', u'antrieb', u'stufe'] +['parallel', 'schalt', 'antrieb', 'stufe'] @@ -122,17 +122,17 @@ Queries without proper fieldnames like ab=, ti=, bi=, etc. 
on the left side of t >>> IFIClaimsParser('bildschirm').dumps() -u'bildschirm' +'bildschirm' >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm') -{'query': u'text:bildschirm'} +{'query': 'text:bildschirm'} >>> IFIClaimsParser('bildschirm and fahrzeug').dumps() -u'bildschirm and fahrzeug' +'bildschirm and fahrzeug' >>> IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm and fahrzeug') -{'query': u'text:(bildschirm AND fahrzeug)'} +{'query': 'text:(bildschirm AND fahrzeug)'} @@ -140,22 +140,22 @@ Expressions containing quoted words =================================== >>> IFIClaimsParser('"bildschirm"').dumps() -u'"bildschirm"' +'"bildschirm"' >>> IFIClaimsParser('"bildschirm"').keywords [] >>> IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"') -{'query': u'text:"bildschirm"'} +{'query': 'text:"bildschirm"'} >>> IFIClaimsParser('ab:"bildschirm"').dumps() -u'ab : "bildschirm"' +'ab : "bildschirm"' >>> IFIClaimsParser('ab:"bildschirm"').keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser('text:(("aussto*" OR "eject*" OR pusher*) AND (verriegel* OR lock* OR sperr*))').keywords -[u'aussto', u'eject', u'pusher', u'verriegel', u'lock', u'sperr'] +['aussto', 'eject', 'pusher', 'verriegel', 'lock', 'sperr'] @@ -163,19 +163,19 @@ Keyword extraction ================== >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00')['query']).keywords -[u'H01F7/00'] +['H01F7/00'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('class', 'H01F7/00 not (H01F7/02 or H02K7/1876)')['query']).keywords -[u'H01F7/00', u'H01F7/02', u'H02K7/1876'] +['H01F7/00', 'H01F7/02', 'H02K7/1876'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'bildschirm')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', '"bildschirm"')['query']).keywords -[u'bildschirm'] +['bildschirm'] >>> IFIClaimsParser(IFIClaimsExpression.pair_to_solr('fulltext', 'ttl:bildschirm OR ab:(fahrzeug OR pkw)')['query']).keywords -[u'bildschirm', u'fahrzeug', u'pkw'] +['bildschirm', 'fahrzeug', 'pkw'] @@ -185,18 +185,18 @@ From the wild Umlauts ------- ->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() -u'((tac : *messschieber* or tac : *me\xdfschieber*) and tac : *digital*)' +>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').dumps() +'((tac : *messschieber* or tac : *me\xdfschieber*) and tac : *digital*)' ->>> IFIClaimsParser(u'tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords -[u'messschieber', u'me\xdfschieber', u'digital'] +>>> IFIClaimsParser('tac:((*messschieber* OR *meßschieber*) AND *digital* )').keywords +['messschieber', 'me\xdfschieber', 'digital'] More ---- ->>> IFIClaimsParser(u'ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords -[u'energy', u'water', u'waves', u'Tide', u'90\xb0'] +>>> IFIClaimsParser('ttl:(energy and water) or ab:(waves or Tide) and clm:"90°"').keywords +['energy', 'water', 'waves', 'Tide', '90\xb0'] ->>> IFIClaimsParser(u'text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords -[u'bremsgef\xfchl', u'pedalgef\xfchl', u'simulator', u'simul', u'separ', u'getrennt', u'entkoppel', u'entkoppl', u'decoupl', u'eigenst\xe4ndig', u'independent', u'autonom'] +>>> IFIClaimsParser('text:(((bremsgefühl* or pedalgefühl) and (*simulator or simul*)) and (separ* or getrennt* or 
entkoppel* or entkoppl* or decoupl*) and (eigenständig* or independent* or autonom*))').keywords +['bremsgef\xfchl', 'pedalgef\xfchl', 'simulator', 'simul', 'separ', 'getrennt', 'entkoppel', 'entkoppl', 'decoupl', 'eigenst\xe4ndig', 'independent', 'autonom'] diff --git a/patzilla/access/uspto/pdf.py b/patzilla/access/uspto/pdf.py index a31b8cbf..ef9b7ccc 100644 --- a/patzilla/access/uspto/pdf.py +++ b/patzilla/access/uspto/pdf.py @@ -119,7 +119,7 @@ def get_reference_type(document): Analyze document number to tell application vs. patent (publication, grant) numbers apart. The basic heuristic is to assume e.g. US2007231208A1 (4+6=10 chars) to be an application. """ - if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, unicode))): + if document is None or not (hasattr(document, "number") and isinstance(document.number, (int, str, bytes))): raise ValueError("Unknown document reference type: {}".format(document)) number_length = len(str(document.number)) reference_type = None diff --git a/patzilla/boot/config.py b/patzilla/boot/config.py index ae75b9f9..c3d02580 100644 --- a/patzilla/boot/config.py +++ b/patzilla/boot/config.py @@ -90,7 +90,7 @@ def tmpfile(self, payload, suffix=None): """ Create a temporary file with given content. """ - tmp = tempfile.NamedTemporaryFile(suffix=suffix) + tmp = tempfile.NamedTemporaryFile(mode='w+', suffix=suffix) self._tmpfiles.append(tmp) tmp.write(payload) tmp.flush() diff --git a/patzilla/navigator/export.py b/patzilla/navigator/export.py index ed9c53f5..6bb6251a 100644 --- a/patzilla/navigator/export.py +++ b/patzilla/navigator/export.py @@ -717,7 +717,6 @@ def to_pdf(self, payload=None): #print 'out:', process.std_out #print 'err:', process.std_err log.info('STDERR:\n{}'.format(process.std_err)) - print(f"PDF name: {pdf_path}") if process.status_code == 0: #pdf_name = os.path.join(pdf_path, os.path.basename(xlsx_file.name).replace('.xlsx', '.pdf')) diff --git a/patzilla/util/cql/cheshire3/parser.py b/patzilla/util/cql/cheshire3/parser.py index b5db8763..610af8e9 100644 --- a/patzilla/util/cql/cheshire3/parser.py +++ b/patzilla/util/cql/cheshire3/parser.py @@ -572,7 +572,6 @@ def __init__(self, thing): shlex.__init__(self, thing) self.wordchars += "!@#$%^&*-+{}[];,.?|~`:\\" # self.wordchars += ''.join(map(chr, range(128,254))) - self.wordchars = self.wordchars.decode('utf-8') def read_token(self): "Read a token from the input stream (no pushback or inclusions)" @@ -915,12 +914,6 @@ def relation(self): def parse(query): """Return a searchClause/triple object from CQL string""" - if type(query) == str: - try: - query = query.decode("utf-8") - except Exception as e: - raise - q = StringIO(query) lexer = CQLshlex(q) parser = CQLParser(lexer) diff --git a/patzilla/util/cql/cheshire3/test_cheshire3.py b/patzilla/util/cql/cheshire3/test_cheshire3.py index 8df9519d..89020576 100644 --- a/patzilla/util/cql/cheshire3/test_cheshire3.py +++ b/patzilla/util/cql/cheshire3/test_cheshire3.py @@ -54,7 +54,7 @@ def test_value_shortcut_notation_fail(self): self.do_parse('ti=(foo and bar baz) and pc=qux') self.assertEqual( str(cm.exception), - "info:srw/diagnostic/1/10 [Malformed Query]: Expected Boolean or closing parenthesis but got: u'baz'") + "info:srw/diagnostic/1/10 [Malformed Query]: Expected Boolean or closing parenthesis but got: 'baz'") def test_boolean_german(self): self.assertEqual(self.do_parse('bi=foo und bi=bar'), '(bi = "foo" und bi = "bar")') diff --git a/patzilla/util/cql/pyparsing/__init__.py 
b/patzilla/util/cql/pyparsing/__init__.py index e7af9101..e75529c5 100644 --- a/patzilla/util/cql/pyparsing/__init__.py +++ b/patzilla/util/cql/pyparsing/__init__.py @@ -60,7 +60,7 @@ def parse(self): #if self.logging: # log.info(u'tokens: %s', tokens.pformat()) - except pyparsing.ParseException as ex: + except ParseException as ex: ex.explanation = '%s\n%s\n%s' % (ex.pstr, ' ' * ex.loc + '^\n', ex) #if self.logging: # log.error('\n%s', ex.explanation) diff --git a/patzilla/util/cql/pyparsing/serializer.py b/patzilla/util/cql/pyparsing/serializer.py index c0ac6268..3abe241b 100644 --- a/patzilla/util/cql/pyparsing/serializer.py +++ b/patzilla/util/cql/pyparsing/serializer.py @@ -29,7 +29,7 @@ def tokens_to_cql(tokens): >>> tokens = parse_cql('foo=bar and baz=(qux or quux)') >>> tokens_to_cql(tokens) - u'foo=bar and baz=(qux or quux)' + 'foo=bar and baz=(qux or quux)' """ buffer = io.StringIO() @@ -77,7 +77,7 @@ def normalize_patentnumbers(tokens): >>> tokens = parse_cql('pn=EP666666') >>> normalize_patentnumbers(tokens) >>> tokens_to_cql(tokens) - u'pn=EP0666666' + 'pn=EP0666666' """ def action(token, index, binop, term): @@ -99,15 +99,15 @@ def get_keywords(triples, whitelist_indexes=None): >>> triples = []; get_triples(parse_cql('txt=foo or (bi=bar or bi=baz)'), triples) >>> get_keywords(triples) - [u'foo', u'bar', u'baz'] + ['foo', 'bar', 'baz'] >>> triples = []; get_triples(parse_cql('pa all "central, intelligence, agency"'), triples) >>> get_keywords(triples) - [u'central', u'intelligence', u'agency'] + ['central', 'intelligence', 'agency'] >>> triples = []; get_triples(parse_cql('foo=bar and baz=qux'), triples) >>> get_keywords(triples, ['baz']) - [u'qux'] + ['qux'] """ keywords = [] @@ -143,11 +143,11 @@ def trim_keywords(keywords): keywords and a list of keyword elements for multi-term keywords Example: - >>> trim_keywords([u'!!!daimler?', u'Misch?(P)?wasser']) - [u'daimler', [u'Misch', u'wasser']] + >>> trim_keywords(['!!!daimler?', 'Misch?(P)?wasser']) + ['daimler', ['Misch', 'wasser']] - >>> trim_keywords([u'"foo"', u'" bar "']) - [u'foo', u'bar'] + >>> trim_keywords(['"foo"', '" bar "']) + ['foo', 'bar'] """ keywords_trimmed = [] @@ -164,7 +164,7 @@ def get_triples(tokens, triples): >>> triples = []; get_triples(parse_cql('foo=bar and baz=(qux or quux)'), triples) >>> triples - [['foo', u'=', 'bar'], ['qux'], ['quux']] + [['foo', '=', 'bar'], ['qux'], ['quux']] """ for token in tokens: @@ -184,7 +184,7 @@ def expand_shortcut_notation(tokens, index=None, binop=None): >>> tokens = parse_cql('foo=bar and baz=(qux or quux)') >>> expand_shortcut_notation(tokens) >>> tokens_to_cql(tokens) - u'foo=bar and (baz=qux or baz=quux)' + 'foo=bar and (baz=qux or baz=quux)' """ for token in tokens: diff --git a/patzilla/util/cql/pyparsing/test/01_spec.rst b/patzilla/util/cql/pyparsing/test/01_spec.rst index de7571ed..a5395600 100644 --- a/patzilla/util/cql/pyparsing/test/01_spec.rst +++ b/patzilla/util/cql/pyparsing/test/01_spec.rst @@ -20,52 +20,52 @@ Simple queries ============== >>> CQL('dinosaur').dumps() -u'dinosaur' +'dinosaur' >>> CQL('"complete dinosaur"').dumps() -u'"complete dinosaur"' +'"complete dinosaur"' >>> CQL('title = "complete dinosaur"').dumps() -u'title="complete dinosaur"' +'title="complete dinosaur"' >>> CQL('title exact "the complete dinosaur"').dumps() -u'title exact "the complete dinosaur"' +'title exact "the complete dinosaur"' Queries using Boolean logic =========================== >>> CQL('dinosaur or bird').dumps() -u'dinosaur or bird' +'dinosaur or bird' .. 
note:: **FIXME: enhance grammar** >>> #CQL('Palomar assignment and "ice age"').dumps() >>> CQL('dinosaur not reptile').dumps() -u'dinosaur not reptile' +'dinosaur not reptile' >>> CQL('dinosaur and bird or dinobird').dumps() -u'dinosaur and bird or dinobird' +'dinosaur and bird or dinobird' >>> CQL('(bird or dinosaur) and (feathers or scales)').dumps() -u'(bird or dinosaur) and (feathers or scales)' +'(bird or dinosaur) and (feathers or scales)' >>> CQL('"feathered dinosaur" and (yixian or jehol)').dumps() -u'"feathered dinosaur" and (yixian or jehol)' +'"feathered dinosaur" and (yixian or jehol)' Queries accessing publication indexes ===================================== >>> CQL('publicationYear < 1980').dumps() -u'publicationYear < 1980' +'publicationYear < 1980' >>> CQL('lengthOfFemur > 2.4').dumps() -u'lengthOfFemur > 2.4' +'lengthOfFemur > 2.4' >>> CQL('bioMass >= 100').dumps() -u'bioMass >= 100' +'bioMass >= 100' Queries based on the proximity of words to each other in a document @@ -82,17 +82,17 @@ Queries across multiple dimensions ================================== >>> CQL('date within "2002 2005"').dumps() -u'date within "2002 2005"' +'date within "2002 2005"' >>> CQL('dateRange encloses 2003').dumps() -u'dateRange encloses 2003' +'dateRange encloses 2003' Queries based on relevance ========================== >>> CQL('subject any/relevant "fish frog"').dumps() -u'subject any/relevant "fish frog"' +'subject any/relevant "fish frog"' >>> CQL('subject any/rel.lr "fish frog"').dumps() -u'subject any/rel.lr "fish frog"' +'subject any/rel.lr "fish frog"' diff --git a/patzilla/util/cql/pyparsing/test/05_misc.rst b/patzilla/util/cql/pyparsing/test/05_misc.rst index ed175ba6..2f556800 100644 --- a/patzilla/util/cql/pyparsing/test/05_misc.rst +++ b/patzilla/util/cql/pyparsing/test/05_misc.rst @@ -15,14 +15,14 @@ Queries with UTF-8 characters Try parsing a query containing utf-8 characters. ->>> CQL(u'title=molécules').dumps() -u'title=mol\xe9cules' +>>> CQL('title=molécules').dumps() +'title=mol\xe9cules' ->>> CQL(u'inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps() -u'inventor="CEGARRA SERRANO JOS\xc9 MARIANO"' +>>> CQL('inventor="CEGARRA SERRANO JOSÉ MARIANO"').dumps() +'inventor="CEGARRA SERRANO JOS\xc9 MARIANO"' ->>> CQL(u'ab=radaufstandskraft or ab=radaufstandskräfte?').dumps() -u'ab=radaufstandskraft or ab=radaufstandskr\xe4fte?' +>>> CQL('ab=radaufstandskraft or ab=radaufstandskräfte?').dumps() +'ab=radaufstandskraft or ab=radaufstandskr\xe4fte?' # TODO: use more esoteric utf-8 characters, e.g. special chars et al. @@ -30,7 +30,7 @@ Queries using wildcards ======================= >>> CQL('txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re').dumps() -u'txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re' +'txt=footw or txt=footw? or txt=footw# or txt=footw! and txt=footw*re' Query with comments @@ -41,16 +41,16 @@ Query with comments ... (baz or qux)) -- comment 2 ... ... """).dumps() -u'foo=(bar and (baz or qux))' +'foo=(bar and (baz or qux))' Weird queries ============= >>> CQL(' foobar ').dumps() -u'foobar' +'foobar' >>> CQL('(((foobar)))').dumps() -u'(((foobar)))' +'(((foobar)))' Queries with errors @@ -61,7 +61,7 @@ Nonsense >>> CQL('foo bar', logging=False).dumps() Traceback (most recent call last): ... 
-ParseException: Expected end of text (at char 4), (line:1, col:5)
+ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5)
 
 Lacking terms
 -------------
@@ -92,12 +92,12 @@ Unknown binops
 
 >>> CQL('foo % bar', logging=False).dumps()
 Traceback (most recent call last):
 ...
-ParseException: Expected end of text (at char 4), (line:1, col:5)
+ParseException: Expected end of text, found 'bar' (at char 4), (line:1, col:5)
 
 Error explanation
 -----------------
 
 >>> try:
-...     CQL(u'foo bar', logging=False).dumps()
+...     CQL('foo bar', logging=False).dumps()
 ... except Exception as ex:
 ...     ex.explanation
-u'foo bar\n    ^\n\nExpected end of text (at char 4), (line:1, col:5)'
+"foo bar\n    ^\n\nExpected end of text, found 'bar' (at char 4), (line:1, col:5)"
diff --git a/patzilla/util/cql/pyparsing/test/10_extensions.rst b/patzilla/util/cql/pyparsing/test/10_extensions.rst
index 73c72e1e..02d217bc 100644
--- a/patzilla/util/cql/pyparsing/test/10_extensions.rst
+++ b/patzilla/util/cql/pyparsing/test/10_extensions.rst
@@ -18,13 +18,13 @@ Patent number normalization
 First, check parsing and reproducing a query for a publication number without normalization applied:
 
 >>> CQL('pn=EP666666').dumps()
-u'pn=EP666666'
+'pn=EP666666'
 
 Then, check whether normalization works correctly.
 Here, the EP document number should get zero-padded properly:
 
 >>> CQL('pn=EP666666').normalize_numbers().dumps()
-u'pn=EP0666666'
+'pn=EP0666666'
 
 
 Keyword extraction
@@ -33,13 +33,13 @@ Keyword extraction
 First, make sure the query can actually be parsed:
 
 >>> CQL('bi=greifer and pc=de').dumps()
-u'bi=greifer and pc=de'
+'bi=greifer and pc=de'
 
 
 Then, check the list of extracted keywords:
 
 >>> CQL('bi=greifer and pc=de').keywords()
-[u'greifer']
+['greifer']
 
 
 Details
@@ -53,8 +53,8 @@ because index name "pc" is not whitelisted.
 We can have a look at the layer below, where raw triples got extracted
 from the query string, that's the step just before collecting the keywords:
 
->>> CQL(u'bi=greifer and pc=de').triples()
-[[u'bi', u'=', u'greifer'], [u'pc', u'=', u'de']]
+>>> CQL('bi=greifer and pc=de').triples()
+[['bi', '=', 'greifer'], ['pc', '=', 'de']]
 
 This shows we also have access to the "pc=de" condition
 if there's demand for enhanced query analytics in the future.
@@ -70,13 +70,13 @@ Parse and reproduce a cql query containing a nested expression in value shortcut
 
 Our old token-based parser wasn't capable doing this.
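 Conceptually, the expansion just distributes the index name over each term of
 the parenthesized value group. A minimal standalone sketch of the idea -- the
 helper below is purely illustrative and not part of PatZilla's parser:
 
 >>> def expand_shortcut(index, terms, binop):
 ...     # Hypothetical helper: "bi", ["socke", "Inlay"] -> "(bi=socke and bi=Inlay)"
 ...     return "(" + " {} ".format(binop).join("{}={}".format(index, t) for t in terms) + ")"
 >>> expand_shortcut("bi", ["socke", "Inlay"], "and")
 '(bi=socke and bi=Inlay)'
 
 The real parser also handles arbitrary nesting, as the following examples show: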
>>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').dumps() -u'bi=(socke and (Inlay or Teile)) and pc=de' +'bi=(socke and (Inlay or Teile)) and pc=de' Expand the value shortcut notation: >>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().dumps() -u'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de' +'(bi=socke and (bi=Inlay or bi=Teile)) and pc=de' Special operators @@ -86,7 +86,7 @@ Boolean operators (binops) in german ------------------------------------ >>> CQL('BI=Socke und PA=onion').dumps() -u'BI=Socke UND PA=onion' +'BI=Socke UND PA=onion' @@ -108,7 +108,7 @@ Verbatim reproduction The query should be reproduced verbatim when not applying any expansion or normalization: >>> CQL(query).dumps() -u'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)' +'pn=(EP666666 or EP666667) or (cpc=H04L12/433 and txt=communication?)' Polishing @@ -116,12 +116,12 @@ Polishing After shortcut expansion and number normalization, we should see zero-padded EP document numbers: >>> CQL(query).polish().dumps() -u'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)' +'(pn=EP0666666 or pn=EP0666667) or (cpc=H04L12/433 and txt=communication?)' Terms from conditions for classification- or fulltext-indexes should count towards keywords: >>> CQL(query).polish().keywords() -[u'H04L12/433', u'communication'] +['H04L12/433', 'communication'] Details @@ -130,13 +130,13 @@ Even without polishing the query, the keywords should be the same, since "cpc" and "txt" conditions both are not in value shortcut notation. >>> CQL(query).keywords() -[u'H04L12/433', u'communication'] +['H04L12/433', 'communication'] On the other hand, number normalization for numbers in value shortcut notation obviously does not work when not having shortcut expansion applied before: >>> CQL('pn=(EP666666 or EP666667)').normalize_numbers().dumps() -u'pn=(EP666666 or EP666667)' +'pn=(EP666666 or EP666667)' Nesting and keywords @@ -146,4 +146,4 @@ We especially want to properly extract keywords from nested expressions, even when they are in value shortcut notation. >>> CQL('bi=(socke and (Inlay or Teile)) and pc=de').expand_shortcuts().keywords() -[u'socke', u'Inlay', u'Teile'] +['socke', 'Inlay', 'Teile'] diff --git a/patzilla/util/cql/pyparsing/test/15_ops.rst b/patzilla/util/cql/pyparsing/test/15_ops.rst index eae17e30..65d244f8 100644 --- a/patzilla/util/cql/pyparsing/test/15_ops.rst +++ b/patzilla/util/cql/pyparsing/test/15_ops.rst @@ -20,7 +20,7 @@ Date range Test date range condition used when extrapolating from vanity url, e.g. /publicationdate/2014W10. >>> CQL('publicationdate within 2014-03-10,2014-03-16').dumps() -u'publicationdate within 2014-03-10,2014-03-16' +'publicationdate within 2014-03-10,2014-03-16' Examples from OPS reference guide @@ -37,23 +37,23 @@ CQL examples Original CQL examples from reference guide. >>> CQL('ti all "green, energy"').dumps() -u'ti all "green, energy"' +'ti all "green, energy"' .. note:: **FIXME: enhance grammar** >>> #CQL('ti=green prox/unit=world ti=energy').dumps() >>> CQL('pd within "20051212 20051214"').dumps() -u'pd within "20051212 20051214"' +'pd within "20051212 20051214"' >>> CQL('pd="20051212 20051214"').dumps() -u'pd="20051212 20051214"' +'pd="20051212 20051214"' >>> CQL('ia any "John, Smith"').dumps() -u'ia any "John, Smith"' +'ia any "John, Smith"' >>> CQL('pn=EP and pr=GB').dumps() -u'pn=EP and pr=GB' +'pn=EP and pr=GB' .. 
note:: **FIXME: enhance grammar** @@ -62,19 +62,19 @@ u'pn=EP and pr=GB' >>> #CQL('(ta=green prox/distance<=3 ta=energy) or (ta=renewable prox/distance<=3 ta=energy)').dumps() >>> CQL('pa all "central, intelligence, agency" and US').dumps() -u'pa all "central, intelligence, agency" and US' +'pa all "central, intelligence, agency" and US' >>> CQL('pa all "central, intelligence, agency" and US and pd>2000').dumps() -u'pa all "central, intelligence, agency" and US and pd > 2000' +'pa all "central, intelligence, agency" and US and pd > 2000' >>> CQL('pd < 18000101').dumps() -u'pd < 18000101' +'pd < 18000101' >>> CQL('ta=synchroni#ed').dumps() -u'ta=synchroni#ed' +'ta=synchroni#ed' >>> CQL('EP and 2009 and Smith').dumps() -u'EP and 2009 and Smith' +'EP and 2009 and Smith' .. note:: **FIXME: enhance grammar** @@ -91,23 +91,23 @@ Shortcut notation expansion All these should not be affected by any query manipulation. Prove that. >>> CQL('pa all "central, intelligence, agency" and US').polish().dumps() -u'pa all "central, intelligence, agency" and US' +'pa all "central, intelligence, agency" and US' >>> CQL('pa all "central, intelligence, agency" and US and pd>2000').polish().dumps() -u'pa all "central, intelligence, agency" and US and pd > 2000' +'pa all "central, intelligence, agency" and US and pd > 2000' >>> CQL('EP and 2009 and Smith').polish().dumps() -u'EP and 2009 and Smith' +'EP and 2009 and Smith' Keyword extraction ------------------ >>> CQL('pa all "central, intelligence, agency" and US').polish().keywords() -[u'central', u'intelligence', u'agency'] +['central', 'intelligence', 'agency'] >>> CQL('pa all "central intelligence agency" and US').polish().keywords() -[u'central', u'intelligence', u'agency'] +['central', 'intelligence', 'agency'] .. note:: **FIXME: enhance parser smartness: follow rules outlined on p. 148, section 4.2. CQL index catalogue** diff --git a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst index 3f22af00..176137dd 100644 --- a/patzilla/util/cql/pyparsing/test/20_depatisnet.rst +++ b/patzilla/util/cql/pyparsing/test/20_depatisnet.rst @@ -27,19 +27,19 @@ Test some logic operators localized to german. Getting started --------------- >>> CQL('bi=(greifer oder bagger)').dumps() -u'bi=(greifer ODER bagger)' +'bi=(greifer ODER bagger)' Made up ------- Try to understand the query. ->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps() -u'bi=((wasser UND Getr\xe4nk) NICHT (?hahn ODER ?zapf ODER (k\xfchl? ODER ?k\xfchl)))' +>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').dumps() +'bi=((wasser UND Getr\xe4nk) NICHT (?hahn ODER ?zapf ODER (k\xfchl? ODER ?k\xfchl)))' Extract keywords from query. ->>> CQL(u'bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? oder ?kühl)))').polish().keywords() -[u'wasser', u'Getr\xe4nk', u'hahn', u'zapf', u'k\xfchl', u'k\xfchl'] +>>> CQL('bi=((wasser UND Getränk) NICHT (?hahn oder ?zapf oder (kühl? 
oder ?kühl)))').polish().keywords() +['wasser', 'Getr\xe4nk', 'hahn', 'zapf', 'k\xfchl', 'k\xfchl'] Neighbourhood operators @@ -50,18 +50,18 @@ Getting started Try a bareword query string containing a neighbourhood term operator: ->>> CQL(u'L(W)Serine').dumps() -u'L(W)Serine' +>>> CQL('L(W)Serine').dumps() +'L(W)Serine' Try the same in the context of a real condition (triple): ->>> CQL(u'ab=(L(W)Serine)').dumps() -u'ab=(L(W)Serine)' +>>> CQL('ab=(L(W)Serine)').dumps() +'ab=(L(W)Serine)' Check this works caseless as well: ->>> CQL(u'L(w)Serine').dumps() -u'L(W)Serine' +>>> CQL('L(w)Serine').dumps() +'L(W)Serine' Made up @@ -69,14 +69,14 @@ Made up Try some more complex queries containing neighbourhood term operators and wildcards. ->>> CQL(u'bi=(Cry1?(L)resist?)').dumps() -u'bi=(Cry1?(L)resist?)' +>>> CQL('bi=(Cry1?(L)resist?)').dumps() +'bi=(Cry1?(L)resist?)' ->>> CQL(u'bi=(Cry1?(5A)tox?)').dumps() -u'bi=(Cry1?(5A)tox?)' +>>> CQL('bi=(Cry1?(5A)tox?)').dumps() +'bi=(Cry1?(5A)tox?)' ->>> CQL(u'bi=(Misch?(P)?wasser)').dumps() -u'bi=(Misch?(P)?wasser)' +>>> CQL('bi=(Misch?(P)?wasser)').dumps() +'bi=(Misch?(P)?wasser)' @@ -93,91 +93,91 @@ Search examples --------------- >>> CQL('PA= siemens').dumps() -u'PA=siemens' +'PA=siemens' >>> CQL('PUB= 01.03.2010 UND PA= siemens').dumps() -u'PUB=01.03.2010 UND PA=siemens' +'PUB=01.03.2010 UND PA=siemens' >>> CQL('PA= siemens UND IN= Braun UND PUB>= 01.03.2010').dumps() -u'PA=siemens UND IN=Braun UND PUB >= 01.03.2010' +'PA=siemens UND IN=Braun UND PUB >= 01.03.2010' >>> CQL('PUB= M11-2009 UND PA= daimler?').dumps() -u'PUB=M11-2009 UND PA=daimler?' +'PUB=M11-2009 UND PA=daimler?' ->>> CQL(u'AB = !!!lösung').dumps() -u'AB=!!!l\xf6sung' +>>> CQL('AB = !!!lösung').dumps() +'AB=!!!l\xf6sung' >>> CQL('TI = ###heizung').dumps() -u'TI=###heizung' +'TI=###heizung' >>> CQL('CL = ?fahrzeug').dumps() -u'CL=?fahrzeug' +'CL=?fahrzeug' >>> CQL('BI= (programmabschnitt# UND administra?)').dumps() -u'BI=(programmabschnitt# UND administra?)' +'BI=(programmabschnitt# UND administra?)' >>> CQL('ICB=F17D5/00').dumps() -u'ICB=F17D5/00' +'ICB=F17D5/00' >>> CQL('ICB=F17D5-00').dumps() -u'ICB=F17D5-00' +'ICB=F17D5-00' >>> CQL("ICB='F17D 5/00'").dumps() -u"ICB='F17D 5/00'" +"ICB='F17D 5/00'" >>> CQL('ICB=F17D0005000000').dumps() -u'ICB=F17D0005000000' +'ICB=F17D0005000000' >>> CQL('ICP=F17D5/00M').dumps() -u'ICP=F17D5/00M' +'ICP=F17D5/00M' >>> CQL('ICP=F17D5-00M').dumps() -u'ICP=F17D5-00M' +'ICP=F17D5-00M' >>> CQL("ICP='F17D 5/00 M'").dumps() -u"ICP='F17D 5/00 M'" +"ICP='F17D 5/00 M'" >>> CQL('ICP=F17D000500000M').dumps() -u'ICP=F17D000500000M' +'ICP=F17D000500000M' >>> CQL('ICB=F04D13/?').dumps() -u'ICB=F04D13/?' +'ICB=F04D13/?' >>> CQL('ICB=F04D13-?').dumps() -u'ICB=F04D13-?' +'ICB=F04D13-?' >>> CQL("ICB='F04D 13/?'").dumps() -u"ICB='F04D 13/?'" +"ICB='F04D 13/?'" >>> CQL('ICB=F04D0013?').dumps() -u'ICB=F04D0013?' +'ICB=F04D0013?' 
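 The ICB examples above spell the same classes in four notations: slash,
 hyphen, quoted with a blank, and fixed-width zero-padded. As a rough sketch,
 converting the fixed-width spelling back to slash notation could look like
 this -- the padding rules assumed here are simplified (subgroups with
 legitimate trailing zeros are not handled) and this is not PatZilla's actual
 normalization code:
 
 >>> def fixedwidth_to_slash(klass):
 ...     # Assumption: 4-char subclass, 4-digit main group, zero-padded subgroup.
 ...     head, main, sub = klass[:4], klass[4:8], klass[8:]
 ...     return '{0}{1}/{2}'.format(head, int(main), (sub.rstrip('0') or '0').zfill(2))
 >>> fixedwidth_to_slash('F17D0005000000')
 'F17D5/00'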
Search examples for the proximity operator (NOTW) ------------------------------------------------- >>> CQL('Bi= (Regler und (mechanische(NOTW)Regler))').dumps() -u'Bi=(Regler UND (mechanische(NOTW)Regler))' +'Bi=(Regler UND (mechanische(NOTW)Regler))' >>> CQL('Bi= (Regler und (mechanische (NOTW) Regler))').dumps() -u'Bi=(Regler UND (mechanische (NOTW) Regler))' +'Bi=(Regler UND (mechanische (NOTW) Regler))' Searches in the text fields "Title", "Abstract", "Description", "Claims", "Full text data" ------------------------------------------------------------------------------------------ >>> CQL('TI = ( DVB(W)T )').dumps() -u'TI=(DVB(W)T)' +'TI=(DVB(W)T)' >>> CQL('Bi= (personalcomputer oder (personal(W)computer))').dumps() -u'Bi=(personalcomputer ODER (personal(W)computer))' +'Bi=(personalcomputer ODER (personal(W)computer))' Searches in the fields "Applicant/owner", "Inventor" ---------------------------------------------------- >>> CQL('PA = ( Anna(L)Huber )').dumps() -u'PA=(Anna(L)Huber)' +'PA=(Anna(L)Huber)' Keywords @@ -185,7 +185,7 @@ Keywords Try some more complex queries containing *value shortcut notations*, *neighbourhood term operators* and *wildcards*. ->>> largequery = u""" +>>> largequery = """ ... (PA= siemens UND IN= Braun UND PUB>= 01.03.2010) or ... (PUB=M11-2009 UND PA=daimler?) or ... (AB = (!!!lösung or ###heizung or ?fahrzeug)) or @@ -195,10 +195,10 @@ Try some more complex queries containing *value shortcut notations*, *neighbourh ... """ >>> CQL(largequery).dumps() -u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)" +"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or (AB=(!!!l\xf6sung or ###heizung or ?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or bi=(Cry1?(L)resist? or Cry1?(5A)tox? or Misch?(P)?wasser)" >>> CQL(largequery).keywords() -[u'siemens', u'Braun', u'daimler', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler']] +['siemens', 'Braun', 'daimler', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler']] Polishing @@ -207,20 +207,20 @@ Polishing Polishing a query, especially the shortcut notation expansion, should not corrupt query syntax. >>> CQL('TI = ( DVB(W)T )').polish().dumps() -u'TI=(DVB(W)T)' +'TI=(DVB(W)T)' >>> CQL('Bi= (personalcomputer oder (personal(W)computer))').polish().dumps() -u'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))' +'(Bi=personalcomputer ODER (Bi=(personal(W)computer)))' >>> CQL('bi=(Cry1?(L)resist?)').polish().dumps() -u'bi=(Cry1?(L)resist?)' +'bi=(Cry1?(L)resist?)' >>> CQL(largequery).polish().dumps() -u"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) or bi=(Misch?(P)?wasser))" +"(PA=siemens UND IN=Braun UND PUB >= 01.03.2010) or (PUB=M11-2009 UND PA=daimler?) or ((AB=!!!l\xf6sung or AB=###heizung or AB=?fahrzeug)) or (ICB='F17D 5/00' or ICB=F04D13-?) or bi=(mechanische (NOTW) Regler) or (bi=(Cry1?(L)resist?) or bi=(Cry1?(5A)tox?) 
or bi=(Misch?(P)?wasser))" >>> CQL(largequery).polish().keywords() -[u'siemens', u'Braun', u'daimler', u'l\xf6sung', u'heizung', u'fahrzeug', u'F17D 5/00', u'F04D13-', [u'mechanische', u'Regler'], [u'Cry1', u'resist'], [u'Cry1', u'tox'], [u'Misch', u'wasser']] +['siemens', 'Braun', 'daimler', 'l\xf6sung', 'heizung', 'fahrzeug', 'F17D 5/00', 'F04D13-', ['mechanische', 'Regler'], ['Cry1', 'resist'], ['Cry1', 'tox'], ['Misch', 'wasser']] From the wild @@ -233,18 +233,18 @@ Query 1 Reproduce verbatim: ->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps()) +>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').dumps()) (ab=radaufstandskraft or ab=radaufstandskräfte?) Reproduce with polishing: ->>> print(CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps()) +>>> print(CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().dumps()) (ab=radaufstandskraft or ab=radaufstandskräfte?) Extract keywords after polishing: ->>> CQL(u'(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords() -[u'radaufstandskraft', u'radaufstandskr\xe4fte'] +>>> CQL('(ab=radaufstandskraft or ab=radaufstandskräfte?)').polish().keywords() +['radaufstandskraft', 'radaufstandskr\xe4fte'] Query 2 @@ -252,18 +252,18 @@ Query 2 Reproduce verbatim: ->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps()) +>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').dumps()) bi=((warm(P)walzen) and (band(P)mitte and messung)) ODER bi=((warm and walzen) and (band and säbel and messung)) ODER bi=((warm and walzen) and (mitten UND messung)) ODER BI=((reversiergerüst) UND (breitenmessung)) Reproduce with polishing: ->>> print(CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps()) +>>> print(CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().dumps()) ((bi=(warm(P)walzen)) and (bi=(band(P)mitte) and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=band and bi=säbel and bi=messung)) ODER ((bi=warm and bi=walzen) and (bi=mitten UND bi=messung)) ODER ((BI=reversiergerüst) UND (BI=breitenmessung)) Extract keywords after polishing: ->>> CQL(u'bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords() -[[u'warm', u'walzen'], [u'band', u'mitte'], u'messung', u'warm', u'walzen', u'band', u's\xe4bel', u'messung', u'warm', u'walzen', u'mitten', u'messung', u'reversierger\xfcst', u'breitenmessung'] +>>> CQL('bi=( ( warm(P)walzen) AND ( band(P)mitte and messung) ) oder bi=( ( warm and walzen) AND ( band and säbel and messung) ) oder bi=((warm and walzen)and (mitten und messung)) oder BI =((reversiergerüst)und(breitenmessung))').polish().keywords() +[['warm', 
'walzen'], ['band', 'mitte'], 'messung', 'warm', 'walzen', 'band', 's\xe4bel', 'messung', 'warm', 'walzen', 'mitten', 'messung', 'reversierger\xfcst', 'breitenmessung'] Query 3 @@ -271,18 +271,18 @@ Query 3 Reproduce verbatim: ->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps()) +>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').dumps()) bi=((hot(P)rolling) and (strip(P)center and measurement) ODER (hot and rolling) and (strip and camber and measurement)) ODER bi=((reversing and mill) and (camber)) ODER bi=((hot and steel) and (center and measurement)) ODER BI=((hot(P)slab) UND (position(P)measurement)) ODER BI=((hot(P)strip) UND (position(P)measurement)) Reproduce with polishing: ->>> print(CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps()) +>>> print(CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().dumps()) ((bi=(hot(P)rolling)) and (bi=(strip(P)center) and bi=measurement) ODER (bi=hot and bi=rolling) and (bi=strip and bi=camber and bi=measurement)) ODER ((bi=reversing and bi=mill) and (bi=camber)) ODER ((bi=hot and bi=steel) and (bi=center and bi=measurement)) ODER ((BI=(hot(P)slab)) UND (BI=(position(P)measurement))) ODER ((BI=(hot(P)strip)) UND (BI=(position(P)measurement))) Extract keywords after polishing: ->>> CQL(u'bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords() -[[u'hot', u'rolling'], [u'strip', u'center'], u'measurement', u'hot', u'rolling', u'strip', u'camber', u'measurement', u'reversing', u'mill', u'camber', u'hot', u'steel', u'center', u'measurement', [u'hot', u'slab'], [u'position', u'measurement'], [u'hot', u'strip'], [u'position', u'measurement']] +>>> CQL('bi=( ( hot(P)rolling) AND ( strip(P)center and measurement) oder ( hot and rolling) AND ( strip and camber and measurement) ) oder bi=((reversing and mill)and (camber)) ODER bi=( ( hot and steel) AND (center and measurement) ) ODER BI =((hot(P)slab) und(position(P)measurement)) ODER BI =((hot(P)strip) und(position(P)measurement))').polish().keywords() +[['hot', 'rolling'], ['strip', 'center'], 'measurement', 'hot', 'rolling', 
'strip', 'camber', 'measurement', 'reversing', 'mill', 'camber', 'hot', 'steel', 'center', 'measurement', ['hot', 'slab'], ['position', 'measurement'], ['hot', 'strip'], ['position', 'measurement']] Query 4 @@ -290,15 +290,15 @@ Query 4 Reproduce verbatim: ->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps()) +>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').dumps()) BI=((finne? or (flying(1A)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(W)joint) or quadrilaterale or quatre)) Reproduce with polishing: ->>> print(CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps()) +>>> print(CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().dumps()) ((BI=finne? or (BI=(flying(1A)buttress?)) or BI=fins or BI=effillee?) and (BI=viergelenk? or BI=mehrgelenk? or BI=quadrilateral? or BI=quadruple? or (BI=(four(W)joint)) or BI=quadrilaterale or BI=quatre)) Extract keywords after polishing: ->>> CQL(u'BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords() -[u'finne', [u'flying', u'buttress'], u'fins', u'effillee', u'viergelenk', u'mehrgelenk', u'quadrilateral', u'quadruple', [u'four', u'joint'], u'quadrilaterale', u'quatre'] +>>> CQL('BI=((finne? or (flying(1a)buttress?) or fins or effillee?) and (viergelenk? or mehrgelenk? or quadrilateral? or quadruple? or (four(w)joint) or quadrilaterale or quatre))').polish().keywords() +['finne', ['flying', 'buttress'], 'fins', 'effillee', 'viergelenk', 'mehrgelenk', 'quadrilateral', 'quadruple', ['four', 'joint'], 'quadrilaterale', 'quatre'] diff --git a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst index 3659f21e..0385cfc7 100644 --- a/patzilla/util/cql/pyparsing/test/30_ificlaims.rst +++ b/patzilla/util/cql/pyparsing/test/30_ificlaims.rst @@ -29,16 +29,16 @@ Test some logic operators localized to german. Getting started --------------- >>> CQL('pnctry:EP AND text:vibrat*').dumps() -u'pnctry : EP and text : vibrat*' +'pnctry : EP and text : vibrat*' Made up ------- Try to understand the query. ->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps() -u'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))' +>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').dumps() +'(pnctry : EP and (pnctry : EP and text : vibrat* and (ic : G01F000184 or cpc : G01F000184)))' Extract keywords from query. 
->>> CQL(u'(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords() -[u'vibrat', u'G01F000184', u'G01F000184'] +>>> CQL('(pnctry:EP and (pnctry:EP AND text:vibrat* AND (ic:G01F000184 OR cpc:G01F000184)))').polish().keywords() +['vibrat', 'G01F000184', 'G01F000184'] diff --git a/patzilla/util/data/orderedset.py b/patzilla/util/data/orderedset.py index 01b21d09..43e45da1 100644 --- a/patzilla/util/data/orderedset.py +++ b/patzilla/util/data/orderedset.py @@ -2,7 +2,7 @@ # Set that remembers original insertion order. import collections -class OrderedSet(collections.MutableSet): +class OrderedSet(collections.abc.MutableSet): def __init__(self, iterable=None): self.end = end = [] diff --git a/patzilla/util/network/requests_xmlrpclib.py b/patzilla/util/network/requests_xmlrpclib.py index efcd83c7..e61eaf61 100644 --- a/patzilla/util/network/requests_xmlrpclib.py +++ b/patzilla/util/network/requests_xmlrpclib.py @@ -6,17 +6,14 @@ Usage: - >>> import xmlrpclib + >>> import xmlrpc.client >>> #from transport import RequestsTransport - >>> s = xmlrpclib.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport()) + >>> s = xmlrpc.client.ServerProxy('http://yoursite.com/xmlrpc', transport=RequestsTransport()) >>> #s.demo.sayHello() Hello! """ -try: - import xmlrpc.client as xmlrpc -except ImportError: - import xmlrpc.client as xmlrpc +import xmlrpc.client as xmlrpc import requests class RequestsTransport(xmlrpc.Transport): diff --git a/patzilla/util/text/format.py b/patzilla/util/text/format.py index 225c86b2..9093becf 100644 --- a/patzilla/util/text/format.py +++ b/patzilla/util/text/format.py @@ -2,9 +2,9 @@ # (c) 2014-2016 Andreas Motl, Elmyra UG import re -_slugify_strip_re = re.compile(r'[^\w\s-]') -_slugify_strip_wo_equals_re = re.compile(r'[^\w\s=-]') -_slugify_hyphenate_re = re.compile(r'[-\s]+') +_slugify_strip_re = re.compile(rb'[^\w\s-]') +_slugify_strip_wo_equals_re = re.compile(rb'[^\w\s=-]') +_slugify_hyphenate_re = re.compile(rb'[-\s]+') def slugify(value, strip_equals=True, lowercase=True): """ Normalizes string, converts to lowercase, removes non-alpha characters, @@ -22,12 +22,16 @@ def slugify(value, strip_equals=True, lowercase=True): _strip_re = _slugify_strip_re if not strip_equals: _strip_re = _slugify_strip_wo_equals_re - value = str(_strip_re.sub('', value).strip()) + + if isinstance(value, str): + value = _strip_re.sub('', value).strip() + else: + value = _strip_re.sub(b'', value).strip() if lowercase: value = value.lower() - value = _slugify_hyphenate_re.sub('-', value) + value = _slugify_hyphenate_re.sub(b'-', value) return value def text_indent(text, amount=4, ch=' '): From 836fac0384f599abb934d74e660554fbd7189648 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sat, 25 Mar 2023 07:42:19 +0100 Subject: [PATCH 12/23] Fixes for tests --- tests/__init__.py | 2 +- tests/access/test_dpma_register.py | 16 ++++---- tests/access/test_epo_ops.py | 60 ++++++++++++++-------------- tests/access/test_uspto.py | 12 +++--- tests/test_numberlists.py | 10 ++--- tests/util/test_jwt.py | 8 ++-- tests/util/test_numbers_common.py | 4 +- tests/util/test_numbers_normalize.py | 4 +- tests/util/test_python.py | 10 ++--- tests/util/test_text_format.py | 10 ++--- 10 files changed, 68 insertions(+), 68 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index b06494c4..30067b0a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -11,7 +11,7 @@ def suppress_warnings(): """ with warnings.catch_warnings(): 
warnings.filterwarnings("ignore", category=DeprecationWarning) - import pandas.util.nosetester + import numpy.testing suppress_warnings() diff --git a/tests/access/test_dpma_register.py b/tests/access/test_dpma_register.py index ba2aa470..1180615e 100644 --- a/tests/access/test_dpma_register.py +++ b/tests/access/test_dpma_register.py @@ -37,10 +37,10 @@ def test_dpmaregister_url_de(): def test_dpmaregister_xml(): with F5WafWrapper(): xml = access_register("WO2008034638", output_format="xml") - assert '' in xml - assert "" in xml - assert "" in xml + assert b'' in xml + assert b"" in xml + assert b"" in xml def test_dpmaregister_json(): @@ -72,12 +72,12 @@ def test_dpmaregister_html_compact_de(): def test_dpmaregister_pdf_compact_en(): with F5WafWrapper(): pdf = access_register("EP666666", output_format="pdf") - assert "File number 695 34 171.5" in pdf - assert "Most recent update in DPMAregister on Jan 7, 2017" in pdf + assert b"File number 695 34 171.5" in pdf + assert b"Most recent update in DPMAregister on Jan 7, 2017" in pdf def test_dpmaregister_pdf_compact_de(): with F5WafWrapper(): pdf = access_register("EP666666", output_format="pdf", language="de") - assert "Aktenzeichen 695 34 171.5" in pdf - assert "letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf + assert b"Aktenzeichen 695 34 171.5" in pdf + assert b"letzte Aktualisierung in DPMAregister am 07.01.2017" in pdf diff --git a/tests/access/test_epo_ops.py b/tests/access/test_epo_ops.py index b30051e2..1dea183a 100644 --- a/tests/access/test_epo_ops.py +++ b/tests/access/test_epo_ops.py @@ -140,7 +140,7 @@ def test_search_swap_family(app_request): total_result_count = int(jpath('/ops:world-patent-data/ops:biblio-search/@total-result-count', results.data)) assert total_result_count == 2 - assert results.selected_numbers == [u'DE69534171T2', u'EP0666667A2'] + assert results.selected_numbers == ['DE69534171T2', 'EP0666667A2'] def test_crawl(app_request): @@ -188,13 +188,13 @@ def test_biblio_data_json_success(app_request): assert len(documents) == 3 assert kindcodes == ["A2", "A3", "B1"] assert attributes == [ - u'@country', - u'@doc-number', - u'@family-id', - u'@kind', - u'@system', - u'abstract', - u'bibliographic-data', + '@country', + '@doc-number', + '@family-id', + '@kind', + '@system', + 'abstract', + 'bibliographic-data', ] @@ -275,31 +275,31 @@ def test_family_members(app_request): pubnumbers = sorted([item["publication"]["number-docdb"] for item in members.items]) assert appnumbers == [ - u'CA2142029A', - u'CA2142029A', - u'DE69534171T', - u'DE69534171T', - u'EP95480005A', - u'EP95480005A', - u'EP95480005A', - u'JP29020894A', - u'JP29020894A', - u'US19288494A', - u'US47157195A', + 'CA2142029A', + 'CA2142029A', + 'DE69534171T', + 'DE69534171T', + 'EP95480005A', + 'EP95480005A', + 'EP95480005A', + 'JP29020894A', + 'JP29020894A', + 'US19288494A', + 'US47157195A', ] assert pubnumbers == [ - u'CA2142029A1', - u'CA2142029C', - u'DE69534171D1', - u'DE69534171T2', - u'EP0666666A2', - u'EP0666666A3', - u'EP0666666B1', - u'JP2613027B2', - u'JPH07231328A', - u'US5467352A', - u'US5572526A', + 'CA2142029A1', + 'CA2142029C', + 'DE69534171D1', + 'DE69534171T2', + 'EP0666666A2', + 'EP0666666A3', + 'EP0666666B1', + 'JP2613027B2', + 'JPH07231328A', + 'US5467352A', + 'US5572526A', ] diff --git a/tests/access/test_uspto.py b/tests/access/test_uspto.py index f3503a3b..8852600d 100644 --- a/tests/access/test_uspto.py +++ b/tests/access/test_uspto.py @@ -6,7 +6,7 @@ import re import pytest -from bunch import Bunch +from munch import 
Munch from pyramid.httpexceptions import HTTPNotFound from patzilla.access.uspto.image import fetch_first_drawing @@ -161,9 +161,9 @@ def test_fetch_url_failure(): def test_get_reference_type_valid(): - assert get_reference_type(Bunch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION - assert get_reference_type(Bunch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION - assert get_reference_type(Bunch(number=1)) == UsptoPdfReferenceType.PUBLICATION + assert get_reference_type(Munch(number="2022110447")) == UsptoPdfReferenceType.APPLICATION + assert get_reference_type(Munch(number="2548918")) == UsptoPdfReferenceType.PUBLICATION + assert get_reference_type(Munch(number=1)) == UsptoPdfReferenceType.PUBLICATION def test_get_reference_type_invalid(): @@ -172,9 +172,9 @@ def test_get_reference_type_invalid(): assert ex.match(re.escape("Unknown document reference type: None")) with pytest.raises(ValueError) as ex: - get_reference_type(Bunch()) + get_reference_type(Munch()) assert ex.match(re.escape("Unknown document reference type:")) with pytest.raises(ValueError) as ex: - get_reference_type(Bunch(number=None)) + get_reference_type(Munch(number=None)) assert ex.match(re.escape("Unknown document reference type:")) diff --git a/tests/test_numberlists.py b/tests/test_numberlists.py index 860eb35a..63096e49 100644 --- a/tests/test_numberlists.py +++ b/tests/test_numberlists.py @@ -5,26 +5,26 @@ def test_parse_numberlist(): """ Proof that conveniently parsing a list of items works. """ - assert parse_numberlist(u"foo , bar") == [u'foo', u'bar'] - assert parse_numberlist(u"foo \n bar") == [u'foo', u'bar'] + assert parse_numberlist("foo , bar") == ['foo', 'bar'] + assert parse_numberlist("foo \n bar") == ['foo', 'bar'] def test_normalize_numbers_valid(): """ Normalize a list of valid patent numbers. """ - assert normalize_numbers([u'EP666666B1', u'EP1000000']) == {'all': [u'EP0666666B1', u'EP1000000'], 'invalid': [], 'valid': [u'EP0666666B1', u'EP1000000']} + assert normalize_numbers(['EP666666B1', 'EP1000000']) == {'all': ['EP0666666B1', 'EP1000000'], 'invalid': [], 'valid': ['EP0666666B1', 'EP1000000']} def test_normalize_numbers_invalid(): """ Normalize a list of invalid patent numbers. """ - assert normalize_numbers([u'foo', u'bar']) == {'all': [u'foo', u'bar'], 'invalid': [u'foo', u'bar'], 'valid': []} + assert normalize_numbers(['foo', 'bar']) == {'all': ['foo', 'bar'], 'invalid': ['foo', 'bar'], 'valid': []} def test_normalize_numbers_mixed(): """ Normalize a list of both valid and invalid patent numbers. 
""" - assert normalize_numbers([u'EP666666B1', u'foobar']) == {'all': [u'EP0666666B1', u'foobar'], 'invalid': [u'foobar'], 'valid': [u'EP0666666B1']} + assert normalize_numbers(['EP666666B1', 'foobar']) == {'all': ['EP0666666B1', 'foobar'], 'invalid': ['foobar'], 'valid': ['EP0666666B1']} diff --git a/tests/util/test_jwt.py b/tests/util/test_jwt.py index 9fb7f71e..c7204585 100644 --- a/tests/util/test_jwt.py +++ b/tests/util/test_jwt.py @@ -59,7 +59,7 @@ def test_signer_sign_invalid_expiration(jwt_signer): """ with pytest.raises(ValueError) as ex: jwt_signer.sign("foo", ttl="bar") - assert ex.match("value=bar, type= is an invalid JWT expiration date") + assert ex.match("value=bar, type= is an invalid JWT expiration date, use `datetime.datetime` or `datetime.timedelta") def test_signer_unsign_expired_token(): @@ -77,7 +77,7 @@ def test_signer_unsign_expired_token(): 'location': 'JSON Web Token', 'name': '_JWTError', 'jwt_expiry': 1640995200, - 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'}, + 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'}, } @@ -117,8 +117,8 @@ def test_signer_unsign_invalid_payload(jwt_signer): assert value == { 'location': 'JSON Web Token', - 'jwt_header': {u'alg': u'RS256', u'typ': u'JWT'}, + 'jwt_header': {'alg': 'RS256', 'typ': 'JWT'}, 'description': 'No "data" attribute in payload/claims', 'name': 'JwtSigner', - 'jwt_payload': {u'foo': u'bar', u'exp': 2145916800}, + 'jwt_payload': {'foo': 'bar', 'exp': 2145916800}, } diff --git a/tests/util/test_numbers_common.py b/tests/util/test_numbers_common.py index a6ebb516..1233a8e7 100644 --- a/tests/util/test_numbers_common.py +++ b/tests/util/test_numbers_common.py @@ -27,11 +27,11 @@ def generate(data): class TestNumberDecoding: - @pytest.mark.parametrize("number,expected,computed", generate(good), ids=good.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(good), ids=list(good.keys())) def testDecodeOK(self, number, expected, computed): self.check_ok(number, expected, computed) - @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=bad.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(bad), ids=list(bad.keys())) def testDecodeBAD(self, number, expected, computed): self.check_ok(number, expected, computed) diff --git a/tests/util/test_numbers_normalize.py b/tests/util/test_numbers_normalize.py index 6930587e..2fe9e69b 100644 --- a/tests/util/test_numbers_normalize.py +++ b/tests/util/test_numbers_normalize.py @@ -595,11 +595,11 @@ def normalize_patent_us_smart(input): class TestNumberNormalization: - @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=t.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(t, fun=partial(normalize_patent, fix_kindcode=True, for_ops=True)), ids=list(t.keys())) def testDecodeOK(self, number, expected, computed): self.check_ok(number, expected, computed) - @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=depatisconnect_cases.keys()) + @pytest.mark.parametrize("number,expected,computed", generate(depatisconnect_cases, fun=partial(depatisconnect_alternatives)), ids=list(depatisconnect_cases.keys())) def test_depatisconnect_alternatives(self, number, expected, computed): self.check_ok(number, expected, computed) diff --git a/tests/util/test_python.py b/tests/util/test_python.py index ad8638a2..d3ce8955 100644 --- a/tests/util/test_python.py +++ 
b/tests/util/test_python.py @@ -8,11 +8,11 @@ def test_run_command_success_basic(): - assert run_command(["echo", "foo"]).read().strip() == "foo" + assert run_command(["echo", "foo"]).read().strip() == b"foo" def test_run_command_success_input(): - assert run_command(["cat"], input="foo").read().strip() == "foo" + assert run_command(["cat"], input=b"foo").read().strip() == b"foo" def test_run_command_failure_not_found(): @@ -29,8 +29,8 @@ def test_run_command_failure_program_error(): def test_run_command_failure_input_error(): with pytest.raises(RuntimeError) as ex: - run_command(["true"], input={"abc": "def"}) - assert ex.match('Command "true" failed, returncode=None, exception=unhashable type, stderr=') + run_command(["true"], input={b"abc": b"def"}) + assert ex.match('Command "true" failed, returncode=None, exception=memoryview: a bytes-like object is required, not \'dict\', stderr=') def test_memoize(): @@ -49,4 +49,4 @@ def test_exception_traceback(capsys): output = exception_traceback() assert "Traceback (most recent call last)" in output - assert "NameError: global name 'foobar' is not defined" in output + assert "NameError: name \'foobar\' is not defined" in output diff --git a/tests/util/test_text_format.py b/tests/util/test_text_format.py index e0174517..a9680957 100644 --- a/tests/util/test_text_format.py +++ b/tests/util/test_text_format.py @@ -4,14 +4,14 @@ def test_slugify(): - assert slugify("Franz jagt Trueffel.") == "franz-jagt-trueffel" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋") == "franz-jagt-truffel-im-wald" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == "franz-jagt-truffel-=-im-wald" - assert slugify(u"Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == "Franz-jagt-Truffel-im-Wald" + assert slugify("Franz jagt Trueffel.") == b"franz-jagt-trueffel" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋") == b"franz-jagt-truffel-im-wald" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋", strip_equals=False) == b"franz-jagt-truffel-=-im-wald" + assert slugify("Franz jagt Trüffel -=- im Wald. 👋", lowercase=False) == b"Franz-jagt-Truffel-im-Wald" def test_text_indent(): - assert text_indent(u"Franz jagt Trüffel.\nIm Wald.\n\n👋") == u""" + assert text_indent("Franz jagt Trüffel.\nIm Wald.\n\n👋") == """ Franz jagt Trüffel. Im Wald. 
From 578f971b87da5c797b06d4a4434c71e26a4584e4 Mon Sep 17 00:00:00 2001
From: Papoteur
Date: Sat, 25 Mar 2023 13:18:19 +0100
Subject: [PATCH 13/23] Fix expected results within tests

---
 patzilla/util/cql/pyparsing/__init__.py      | 4 ++--
 patzilla/util/cql/pyparsing/test/05_misc.rst | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/patzilla/util/cql/pyparsing/__init__.py b/patzilla/util/cql/pyparsing/__init__.py
index e75529c5..9bf78eda 100644
--- a/patzilla/util/cql/pyparsing/__init__.py
+++ b/patzilla/util/cql/pyparsing/__init__.py
@@ -45,7 +45,7 @@ def parse(self):
 
         >>> tokens = parse_cql('foo=bar')
         >>> tokens
-        ([(['foo', u'=', 'bar'], {'triple': [((['foo', u'=', 'bar'], {}), 0)]})], {})
+        ParseResults([ParseResults(['foo', '=', 'bar'], {'triple': ['foo', '=', 'bar']})], {})
 
         """
 
@@ -60,7 +60,7 @@ def parse(self):
             #if self.logging:
             #    log.info(u'tokens: %s', tokens.pformat())
 
-        except ParseException as ex:
+        except pyparsing.exceptions.ParseException as ex:
             ex.explanation = '%s\n%s\n%s' % (ex.pstr, ' ' * ex.loc + '^\n', ex)
             #if self.logging:
             #    log.error('\n%s', ex.explanation)
diff --git a/patzilla/util/cql/pyparsing/test/05_misc.rst b/patzilla/util/cql/pyparsing/test/05_misc.rst
index 2f556800..4d752805 100644
--- a/patzilla/util/cql/pyparsing/test/05_misc.rst
+++ b/patzilla/util/cql/pyparsing/test/05_misc.rst
@@ -68,7 +68,7 @@ Lacking terms
 >>> CQL('foo=', logging=False).dumps()
 Traceback (most recent call last):
 ...
-ParseException: Expected term (at char 4), (line:1, col:5)
+ParseException: Expected term, found end of text  (at char 4), (line:1, col:5)
 
 >>> CQL('foo= and bar=', logging=False).dumps()
 Traceback (most recent call last):

From 7b8d681f806e1afbe1acf0e4a4e27b37736a7231 Mon Sep 17 00:00:00 2001
From: Papoteur
Date: Sat, 25 Mar 2023 13:18:42 +0100
Subject: [PATCH 14/23] Update requirements

---
 setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.py b/setup.py
index dd630b4a..55b90be9 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
     # Environment
     # ----------------------------------------------
     'six>=1.10.0',
-    'mock>=3,<4',               # 4.0.3
+    'mock',
 
     # ----------------------------------------------
     # Backend
@@ -98,12 +98,12 @@
     'ago==0.0.9',               # 0.0.93
     'arrow==0.10.0',            # 0.12.1
     'validate_email<2',
-    'numpy==1.16.6',            # 1.22.3
+    'numpy>=1.16.6',            # 1.22.3
     'pandas',                   # 0.22.0, 0.25.3, 1.4.2
     'pathlib',
 
     # Data formatting
-    'openpyxl>=2.4.2,<3',
+    'openpyxl',
     'xlrd3',
     'XlsxWriter==0.9.3',        # 1.4.5, 2.0.0, 3.0.3

From 7fe3e9e2cdc9b12bfd88d0e6ca2e88b8d8fac1bc Mon Sep 17 00:00:00 2001
From: Papoteur
Date: Sat, 25 Mar 2023 13:19:59 +0100
Subject: [PATCH 15/23] Fix reading number file

---
 patzilla/util/numbers/helper.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/patzilla/util/numbers/helper.py b/patzilla/util/numbers/helper.py
index 6a498bb4..520c49fc 100644
--- a/patzilla/util/numbers/helper.py
+++ b/patzilla/util/numbers/helper.py
@@ -22,11 +22,11 @@ def strip_spaces(number):
     number = r_invalid.sub('', number)
     return number
 
-def read_numbersfile(file):
-    fh = open(file, 'r')
+def read_numbersfile(_file):
+    fh = open(_file, 'r')
     numbers_raw = fh.readlines()
     fh.close()
 
-    numbers = [number.replace("\n", '').replace(' ', '') for number in numbers_raw]
+    numbers = [number.strip(" ;\"'\t\n\r") for number in numbers_raw]
     numbers = [number for number in numbers if number and not number.startswith('#')]
     return numbers

From e075113becf4573cb323aab3b900d1ff7f515226 Mon Sep 17 00:00:00 2001
From: Papoteur
Date: Sat, 25 Mar 2023
From e075113becf4573cb323aab3b900d1ff7f515226 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sat, 25 Mar 2023 13:21:20 +0100 Subject: [PATCH 16/23] Fix usage of builtins for testing file with mock in Python 3
--- tests/util/test_numbers_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/util/test_numbers_helper.py b/tests/util/test_numbers_helper.py index 989ccaf7..c3ecb799 100644 --- a/tests/util/test_numbers_helper.py +++ b/tests/util/test_numbers_helper.py
@@ -18,6 +18,6 @@ def test_read_numbersfile(): """ # TODO: Need to adjust for Python 3, see https://stackoverflow.com/a/34677735. - with patch("__builtin__.open", mock_open(read_data=data)) as mock_file: + with patch("builtins.open", mock_open(read_data=data)) as mock_file: numbers = read_numbersfile(None) assert numbers == ['EP666666', 'EP666667', 'EP666668', 'EP666669']
From 119daacf5891920ed71acc2e41ad076a9c76e6fb Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sat, 25 Mar 2023 16:04:50 +0100 Subject: [PATCH 17/23] Fix OPS API
--- patzilla/access/epo/ops/api.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py index cb3ef0fb..35e1375f 100644 --- a/patzilla/access/epo/ops/api.py +++ b/patzilla/access/epo/ops/api.py
@@ -133,7 +133,7 @@ def results_swap_family_members(response): def match_filter(item, filter): if callable(filter): patent = split_patent_number(item) - outcome = list(filter(patent)) + outcome = filter(patent) else: outcome = item.startswith(filter) return outcome
@@ -167,7 +167,7 @@ def match_filter(item, filter): original_publication_numbers += representation_pubrefs_docdb # Debugging - #print 'representation_pubref_epodoc:', representation_pubref_epodoc + #print( 'representation_pubref_epodoc:', representation_pubref_epodoc) #print 'representation_pubrefs_docdb:', representation_pubrefs_docdb # Fetch family members. When failing, use first cycle as representation.
@@ -901,7 +901,7 @@ def handle_error(response, location): response_json.status = response.status_code # countermeasure against "_JSONError: " or the like - response_json.detail = str(response.status_code) + ' ' + response.reason + ': ' + response.content + response_json.detail = str(response.status_code) + ' ' + str(response.reason) + ': ' + str(response.content) #print "response:", response if len(request.errors) == 1:
@@ -911,19 +911,19 @@ def handle_error(response, location): url = error_info.get('url') status = str(error_info.get('status_code', '')) + ' ' + error_info.get('reason', '') - if 'CLIENT.InvalidCountryCode' in error_content: + if b'CLIENT.InvalidCountryCode' in error_content: ops_code = 'CLIENT.InvalidCountryCode' message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message) return response_json - if 'SERVER.EntityNotFound' in error_content: + if b'SERVER.EntityNotFound' in error_content: ops_code = 'SERVER.EntityNotFound' message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.warning(message) return response_json - if 'OPS - 404' in error_content or 'Page not found' in error_content: + if b'OPS - 404' in error_content or b'Page not found' in error_content: ops_code = '404 OPS Page not found' message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url)
log.error(message)
@@ -931,7 +931,7 @@ def handle_error(response, location): response_json.status_code = 502 return response_json - if 'This API version is not supported' in error_content: + if b'This API version is not supported' in error_content: ops_code = '404 API version not supported' message = 'OPS API response ({status}, {ops_code}). url={url}'.format(status=status, ops_code=ops_code, url=url) log.error(message)
From 0eef0803ddedb6a93b87e62e667e163c3b1ee801 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Sat, 25 Mar 2023 16:05:15 +0100 Subject: [PATCH 18/23] Fix some tests on OPS API
--- tests/access/test_epo_ops.py | 16 ++++++++-------- tests/commands/test_commands_ops.py | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/tests/access/test_epo_ops.py b/tests/access/test_epo_ops.py index 1dea183a..345aa1de 100644 --- a/tests/access/test_epo_ops.py +++ b/tests/access/test_epo_ops.py
@@ -50,7 +50,7 @@ def test_baseurl(app_request): response = client._make_request( OPS_BASE_URI, data={}, extra_headers={"Accept": "*"}, use_get=True, ) - assert "EPO - Open Patent Services (OPS)" in response.content + assert b"EPO - Open Patent Services (OPS)" in response.content def test_search_full_success(app_request):
@@ -218,7 +218,7 @@ def test_biblio_data_xml_success(app_request): Proof getting bibliographic for a specific document in XML format works. """ results = get_ops_biblio_data("publication", "EP0666666", xml=True) - assert results.startswith('<?xml') + assert results.startswith(b'<?xml') def test_document_kindcodes_success(app_request):
@@ -435,8 +435,8 @@ def test_description_xml_success(app_request): Acquire full text "description" in XML format. """ data = ops_description("EP666666A2", xml=True) - assert data.startswith('<?xml') - assert "The present invention generally relates to multi-node communication systems with shared resources." in data + assert data.startswith(b'<?xml') + assert b"The present invention generally relates to multi-node communication systems with shared resources." in data def test_description_failure(app_request):
@@ -485,8 +485,8 @@ def test_claims_xml_success(app_request): Acquire full text "claims" in XML format. """ data = ops_claims("EP666666A2", xml=True) - assert data.startswith('<?xml') - assert "1. In a communication system having a plurality of nodes" in data + assert data.startswith(b'<?xml') + assert b"1. In a communication system having a plurality of nodes" in data def test_claims_failure(app_request):
@@ -531,7 +531,7 @@ def test_family_docdb_xml_success(app_request): document_number="EP0666666A2", constituents="biblio", ) - assert response.startswith('<?xml') + assert response.startswith(b'<?xml') def test_family_docdb_xml_not_found_failure(app_request):
@@ -558,7 +558,7 @@ def test_register_json_success(app_request): def test_register_xml_success(app_request): response = ops_register(reference_type="publication", document_number="EP0666666A2", xml=True) - assert response.startswith('<?xml') + assert response.startswith(b'<?xml') def test_register_not_found_failure(app_request):
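The switch to bytes literals above follows from how requests behaves under Python 3: response.content stays raw bytes, and only response.text is a decoded str, so prefix and substring checks against the raw payload need bytes operands. A minimal sketch of that distinction, assuming only the requests package; the endpoint shown is illustrative, not the exact URL the client uses:

    import requests

    response = requests.get("https://ops.epo.org/")  # illustrative endpoint
    assert isinstance(response.content, bytes)  # raw body: compare with b"..." literals
    assert isinstance(response.text, str)       # decoded body: compare with "..." literals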
diff --git a/tests/commands/test_commands_ops.py b/tests/commands/test_commands_ops.py index c2830056..a18fddf5 100644 --- a/tests/commands/test_commands_ops.py +++ b/tests/commands/test_commands_ops.py
@@ -76,7 +76,7 @@ def test_command_ops_image_fulldocument_pdf_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("%PDF-1.4") + assert result.stdout.startswith("b'%PDF-1.4") assert 30000 < len(result.stdout) < 50000
@@ -89,7 +89,7 @@ def test_command_ops_image_fulldocument_tiff_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1 --format=tiff", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith(b"\x4d\x4d\x00\x2a") + assert result.stdout.startswith("b'\x4d\x4d\x00\x2a") def test_command_ops_image_drawing_pdf_success():
@@ -101,7 +101,7 @@ result = runner.invoke(cli, "ops image --document=EP0666666B1 --kind=FullDocumentDrawing --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("%PDF-1.4") + assert result.stdout.startswith("b'%PDF-1.4") assert 10000 < len(result.stdout) < 20000
From 8e31c32f034d12f8a51812fa2f30eccb9663145a Mon Sep 17 00:00:00 2001 From: Papoteur Date: Tue, 28 Mar 2023 17:08:09 +0200 Subject: [PATCH 19/23] Use Python 3.8 for testing purposes
--- .github/workflows/tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 6afe2da0..b6d9824a 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml
@@ -26,7 +26,7 @@ jobs: # Don't use macOS for now, it is currently unstable, otherwise slow. -- 2022-04-19, amo # 'macos-latest', ] - python-version: ['2.7'] + python-version: ['3.8'] defaults: run:
@@ -103,7 +103,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v4 with: - python-version: '2.7' + python-version: '3.8' architecture: 'x64' cache: 'pip' cache-dependency-path: 'requirements-docs.txt'
From 46d404732d1af4c245508cb8307e5bf54b76be72 Mon Sep 17 00:00:00 2001 From: Papoteur Date: Tue, 28 Mar 2023 17:08:59 +0200 Subject: [PATCH 20/23] Update Copyright year
--- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/conf.py b/docs/conf.py index 48e0265c..5569cd62 100644 --- a/docs/conf.py +++ b/docs/conf.py
@@ -50,7 +50,7 @@ # General information about the project.
project = 'PatZilla' -copyright = '2013-2012, The PatZilla authors' +copyright = '2013-2023, The PatZilla authors' author = 'The PatZilla authors' # The version info for the project you're documenting, acts as replacement for From 18504d86418024f36276230c90fe66170806cf95 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 29 Mar 2023 02:09:02 +0200 Subject: [PATCH 21/23] [mw] Improve dependencies after migration to Python 3 --- setup.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 55b90be9..763fadec 100644 --- a/setup.py +++ b/setup.py @@ -41,9 +41,9 @@ # Database and storage # Can't upgrade to pymongo-3.5.1 due to "from pymongo.connection import Connection" # usage in "mongodb_gridfs_beaker" module. - 'pymongo', # 3.13.0, 4.3.3 - 'mongodb_gridfs_beaker==0.6.0dev1', - 'mongoengine', # 0.24.1 + 'pymongo', # 3.13.0, 4.3.3 + 'mongodb_gridfs_beaker@https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker', + 'mongoengine==0.20.0', # 0.27.0 'python-magic<1', # Web services @@ -93,7 +93,7 @@ # Data handling 'attrs', 'Munch', - 'pyparsing', + 'pyparsing<4', # 3.0.9 'python-dateutil<3', 'ago==0.0.9', # 0.0.93 'arrow==0.10.0', # 0.12.1 @@ -215,11 +215,6 @@ extras_require={ 'test': test_requires, }, - dependency_links=[ - 'https://github.com/ip-tools/mongodb_gridfs_beaker/archive/0.6.0dev1.tar.gz#egg=mongodb_gridfs_beaker', - 'https://github.com/ip-tools/mechanize/archive/v0.4.3dev2.tar.gz#egg=mechanize-0.4.3dev2', - #'https://github.com/dagwieers/unoconv/archive/master.tar.gz#egg=unoconv-0.8.2', - ], entry_points={ 'paste.app_factory': [ From 22a200876290943bb592aff3120da590ff7147e1 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 29 Mar 2023 03:02:30 +0200 Subject: [PATCH 22/23] [mw] Fix more tests after migration to Python 3 --- patzilla/access/epo/ops/api.py | 2 +- patzilla/access/epo/ops/commands.py | 3 ++- patzilla/util/cql/pyparsing/test/05_misc.rst | 2 +- setup.cfg | 2 ++ tests/access/test_epo_ops.py | 12 ++++++------ tests/commands/test_commands_ops.py | 10 +++++----- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/patzilla/access/epo/ops/api.py b/patzilla/access/epo/ops/api.py index 35e1375f..e6da53c2 100644 --- a/patzilla/access/epo/ops/api.py +++ b/patzilla/access/epo/ops/api.py @@ -907,7 +907,7 @@ def handle_error(response, location): if len(request.errors) == 1: error_info = request.errors[0].get('description') if error_info.get('status_code') == 404: - error_content = error_info.get('content', '') + error_content = error_info.get('content', b'') url = error_info.get('url') status = str(error_info.get('status_code', '')) + ' ' + error_info.get('reason', '') diff --git a/patzilla/access/epo/ops/commands.py b/patzilla/access/epo/ops/commands.py index 038625fe..faab8cb8 100644 --- a/patzilla/access/epo/ops/commands.py +++ b/patzilla/access/epo/ops/commands.py @@ -21,6 +21,7 @@ """ import json import logging +import sys from datetime import date, timedelta import click @@ -133,7 +134,7 @@ def image(ctx, document, page, kind, format): Access the OPS image acquisition API, see OPS handbook section 3.1.3. 
""" payload = get_ops_image(document, page, kind, format) - print(payload) + sys.stdout.buffer.write(payload) ops_cli.add_command(cmd=usage) diff --git a/patzilla/util/cql/pyparsing/test/05_misc.rst b/patzilla/util/cql/pyparsing/test/05_misc.rst index 4d752805..4ccdce8d 100644 --- a/patzilla/util/cql/pyparsing/test/05_misc.rst +++ b/patzilla/util/cql/pyparsing/test/05_misc.rst @@ -100,4 +100,4 @@ Error explanation ... CQL('foo bar', logging=False).dumps() ... except Exception as ex: ... ex.explanation -'foo bar\n ^\n\nExpected end of text, found 'bar' (at char 4), (line:1, col:5)' +"foo bar\n ^\n\nExpected end of text, found 'bar' (at char 4), (line:1, col:5)" diff --git a/setup.cfg b/setup.cfg index 28b62b06..5622110a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,8 @@ addopts = -rA -vvv --app-cache-backend=filesystem patzilla tests -k 'not uspto' +doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL + log_level = DEBUG log_cli_level = DEBUG diff --git a/tests/access/test_epo_ops.py b/tests/access/test_epo_ops.py index 345aa1de..fead9461 100644 --- a/tests/access/test_epo_ops.py +++ b/tests/access/test_epo_ops.py @@ -74,15 +74,15 @@ def test_search_biblio_compact_success(app_request): assert jpath('/0/pubdate', compact) == "1995-08-09" assert jpath('/1/pubnumber', compact) == "EP0666667" assert jpath('/1/pubdate', compact) == "1995-08-09" - assert compact[0].keys() == compact[1].keys() == [ + assert sorted(compact[0].keys()) == sorted(compact[1].keys()) == [ + 'abstract', 'appdate', 'applicant', - 'pubdate', 'appnumber', - 'title', - 'abstract', - 'pubnumber', 'inventor', + 'pubdate', + 'pubnumber', + 'title', ] @@ -573,4 +573,4 @@ def test_register_not_found_failure(app_request): def test_service_usage(app_request): response = ops_service_usage("01/01/2022", "02/01/2022") - assert response.keys() == ["response-size", "time-range", "message-count"] + assert sorted(response.keys()) == ["message-count", "response-size", "time-range"] diff --git a/tests/commands/test_commands_ops.py b/tests/commands/test_commands_ops.py index a18fddf5..a6b63f4b 100644 --- a/tests/commands/test_commands_ops.py +++ b/tests/commands/test_commands_ops.py @@ -76,8 +76,8 @@ def test_command_ops_image_fulldocument_pdf_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("b'%PDF-1.4") - assert 30000 < len(result.stdout) < 50000 + assert result.stdout_bytes.startswith(b"%PDF-1.4") + assert 30_000 < len(result.stdout_bytes) < 150_000 def test_command_ops_image_fulldocument_tiff_success(): @@ -89,7 +89,7 @@ def test_command_ops_image_fulldocument_tiff_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --page=1 --format=tiff", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("b'\x4d\x4d\x00\x2a") + assert result.stdout_bytes.startswith(b"\x4d\x4d\x00\x2a") def test_command_ops_image_drawing_pdf_success(): @@ -101,8 +101,8 @@ def test_command_ops_image_drawing_pdf_success(): result = runner.invoke(cli, "ops image --document=EP0666666B1 --kind=FullDocumentDrawing --page=1", catch_exceptions=False) assert result.exit_code == 0 - assert result.stdout.startswith("b'%PDF-1.4") - assert 10000 < len(result.stdout) < 20000 + assert result.stdout_bytes.startswith(b"%PDF-1.4") + assert 10_000 < len(result.stdout_bytes) < 20_000 def test_command_ops_image_failure(): From c7af54908b9ea79a9518498bf2df8a97169fe3d0 Mon Sep 17 00:00:00 2001 
From c7af54908b9ea79a9518498bf2df8a97169fe3d0 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 29 Mar 2023 03:17:24 +0200 Subject: [PATCH 23/23] [mw] Improve reading of data source credentials from environment vars
Before, the system would happily use empty credentials when CI was running from foreign repositories: there, the GitHub secrets attached to the repository are not active, but the GHA workflow configuration still sets the environment variables - to empty values.
--- patzilla/access/depatech/clientpool.py | 2 ++ patzilla/access/epo/ops/client.py | 2 ++ patzilla/access/ificlaims/clientpool.py | 2 ++ patzilla/access/sip/clientpool.py | 2 ++ 4 files changed, 8 insertions(+)
diff --git a/patzilla/access/depatech/clientpool.py b/patzilla/access/depatech/clientpool.py index 989247e0..62599608 100644 --- a/patzilla/access/depatech/clientpool.py +++ b/patzilla/access/depatech/clientpool.py
@@ -43,6 +43,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["DEPATECH_API_USERNAME"] or not os.environ["DEPATECH_API_PASSWORD"]: + raise KeyError("DEPATECH_API_USERNAME or DEPATECH_API_PASSWORD is empty") return { "api_username": os.environ["DEPATECH_API_USERNAME"], "api_password": os.environ["DEPATECH_API_PASSWORD"],
diff --git a/patzilla/access/epo/ops/client.py b/patzilla/access/epo/ops/client.py index 3f60f7ee..75070fd4 100644 --- a/patzilla/access/epo/ops/client.py +++ b/patzilla/access/epo/ops/client.py
@@ -38,6 +38,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["OPS_API_CONSUMER_KEY"] or not os.environ["OPS_API_CONSUMER_SECRET"]: + raise KeyError("OPS_API_CONSUMER_KEY or OPS_API_CONSUMER_SECRET is empty") return { "consumer_key": os.environ["OPS_API_CONSUMER_KEY"], "consumer_secret": os.environ["OPS_API_CONSUMER_SECRET"],
diff --git a/patzilla/access/ificlaims/clientpool.py b/patzilla/access/ificlaims/clientpool.py index 2a5fb2b2..a926ad16 100644 --- a/patzilla/access/ificlaims/clientpool.py +++ b/patzilla/access/ificlaims/clientpool.py
@@ -46,6 +46,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["IFICLAIMS_API_USERNAME"] or not os.environ["IFICLAIMS_API_PASSWORD"]: + raise KeyError("IFICLAIMS_API_USERNAME or IFICLAIMS_API_PASSWORD is empty") return { "api_username": os.environ["IFICLAIMS_API_USERNAME"], "api_password": os.environ["IFICLAIMS_API_PASSWORD"],
diff --git a/patzilla/access/sip/clientpool.py b/patzilla/access/sip/clientpool.py index 42a2db22..1c8e679d 100644 --- a/patzilla/access/sip/clientpool.py +++ b/patzilla/access/sip/clientpool.py
@@ -46,6 +46,8 @@ def from_settings(datasource_settings): @staticmethod def from_environment(): + if not os.environ["SIP_API_USERNAME"] or not os.environ["SIP_API_PASSWORD"]: + raise KeyError("SIP_API_USERNAME or SIP_API_PASSWORD is empty") return { "api_username": os.environ["SIP_API_USERNAME"], "api_password": os.environ["SIP_API_PASSWORD"],
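All four hunks above share the same guard: reject credentials that are present in the environment but empty. A minimal sketch of that shared pattern, standard library only; the helper name require_env is hypothetical and does not exist in the patch, which deliberately keeps the check inline per data source:

    import os

    def require_env(*names):
        # Hypothetical consolidation of the per-datasource guards above.
        values = []
        for name in names:
            value = os.environ[name]  # a missing variable raises KeyError, as before
            if not value:
                raise KeyError("{} is empty".format(name))
            values.append(value)
        return values

    # Usage, mirroring the from_environment() methods:
    # api_username, api_password = require_env("SIP_API_USERNAME", "SIP_API_PASSWORD")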