Skip to content

Commit

Permalink
SFR-2288 Querying by OCLC Number uses metadata API MARCXML endpoint (#464)
Browse files Browse the repository at this point in the history

* Update scope to use new view_marc_bib scope & no manage/write scopes
* Fix unit tests
* Strip OCLC prefixes from v2 catalog xml
  • Loading branch information
Apophenia authored Dec 5, 2024
1 parent 731f070 commit 5d49d08
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 39 deletions.
2 changes: 1 addition & 1 deletion managers/oclc_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class OCLCAuthManager:
_metadata_token = None
_metadata_token_expires_at = None
OCLC_SEARCH_AUTH_URL = 'https://oauth.oclc.org/token?scope=wcapi&grant_type=client_credentials'
OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI&grant_type=client_credentials'
OCLC_METADATA_AUTH_URL = 'https://oauth.oclc.org/token?scope=WorldCatMetadataAPI:view_marc_bib&grant_type=client_credentials'

TIME_TO_REFRESH_IN_SECONDS = 60

Expand Down
30 changes: 3 additions & 27 deletions managers/oclc_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,21 @@


class OCLCCatalogManager:
CATALOG_URL = 'http://www.worldcat.org/webservices/catalog/content/{}?wskey={}'
METADATA_BIB_URL = 'https://metadata.api.oclc.org/worldcat/manage/bibs/{}'
OCLC_SEARCH_URL = 'https://americas.discovery.api.oclc.org/worldcat/search/v2/'
ITEM_TYPES = ['archv', 'audiobook', 'book', 'encyc', 'jrnl']
LIMIT = 50
MAX_NUMBER_OF_RECORDS = 100
BEST_MATCH = 'bestMatch'

def __init__(self):
self.oclc_key = os.environ['OCLC_API_KEY']

def query_catalog_v2(self, oclc_no):
catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)
def query_catalog(self, oclc_no):
catalog_query = self.METADATA_BIB_URL.format(oclc_no)

for _ in range(0, 3):
try:
token = OCLCAuthManager.get_metadata_token()
headers = { 'Authorization': f'Bearer {token}' }

catalog_response = requests.get(catalog_query, headers=headers, timeout=3)

if catalog_response.status_code != 200:
Expand All @@ -42,27 +38,7 @@ def query_catalog_v2(self, oclc_no):
except Exception as e:
logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
return None

return None

def query_catalog(self, oclc_no):
catalog_query = self.CATALOG_URL.format(oclc_no, self.oclc_key)

for _ in range(0, 3):
try:
catalog_response = requests.get(catalog_query, timeout=3)

if catalog_response.status_code != 200:
logger.warning(f'OCLC catalog request failed with status {catalog_response.status_code}')
return None

return catalog_response.text
except (Timeout, ConnectionError):
logger.warning(f'Could not connect to {catalog_query} or timed out')
except Exception as e:
logger.error(f'Failed to query catalog with query {catalog_query} due to {e}')
return None

return None

def get_related_oclc_numbers(self, oclc_number: int) -> list[int]:
Expand Down
13 changes: 11 additions & 2 deletions mappings/oclcCatalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,18 @@ class CatalogMapping(XMLMapping):
'hathitrust': r'catalog.hathitrust.org\/api\/volumes\/([a-z]{3,6}\/[a-zA-Z0-9]+)\.html'
}

OCLC_PREFIXES = ['(OCoLC)', 'on', 'ocn', 'ocm']

def __init__(self, source, namespace, constants):
    """Initialise the parent mapping, then build this class's field mapping.

    The three arguments are forwarded unchanged to the parent initialiser;
    the result of ``createMapping()`` is stored on ``self.mapping``.
    """
    # Parent setup runs first so createMapping() sees an initialised instance.
    super().__init__(source, namespace, constants)
    self.mapping = self.createMapping()

def remove_oclc_prefixes(self, oclc_id):
    """Return ``oclc_id`` with any leading OCLC prefixes stripped.

    Every entry of ``OCLC_PREFIXES`` is tried exactly once, in list order,
    against the current start of the string, so stacked prefixes such as
    ``(OCoLC)on`` are both removed. A value that starts with none of the
    prefixes is returned unchanged.
    """
    stripped = oclc_id
    for marker in self.OCLC_PREFIXES:
        # Only the start of the string is inspected; matches elsewhere stay.
        if stripped.startswith(marker):
            stripped = stripped[len(marker):]
    return stripped


def createMapping(self):
return {
'title': ('//oclc:datafield[@tag=\'245\']/oclc:subfield[@code=\'a\' or @code=\'b\']/text()', '{0} {1}'),
Expand Down Expand Up @@ -158,12 +166,13 @@ def createMapping(self):

def applyFormatting(self):
self.record.source = 'oclcCatalog'
self.record.identifiers[0] = self.remove_oclc_prefixes(self.record.identifiers[0])
self.record.source_id = self.record.identifiers[0]
self.record.frbr_status = 'complete'

_, _, lang_3, *_ = tuple(self.record.languages[0].split('|'))
self.record.languages = [('||{}'.format(lang_3[35:38]))]

self.record.has_part = self.record.has_part[:10]

self.record.has_part = list(filter(None, [
Expand Down
27 changes: 18 additions & 9 deletions tests/unit/test_oclcCatalog_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,25 @@
class TestOCLCCatalogManager:
@pytest.fixture
def testInstance(self, mocker):
mocker.patch.dict('os.environ', {'OCLC_API_KEY': 'test_api_key'})

return OCLCCatalogManager()

def test_initializer(self, testInstance):
assert testInstance.oclc_key == 'test_api_key'

def test_query_catalog_success(self, testInstance, mocker):
mockResponse = mocker.MagicMock()
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.return_value = mockResponse

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == 'testClassifyRecord'
mockRequest.get.assert_called_once_with(
'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
'https://metadata.api.oclc.org/worldcat/manage/bibs/1',
headers={'Authorization': 'Bearer foo'},
timeout=3
)

Expand All @@ -36,14 +35,18 @@ def test_query_catalog_error(self, testInstance, mocker):
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.return_value = mockResponse

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 500
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == None
mockRequest.get.assert_called_once_with(
'http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key',
'https://metadata.api.oclc.org/worldcat/manage/bibs/1',
headers={'Authorization': 'Bearer foo'},
timeout=3
)

Expand All @@ -52,29 +55,35 @@ def test_query_catalog_single_retry_then_success(self, testInstance, mocker):
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.side_effect = [ConnectionError, mockResponse]

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == 'testClassifyRecord'
mockRequest.get.assert_has_calls(
[mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 2
[mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 2
)

def test_query_catalog_exhaust_retries(self, testInstance, mocker):
mockResponse = mocker.MagicMock()
mockRequest = mocker.patch('managers.oclc_catalog.requests')
mockRequest.get.side_effect = [ConnectionError, ConnectionError, Timeout]

mock_auth = mocker.patch('managers.oclc_auth.OCLCAuthManager.get_metadata_token')
mock_auth.return_value = 'foo'

mockResponse.status_code = 200
mockResponse.text = 'testClassifyRecord'

testResponse = testInstance.query_catalog(1)

assert testResponse == None
mockRequest.get.assert_has_calls(
[mocker.call('http://www.worldcat.org/webservices/catalog/content/1?wskey=test_api_key', timeout=3)] * 3
[mocker.call('https://metadata.api.oclc.org/worldcat/manage/bibs/1', timeout=3, headers={'Authorization': 'Bearer foo'})] * 3
)

def test_generate_search_query_w_identifier(self, testInstance):
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/test_oclcCatalog_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def testRecord_standard(self, mocker):
has_part=['1|uri|test|text/html|{}', '1|uri|bad|text/html|{}']
)

def test_remove_oclc_prefixes(self, testMapping):
assert testMapping.remove_oclc_prefixes('on48542660') == '48542660'
assert testMapping.remove_oclc_prefixes('ocm48542660') == '48542660'
assert testMapping.remove_oclc_prefixes('(OCoLC)on48542660') == '48542660'
assert testMapping.remove_oclc_prefixes('foo48542660') == 'foo48542660'

def test_createMapping(self, testMapping):
recordMapping = testMapping.createMapping()

Expand Down

0 comments on commit 5d49d08

Please sign in to comment.