From df5a5fa7ddd8444180e7732b419d4ea5f5e696ca Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 20 Jul 2023 16:43:33 -0400
Subject: [PATCH 1/4] SFR-1718v2_AutoIngestionLOC

---
 CHANGELOG.md                   |  3 +-
 processes/loc.py               | 77 +++++++++++++++++++++++++++-------
 tests/unit/test_loc_process.py | 25 ++++++++++-
 3 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 34961e2929..a612a4eb87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # CHANGELOG
 
-## unreleased version -- v0.12.1
+## 2023-07-20 -- v0.12.1
 ### Added
 - Script to delete current duplicate authors/contributors in the PSQL database
 - Web scraper script for the Chicago ISAC catalog of publications
@@ -8,6 +8,7 @@
 - Unit tests for Chicago ISAC catalog mapping and process
 - Added mapping for Library of Congress collections
 - Added process for Library of Congress collections
+- Automatic ingestion for Library of Congress collections
 ### Fixed
 - Modify agentParser method to reduce number of future duplicate agents
 - Install `wheel` with pip to fix fasttext build
diff --git a/processes/loc.py b/processes/loc.py
index 708f364edd..3525f195e4 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -7,10 +7,11 @@
 from mappings.loc import LOCMapping
 from managers import WebpubManifest
 from logger import createLog
+from datetime import datetime, timedelta
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=2'
 LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results'
 
 class LOCProcess(CoreProcess):
@@ -20,7 +21,8 @@ def __init__(self, *args):
         self.ingestOffset = int(args[5] or 0)
         self.ingestLimit = (int(args[4]) + self.ingestOffset) if args[4] else 5000
-        self.fullImport = self.process == 'complete'
+        self.fullImport = self.process == 'complete'
+        self.startTimestamp = None
 
         # Connect to database
         self.generateEngine()
@@ -37,27 +39,49 @@ def __init__(self, *args):
         self.createS3Client()
 
     def runProcess(self):
+        if self.process == 'weekly':
+            self.importLOCRecords()
+        elif self.process == 'complete':
+            self.importLOCRecords(fullImport=True)
+        elif self.process == 'custom':
+            self.importLOCRecords(startTimestamp=self.ingestPeriod)
+
+        self.saveRecords()
+        self.commitChanges()
+
+
+    def importLOCRecords(self, fullImport=False, startTimestamp=None):
+
+        if not fullImport:
+            if not startTimestamp:
+                startTimestamp = datetime.utcnow() - timedelta(days=7)
+            else:
+                startTimestamp = datetime.strptime(startTimestamp, '%Y-%m-%dT%H:%M:%S')
+
         openAccessRequestCount = 0
         digitizedRequestCount = 0
 
         try:
-            openAccessRequestCount = self.importOpenAccessRecords(openAccessRequestCount)
+            openAccessRequestCount = self.importOpenAccessRecords(openAccessRequestCount, startTimestamp)
+            logger.debug('Open Access Collection Ingestion Complete')
         except Exception or HTTPError as e:
             logger.exception(e)
 
         try:
-            digitizedRequestCount = self.importDigitizedRecords(digitizedRequestCount)
+            digitizedRequestCount = self.importDigitizedRecords(digitizedRequestCount, startTimestamp)
+            logger.debug('Digitized Books Collection Ingestion Complete')
         except Exception or HTTPError as e:
             logger.exception(e)
 
-        self.saveRecords()
-        self.commitChanges()
+
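A standalone sketch of the two cutoff paths importLOCRecords takes above, runnable on its own; the sample ingestPeriod string is hypothetical:

    from datetime import datetime, timedelta

    # 'weekly' run: the cutoff defaults to seven days before now (UTC),
    # mirroring datetime.utcnow() - timedelta(days=7) in the patch.
    weeklyCutoff = datetime.utcnow() - timedelta(days=7)

    # 'custom' run: ingestPeriod arrives as a string and is parsed with the
    # same '%Y-%m-%dT%H:%M:%S' format the patch uses; the value shown here
    # is hypothetical.
    customCutoff = datetime.strptime('2023-07-01T00:00:00', '%Y-%m-%dT%H:%M:%S')

    print(weeklyCutoff, customCutoff)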
-    def importOpenAccessRecords(self, count):
-        sp = 2
+    def importOpenAccessRecords(self, count, weekTimeStamp):
+        sp = 1
 
         try:
+            whileBreakFlag = False
+
             # An HTTP error will occur when the sp parameter value
             # passes the last page number of the collection search results
             while sp < 100000:
@@ -66,6 +90,14 @@ def importOpenAccessRecords(self, count):
                 LOCData = jsonData.json()
 
                 for metaDict in LOCData['results']:
+                    # Weekly ingestion conditional
+                    if weekTimeStamp:
+                        itemTimeStamp = datetime.strptime(metaDict['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
+
+                        if itemTimeStamp > weekTimeStamp:
+                            whileBreakFlag = True
+                            break
+
                     resources = metaDict['resources'][0]
                     if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
                         logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
@@ -76,27 +108,38 @@ def importOpenAccessRecords(self, count):
 
                         logger.debug(f'Count for OP Access: {count}')
 
+            if whileBreakFlag:
+                logger.debug('No new items added to collection')
+                break
+
             sp += 1
 
         except Exception or HTTPError as e:
             if e == Exception:
                 logger.exception(e)
-        else:
-            logger.debug('Open Access Collection Ingestion Complete')
 
         return count
 
-    def importDigitizedRecords(self, count):
-        sp = 2
+    def importDigitizedRecords(self, count, weekTimeStamp):
+        sp = 1
 
         try:
             # An HTTP error will occur when the sp parameter value
             # passes the last page number of the collection search results
-            while sp > 100000:
+            whileBreakFlag = False
+            while sp < 100000:
                 digitizedURL = '{}&sp={}'.format(LOC_ROOT_DIGIT, sp)
                 jsonData = self.fetchPageJSON(digitizedURL)
                 LOCData = jsonData.json()
 
                 for metaDict in LOCData['results']:
+                    # Weekly ingestion conditional
+                    if weekTimeStamp:
+                        itemTimeStamp = datetime.strptime(metaDict['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
+
+                        if itemTimeStamp > weekTimeStamp:
+                            whileBreakFlag = True
+                            break
+
                     resources = metaDict['resources'][0]
                     if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
                         logger.debug(f'DIGITIZED URL: {digitizedURL}')
@@ -107,6 +150,12 @@ def importDigitizedRecords(self, count):
 
                         logger.debug(f'Count for Digitized: {count}')
 
+
+            if whileBreakFlag:
+                logger.debug('No new items added to collection')
+                break
+
             sp += 1
 
         return count
 
@@ -114,8 +163,6 @@ def importDigitizedRecords(self, count):
         except Exception or HTTPError as e:
             if e == Exception:
                 logger.exception(e)
-        else:
-            logger.debug('Digitized Books Collection Ingestion Complete')
 
     def processLOCRecord(self, record):
         try:
diff --git a/tests/unit/test_loc_process.py b/tests/unit/test_loc_process.py
index 7fde9edecb..516c1a9a59 100644
--- a/tests/unit/test_loc_process.py
+++ b/tests/unit/test_loc_process.py
@@ -22,7 +22,7 @@ def __init__(self):
             self.session = mocker.MagicMock(session='testSession')
             self.records = mocker.MagicMock(record='testRecord')
             self.batchSize = mocker.MagicMock(batchSize='testBatchSize')
-            self.process = mocker.MagicMock(process='testRecord')
+            self.process = 'complete'
 
         return TestLOC()
 
@@ -30,11 +30,13 @@ def test_runProcess(self, testProcess, mocker):
         runMocks = mocker.patch.multiple(
             LOCProcess,
             saveRecords=mocker.DEFAULT,
-            commitChanges=mocker.DEFAULT
+            commitChanges=mocker.DEFAULT,
+            importLOCRecords=mocker.DEFAULT
         )
 
         testProcess.runProcess()
 
+        runMocks['importLOCRecords'].assert_called_once_with(fullImport=True)
         runMocks['saveRecords'].assert_called_once()
         runMocks['commitChanges'].assert_called_once()
 
@@ -93,6 +95,25 @@ def test_storePDFManifest(self, testProcess, mocker):
         mockGenerateMan.assert_called_once_with(mockRecord, 'testURI', testManifestURI)
         mockCreateMan.assert_called_once_with('manifests/loc/1.json', 'testJSON')
 
+    def test_storeEpubsInS3(self, testProcess, mocker):
+        mockRecord = mocker.MagicMock(identifiers=['1|loc'])
+        mockRecord.has_part = [
+            '1|testURI|loc|application/epub+zip|{"reader": false, "catalog": false, "download": true}',
+        ]
+
+        mockSendToQueue = mocker.patch.object(LOCProcess, 'sendFileToProcessingQueue')
+        mockAddEPUBManifest = mocker.patch.object(LOCProcess, 'addEPUBManifest')
+
+        testProcess.storeEpubsInS3(mockRecord)
+
+        mockSendToQueue.assert_has_calls([
+            mocker.call('testURI', 'epubs/loc/1.epub'),
+        ])
+
+        mockAddEPUBManifest.assert_has_calls([
+            mocker.call(mockRecord, '1', 'loc', '{"reader": false, "catalog": false, "download": true}', 'application/epub+zip', 'epubs/loc/1.epub'),
+        ])
+
     def test_createManifestInS3(self, testProcess, mocker):
         mockPut = mocker.patch.object(LOCProcess, 'putObjectInBucket')

From 377eb4caa3845659a6448149b5f891a392b3e91b Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 23 Aug 2023 12:33:56 -0400
Subject: [PATCH 2/4] Added print statements to debug ingest issue

---
 processes/loc.py     | 31 ++++++++++++++++---------------
 processes/s3Files.py |  7 +++++++
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/processes/loc.py b/processes/loc.py
index c78d823e95..1eafcfcd76 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -11,8 +11,8 @@
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sb=timestamp_desc'
-LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sb=timestamp_desc'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
+LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
 
 class LOCProcess(CoreProcess):
 
@@ -28,16 +28,16 @@ def __init__(self, *args):
         self.generateEngine()
         self.createSession()
 
+        # S3 Configuration
+        self.createS3Client()
+        self.s3Bucket = os.environ['FILE_BUCKET']
+
         # Connect to epub processing queue
         self.fileQueue = os.environ['FILE_QUEUE']
         self.fileRoute = os.environ['FILE_ROUTING_KEY']
         self.createRabbitConnection()
         self.createOrConnectQueue(self.fileQueue, self.fileRoute)
 
-        # S3 Configuration
-        self.s3Bucket = os.environ['FILE_BUCKET']
-        self.createS3Client()
 
     def runProcess(self):
         if self.process == 'weekly':
             startTimeStamp = datetime.utcnow() - timedelta(days=7)
@@ -105,6 +105,7 @@ def importOpenAccessRecords(self, count, customTimeStamp):
                         count += 1
 
                         logger.debug(f'Count for OP Access: {count}')
+            raise Exception
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
@@ -150,11 +151,14 @@ def importDigitizedRecords(self, count, customTimeStamp):
 
                         logger.debug(f'Count for Digitized: {count}')
 
+            raise Exception
+
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+
             return count
 
@@ -231,16 +235,13 @@ def storeEpubsInS3(self, record):
 
                 recordID = record.identifiers[0].split('|')[0]
 
-                flags = json.loads(flagStr)
-
-                if flags['download'] is True:
-                    bucketLocation = 'epubs/{}/{}.epub'.format(source, recordID)
-                    self.addEPUBManifest(
-                        record, itemNo, source, flagStr, mediaType, bucketLocation
-                    )
+                bucketLocation = 'epubs/{}/{}.epub'.format(source, recordID)
+                self.addEPUBManifest(
+                    record, itemNo, source, flagStr, mediaType, bucketLocation
+                )
 
-                    self.sendFileToProcessingQueue(uri, bucketLocation)
-                    break
+                self.sendFileToProcessingQueue(uri, bucketLocation)
+                break
     def createManifestInS3(self, manifestPath, manifestJSON):
         self.putObjectInBucket(
diff --git a/processes/s3Files.py b/processes/s3Files.py
index 1ccc74ce41..43726ce142 100644
--- a/processes/s3Files.py
+++ b/processes/s3Files.py
@@ -49,6 +49,8 @@ def storeFilesInS3():
     attempts = 1
     while True:
         msgProps, _, msgBody = rabbitManager.getMessageFromQueue(fileQueue)
+        print('TRUE')
+        print(msgProps, msgBody)
         if msgProps is None:
             if attempts <= 3:
                 sleep(30 * attempts)
@@ -64,14 +66,19 @@ def storeFilesInS3():
         filePath = fileMeta['bucketPath']
 
         try:
+            print('Try Storing 1')
             logger.info('Storing {}'.format(fileURL))
 
             epubB = S3Process.getFileContents(fileURL)
+            print('Try Storing 2')
 
             storageManager.putObjectInBucket(epubB, filePath, bucket)
+            print('Try Storing 3')
+
             if '.epub' in filePath:
                 fileRoot = '.'.join(filePath.split('.')[:-1])
+                print('Try Storing 4')
 
                 webpubManifest = S3Process.generateWebpub(
                     epubConverterURL, fileRoot, bucket
                 )

From 40e3609a2f270df1b135611f9e1c12e46867c397 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 29 Aug 2023 12:03:48 -0400
Subject: [PATCH 3/4] Modified mapping and process

---
 mappings/loc.py                | 83 ++++++++++++++++++++--------------
 processes/loc.py               | 54 +++++++++++-----------
 processes/s3Files.py           |  7 ---
 tests/unit/test_loc_mapping.py |  6 +--
 4 files changed, 80 insertions(+), 70 deletions(-)

diff --git a/mappings/loc.py b/mappings/loc.py
index 5be7b94dbc..9df8930fcb 100644
--- a/mappings/loc.py
+++ b/mappings/loc.py
@@ -10,13 +10,13 @@ def __init__(self, source):
     def createMapping(self):
         return {
             'title': ('title', '{0}'),
-            'alternative': [('other_title', '{0}')], #One other_title in items block and one outside of it
-            'medium': [('original_format', '{0}')],
+            'alternative': ('other_title', '{0}'),
+            'medium': ('original_format', '{0}'),
             'authors': ('contributor', '{0}|||true'),
             'dates': ('dates', '{0}|publication_date'),
             'publisher': ('item', '{0}'),
             'identifiers': [
-                ('number_lccn', '{0}|loc'),
+                ('number_lccn', '{0}|lccn'),
                 ('item', '{0}'),
             ],
             'contributors':
@@ -28,16 +28,24 @@ def createMapping(self):
             ,
         }
 
-    def applyFormatting(self):
+    def applyFormatting(self):
         self.record.has_part = []
         self.record.source = 'loc'
-        self.record.medium = self.record.medium[0]
+        if self.record.medium:
+            self.record.medium = self.record.medium[0]
+        if len(self.record.is_part_of) == 0:
+            self.record.is_part_of = None
+        if len(self.record.abstract) == 0:
+            self.record.abstract = None
 
         #Convert string repr of list to actual list
         itemList = ast.literal_eval(self.record.identifiers[1])
 
         self.record.identifiers[0], self.record.identifiers[1], self.record.source_id = self.formatIdentifierSourceID(itemList)
 
+        if self.record.identifiers[1] is None:
+            del self.record.identifiers[1]
+
         self.record.publisher, self.record.spatial = self.formatPubSpatial(itemList)
 
         self.record.extent = self.formatExtent(itemList)
@@ -51,41 +59,49 @@ def applyFormatting(self):
 
     #Identifier/SourceID Formatting
     def formatIdentifierSourceID(self, itemList):
         newIdentifier = itemList
-        newIdentifier['call_number'][0] = f'{newIdentifier["call_number"][0]}|call_number'
         lccnNumber = self.record.identifiers[0][0] #lccnNumber comes in as an array and we need the string inside the array
-        callNumber = newIdentifier['call_number'][0].strip(' ')
         sourceID = lccnNumber
+        if 'call_number' in newIdentifier.keys():
+            newIdentifier['call_number'][0] = f'{newIdentifier["call_number"][0]}|call_number'
+            callNumber = newIdentifier['call_number'][0].strip(' ')
+        else:
+            callNumber = None
         return (lccnNumber, callNumber, sourceID)
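A quick illustration of the call_number branch above, outside the diff; the item block and call number are hypothetical:

    # Hypothetical LOC item block; only the call-number path is exercised here,
    # since the LCCN is read from record.identifiers rather than this dict.
    itemList = {'call_number': ['Z663.28 .N38']}

    if 'call_number' in itemList.keys():
        itemList['call_number'][0] = f'{itemList["call_number"][0]}|call_number'
        callNumber = itemList['call_number'][0].strip(' ')
    else:
        callNumber = None

    print(callNumber)  # -> 'Z663.28 .N38|call_number'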
    #Publisher/Spatial Formatting
     def formatPubSpatial(self, itemList):
         pubArray = []
-        spatialArray = []
-        for elem in itemList['created_published']:
-            if ':' not in elem:
-                createdPublishedList = elem.split(',', 1)
-                pubLocation = createdPublishedList[0].strip(' ')
-                if ',' in createdPublishedList[1]:
-                    pubOnly = createdPublishedList[1].split(',')[0].strip(' ')
+        spatialString = None
+        if 'created_published' in itemList.keys():
+            for elem in itemList['created_published']:
+                if ':' not in elem:
+                    createdPublishedList = elem.split(',', 1)
+                    pubLocation = createdPublishedList[0].strip(' ')
+                    if ',' in createdPublishedList[1]:
+                        pubOnly = createdPublishedList[1].split(',')[0].strip(' ')
+                        pubArray.append(pubOnly)
+                        spatialString = pubLocation
+                else:
+                    pubLocatAndPubInfo = elem.split(':', 1)
+                    pubLocation = pubLocatAndPubInfo[0].strip()
+                    pubInfo = pubLocatAndPubInfo[1]
+                    pubOnly = pubInfo.split(',', 1)[0].strip()
                     pubArray.append(pubOnly)
-                    spatialArray.append(pubLocation)
-            else:
-                pubLocatAndPubInfo = elem.split(':', 1)
-                pubLocation = pubLocatAndPubInfo[0].strip()
-                pubInfo = pubLocatAndPubInfo[1]
-                pubOnly = pubInfo.split(',', 1)[0].strip()
-                pubArray.append(pubOnly)
-                spatialArray.append(pubLocation)
-        return (pubArray, spatialArray)
+                    spatialString = pubLocation
+            return (pubArray, spatialString)
+        else:
+            return ([], None)
 
     #Extent Formatting
     def formatExtent(self, itemList):
-        extentArray = []
+        extentString = ''
 
         if 'medium' in itemList:
-            extentArray.extend(itemList['medium'])
-
-        return extentArray
+            if itemList['medium']:
+                extentString = itemList['medium'][0]
+            return extentString
+
+        return None
 
     #Subjects Formatting
     def formatSubjects(self, itemList):
@@ -99,14 +115,15 @@ def formatSubjects(self, itemList):
 
     #Rights Formatting
     def formatRights(self, itemList):
-        rightsArray = []
+        rightsString = ''
 
         if 'rights_advisory' in itemList:
-            for elem in itemList['rights_advisory']:
-                rightsArray.append(f'loc|{elem}|||')
-
-        return rightsArray
-
+            if itemList['rights_advisory']:
+                rightsString = f'loc|{itemList["rights_advisory"][0]}|||'
+            return rightsString
+
+        return None
+
     #Languages Formatting
     def formatLanguages(self, itemList):
         languageArray = []
diff --git a/processes/loc.py b/processes/loc.py
index 1eafcfcd76..2661b6f429 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -1,4 +1,4 @@
-import json
+import time
 
 import os, requests
 from requests.exceptions import HTTPError, ConnectionError
@@ -11,8 +11,8 @@
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
-LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=50&at=results&sb=timestamp_desc'
+LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=50&at=results&sb=timestamp_desc'
 
 class LOCProcess(CoreProcess):
 
@@ -96,26 +96,27 @@ def importOpenAccessRecords(self, count, customTimeStamp):
                         whileBreakFlag = True
                         break
 
-                    resources = metaDict['resources'][0]
-                    if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
-                        logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
-                        logger.debug(f"TITLE: {metaDict['title']}")
+                    if 'resources' in metaDict.keys():
+                        if metaDict['resources']:
+                            resources = metaDict['resources'][0]
+                            if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
+                                logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
+                                logger.debug(f"TITLE: {metaDict['title']}")
 
-                        self.processLOCRecord(metaDict)
-                        count += 1
+                                self.processLOCRecord(metaDict)
+                                count += 1
 
-                        logger.debug(f'Count for OP Access: {count}')
-            raise Exception
+                                logger.debug(f'Count for OP Access: {count}')
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+            time.sleep(5)
 
-        except Exception or HTTPError as e:
-            if e == Exception:
-                logger.exception(e)
+        except (Exception, HTTPError, IndexError, KeyError) as e:
+            logger.exception(e)
 
         return count
 
@@ -141,30 +142,29 @@ def importDigitizedRecords(self, count, customTimeStamp):
                         whileBreakFlag = True
                         break
 
-                    resources = metaDict['resources'][0]
-                    if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
-                        logger.debug(f'DIGITIZED URL: {digitizedURL}')
-                        logger.debug(f"TITLE: {metaDict['title']}")
+                    if 'resources' in metaDict.keys():
+                        if metaDict['resources']:
+                            resources = metaDict['resources'][0]
+                            if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
+                                logger.debug(f'DIGITIZED URL: {digitizedURL}')
+                                logger.debug(f"TITLE: {metaDict['title']}")
 
-                        self.processLOCRecord(metaDict)
-                        count += 1
+                                self.processLOCRecord(metaDict)
+                                count += 1
 
-                        logger.debug(f'Count for Digitized: {count}')
-
-            raise Exception
+                                logger.debug(f'Count for Digitized: {count}')
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+            time.sleep(5)
 
-            return count
+        return count
 
-        except Exception or HTTPError as e:
-            if e == Exception:
-                logger.exception(e)
+        except (Exception, HTTPError, IndexError, KeyError) as e:
+            logger.exception(e)
 
     def processLOCRecord(self, record):
         try:
diff --git a/processes/s3Files.py b/processes/s3Files.py
index 43726ce142..1ccc74ce41 100644
--- a/processes/s3Files.py
+++ b/processes/s3Files.py
@@ -49,6 +49,8 @@ def storeFilesInS3():
     attempts = 1
     while True:
         msgProps, _, msgBody = rabbitManager.getMessageFromQueue(fileQueue)
-        print('TRUE')
-        print(msgProps, msgBody)
         if msgProps is None:
             if attempts <= 3:
                 sleep(30 * attempts)
@@ -64,14 +66,19 @@ def storeFilesInS3():
         filePath = fileMeta['bucketPath']
 
         try:
-            print('Try Storing 1')
             logger.info('Storing {}'.format(fileURL))
 
             epubB = S3Process.getFileContents(fileURL)
-            print('Try Storing 2')
 
             storageManager.putObjectInBucket(epubB, filePath, bucket)
-            print('Try Storing 3')
-
             if '.epub' in filePath:
                 fileRoot = '.'.join(filePath.split('.')[:-1])
-                print('Try Storing 4')
 
                 webpubManifest = S3Process.generateWebpub(
                     epubConverterURL, fileRoot, bucket
                 )
diff --git a/tests/unit/test_loc_mapping.py b/tests/unit/test_loc_mapping.py
index 28cd128447..da20b69ebd 100644
--- a/tests/unit/test_loc_mapping.py
+++ b/tests/unit/test_loc_mapping.py
@@ -74,9 +74,9 @@ def test_applyFormatting_standard(self, testMapping, testRecordStandard):
         ]
         assert testMapping.record.source_id == 'testLCCN|loc'
         assert testMapping.record.publisher == ['testPub']
-        assert testMapping.record.spatial == ['testPubLocation']
-        assert testMapping.record.extent == ['testExtent']
+        assert testMapping.record.spatial == 'testPubLocation'
+        assert testMapping.record.extent == 'testExtent'
         assert testMapping.record.subjects == ['testSubject1||', 'testSubject2||']
-        assert testMapping.record.rights == ['loc|testRights|||']
+        assert testMapping.record.rights == 'loc|testRights|||'
         assert testMapping.record.languages == ['||testLang']

From cc8398b7826398de30a4227ec46dbeeeac717867 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 29 Aug 2023 12:43:25 -0400
Subject: [PATCH 4/4] Added more comments to mapping methods

---
 mappings/loc.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mappings/loc.py b/mappings/loc.py
index 9df8930fcb..739ddcfb2f 100644
--- a/mappings/loc.py
+++ b/mappings/loc.py
@@ -56,7 +56,7 @@ def applyFormatting(self):
 
         self.record.languages = self.formatLanguages(itemList)
 
-    #Identifier/SourceID Formatting
+    #Identifier/SourceID Formatting to return (string, string, string)
     def formatIdentifierSourceID(self, itemList):
         newIdentifier = itemList
         lccnNumber = self.record.identifiers[0][0] #lccnNumber comes in as an array and we need the string inside the array
@@ -68,7 +68,7 @@ def formatIdentifierSourceID(self, itemList):
             callNumber = None
         return (lccnNumber, callNumber, sourceID)
 
-    #Publisher/Spatial Formatting
+    #Publisher/Spatial Formatting to return (array, string)
     def formatPubSpatial(self, itemList):
         pubArray = []
         spatialString = None
@@ -92,7 +92,7 @@ def formatPubSpatial(self, itemList):
         else:
             return ([], None)
 
-    #Extent Formatting
+    #Extent Formatting to return string
     def formatExtent(self, itemList):
         extentString = ''
 
@@ -103,7 +103,7 @@ def formatExtent(self, itemList):
 
         return None
 
-    #Subjects Formatting
+    #Subjects Formatting to return array
     def formatSubjects(self, itemList):
         subjectArray = []
 
@@ -113,7 +113,7 @@ def formatSubjects(self, itemList):
 
         return subjectArray
 
-    #Rights Formatting
+    #Rights Formatting to return string
     def formatRights(self, itemList):
         rightsString = ''
 
@@ -124,7 +124,7 @@ def formatRights(self, itemList):
 
         return None
 
-    #Languages Formatting
+    #Languages Formatting to return array
     def formatLanguages(self, itemList):
         languageArray = []
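As a closing reference, a hedged sketch of the return shapes these comments describe, traced by hand against a hypothetical item block shaped like the LOC JSON the mapping receives:

    # Hypothetical LOC item block; all field values are illustrative only.
    itemList = {
        'created_published': ['Washington, D.C. : U.S. G.P.O., 1998.'],
        'medium': ['1 online resource.'],
        'rights_advisory': ['No known restrictions on publication.'],
    }

    # Shapes per the comments above, assuming an LOCMapping instance `mapping`:
    # mapping.formatPubSpatial(itemList) -> (['U.S. G.P.O.'], 'Washington, D.C.')
    # mapping.formatExtent(itemList)     -> '1 online resource.'
    # mapping.formatRights(itemList)     -> 'loc|No known restrictions on publication.|||'
    # formatSubjects and formatLanguages return arrays, matching the
    # "to return array" comments.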