From df5a5fa7ddd8444180e7732b419d4ea5f5e696ca Mon Sep 17 00:00:00 2001
From: Your Name
Date: Thu, 20 Jul 2023 16:43:33 -0400
Subject: [PATCH 1/4] SFR-1718v2_AutoIngestionLOC

---
 CHANGELOG.md                   |  3 +-
 processes/loc.py               | 77 +++++++++++++++++++++++++++-------
 tests/unit/test_loc_process.py | 25 ++++++++++-
 3 files changed, 87 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 34961e2929..a612a4eb87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # CHANGELOG
 
-## unreleased version -- v0.12.1
+## 2023-07-20 -- v0.12.1
 ### Added
 - Script to delete current duplicate authors/contributors in the PSQL database
 - Web scraper script for the Chicago ISAC catalog of publications
@@ -8,6 +8,7 @@
 - Unit tests for Chicago ISAC catalog mapping and process
 - Added mapping for Library of Congress collections
 - Added process for Library of Congress collections
+- Automatic ingestion for Library of Congress collections
 ### Fixed
 - Modify agentParser method to reduce number of future duplicate agents
 - Install `wheel` with pip to fix fasttext build
diff --git a/processes/loc.py b/processes/loc.py
index 708f364edd..3525f195e4 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -7,10 +7,11 @@
 from mappings.loc import LOCMapping
 from managers import WebpubManifest
 from logger import createLog
+from datetime import datetime, timedelta
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=2'
 LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results'
 
 class LOCProcess(CoreProcess):
@@ -20,7 +21,8 @@ def __init__(self, *args):
         self.ingestOffset = int(args[5] or 0)
         self.ingestLimit = (int(args[4]) + self.ingestOffset) if args[4] else 5000
-        self.fullImport = self.process == 'complete'
+        self.fullImport = self.process == 'complete'
+        self.startTimestamp = None
 
         # Connect to database
         self.generateEngine()
@@ -37,27 +39,49 @@ def __init__(self, *args):
         self.createS3Client()
 
     def runProcess(self):
+        if self.process == 'weekly':
+            self.importLOCRecords()
+        elif self.process == 'complete':
+            self.importLOCRecords(fullImport=True)
+        elif self.process == 'custom':
+            self.importLOCRecords(startTimestamp=self.ingestPeriod)
+
+        self.saveRecords()
+        self.commitChanges()
+
+
+    def importLOCRecords(self, fullImport=False, startTimestamp=None):
+
+        if not fullImport:
+            if not startTimestamp:
+                startTimestamp = datetime.utcnow() - timedelta(days=7)
+            else:
+                startTimestamp = datetime.strptime(startTimestamp, '%Y-%m-%dT%H:%M:%S')
+
         openAccessRequestCount = 0
         digitizedRequestCount = 0
 
         try:
-            openAccessRequestCount = self.importOpenAccessRecords(openAccessRequestCount)
+            openAccessRequestCount = self.importOpenAccessRecords(openAccessRequestCount, startTimestamp)
+            logger.debug('Open Access Collection Ingestion Complete')
         except Exception or HTTPError as e:
             logger.exception(e)
 
         try:
-            digitizedRequestCount = self.importDigitizedRecords(digitizedRequestCount)
+            digitizedRequestCount = self.importDigitizedRecords(digitizedRequestCount, startTimestamp)
+            logger.debug('Digitized Books Collection Ingestion Complete')
         except Exception or HTTPError as e:
             logger.exception(e)
 
-        self.saveRecords()
-        self.commitChanges()
+
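A standalone sketch of the two cutoff paths importLOCRecords takes above, runnable on its own; the sample ingestPeriod string is hypothetical:

    from datetime import datetime, timedelta

    # 'weekly' run: the cutoff defaults to seven days before now (UTC),
    # mirroring datetime.utcnow() - timedelta(days=7) in the patch.
    weeklyCutoff = datetime.utcnow() - timedelta(days=7)

    # 'custom' run: ingestPeriod arrives as a string and is parsed with the
    # same '%Y-%m-%dT%H:%M:%S' format the patch uses; the value shown here
    # is hypothetical.
    customCutoff = datetime.strptime('2023-07-01T00:00:00', '%Y-%m-%dT%H:%M:%S')

    print(weeklyCutoff, customCutoff)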
-    def importOpenAccessRecords(self, count):
-        sp = 2
+    def importOpenAccessRecords(self, count, weekTimeStamp):
+        sp = 1
 
         try:
+            whileBreakFlag = False
+
             # An HTTP error will occur when the sp parameter value
             # passes the last page number of the collection search results
             while sp < 100000:
@@ -66,6 +90,14 @@ def importOpenAccessRecords(self, count):
                 LOCData = jsonData.json()
 
                 for metaDict in LOCData['results']:
+                    # Weekly ingestion conditional
+                    if weekTimeStamp:
+                        itemTimeStamp = datetime.strptime(metaDict['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
+
+                        if itemTimeStamp > weekTimeStamp:
+                            whileBreakFlag = True
+                            break
+
                     resources = metaDict['resources'][0]
                     if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
                         logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
@@ -76,27 +108,38 @@ def importOpenAccessRecords(self, count):
 
                         logger.debug(f'Count for OP Access: {count}')
 
+            if whileBreakFlag:
+                logger.debug('No new items added to collection')
+                break
+
             sp += 1
 
         except Exception or HTTPError as e:
             if e == Exception:
                 logger.exception(e)
-        else:
-            logger.debug('Open Access Collection Ingestion Complete')
 
         return count
 
-    def importDigitizedRecords(self, count):
-        sp = 2
+    def importDigitizedRecords(self, count, weekTimeStamp):
+        sp = 1
 
         try:
             # An HTTP error will occur when the sp parameter value
             # passes the last page number of the collection search results
-            while sp > 100000:
+            whileBreakFlag = False
+            while sp < 100000:
                 digitizedURL = '{}&sp={}'.format(LOC_ROOT_DIGIT, sp)
                 jsonData = self.fetchPageJSON(digitizedURL)
                 LOCData = jsonData.json()
 
                 for metaDict in LOCData['results']:
+                    # Weekly ingestion conditional
+                    if weekTimeStamp:
+                        itemTimeStamp = datetime.strptime(metaDict['timestamp'], '%Y-%m-%dT%H:%M:%S.%fZ')
+
+                        if itemTimeStamp > weekTimeStamp:
+                            whileBreakFlag = True
+                            break
+
                     resources = metaDict['resources'][0]
                     if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
                         logger.debug(f'DIGITIZED URL: {digitizedURL}')
@@ -107,6 +150,12 @@ def importDigitizedRecords(self, count):
 
                         logger.debug(f'Count for Digitized: {count}')
 
+
+            if whileBreakFlag:
+                logger.debug('No new items added to collection')
+                break
+
             sp += 1
 
         return count
 
@@ -114,8 +163,6 @@ def importDigitizedRecords(self, count):
         except Exception or HTTPError as e:
             if e == Exception:
                 logger.exception(e)
-        else:
-            logger.debug('Digitized Books Collection Ingestion Complete')
 
     def processLOCRecord(self, record):
         try:
diff --git a/tests/unit/test_loc_process.py b/tests/unit/test_loc_process.py
index 7fde9edecb..516c1a9a59 100644
--- a/tests/unit/test_loc_process.py
+++ b/tests/unit/test_loc_process.py
@@ -22,7 +22,7 @@ def __init__(self):
             self.session = mocker.MagicMock(session='testSession')
             self.records = mocker.MagicMock(record='testRecord')
             self.batchSize = mocker.MagicMock(batchSize='testBatchSize')
-            self.process = mocker.MagicMock(process='testRecord')
+            self.process = 'complete'
 
         return TestLOC()
 
@@ -30,11 +30,13 @@ def test_runProcess(self, testProcess, mocker):
         runMocks = mocker.patch.multiple(
             LOCProcess,
             saveRecords=mocker.DEFAULT,
-            commitChanges=mocker.DEFAULT
+            commitChanges=mocker.DEFAULT,
+            importLOCRecords=mocker.DEFAULT
         )
 
         testProcess.runProcess()
 
+        runMocks['importLOCRecords'].assert_called_once_with(fullImport=True)
         runMocks['saveRecords'].assert_called_once()
         runMocks['commitChanges'].assert_called_once()
 
@@ -93,6 +95,25 @@ def test_storePDFManifest(self, testProcess, mocker):
         mockGenerateMan.assert_called_once_with(mockRecord, 'testURI', testManifestURI)
         mockCreateMan.assert_called_once_with('manifests/loc/1.json', 'testJSON')
 
+    def test_storeEpubsInS3(self, testProcess, mocker):
+        mockRecord = mocker.MagicMock(identifiers=['1|loc'])
+        mockRecord.has_part = [
+            '1|testURI|loc|application/epub+zip|{"reader": false, "catalog": false, "download": true}',
+        ]
+
+        mockSendToQueue = mocker.patch.object(LOCProcess, 'sendFileToProcessingQueue')
+        mockAddEPUBManifest = mocker.patch.object(LOCProcess, 'addEPUBManifest')
+
+        testProcess.storeEpubsInS3(mockRecord)
+
+        mockSendToQueue.assert_has_calls([
+            mocker.call('testURI', 'epubs/loc/1.epub'),
+        ])
+
+        mockAddEPUBManifest.assert_has_calls([
+            mocker.call(mockRecord, '1', 'loc', '{"reader": false, "catalog": false, "download": true}', 'application/epub+zip', 'epubs/loc/1.epub'),
+        ])
+
     def test_createManifestInS3(self, testProcess, mocker):
         mockPut = mocker.patch.object(LOCProcess, 'putObjectInBucket')

From 377eb4caa3845659a6448149b5f891a392b3e91b Mon Sep 17 00:00:00 2001
From: Your Name
Date: Wed, 23 Aug 2023 12:33:56 -0400
Subject: [PATCH 2/4] Added print statements to debug ingest issue

---
 processes/loc.py     | 31 ++++++++++++++++---------------
 processes/s3Files.py |  7 +++++++
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/processes/loc.py b/processes/loc.py
index c78d823e95..1eafcfcd76 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -11,8 +11,8 @@
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sb=timestamp_desc'
-LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sb=timestamp_desc'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
+LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
 
 class LOCProcess(CoreProcess):
 
@@ -28,16 +28,16 @@ def __init__(self, *args):
         self.generateEngine()
         self.createSession()
 
+        # S3 Configuration
+        self.createS3Client()
+        self.s3Bucket = os.environ['FILE_BUCKET']
+
         # Connect to epub processing queue
         self.fileQueue = os.environ['FILE_QUEUE']
         self.fileRoute = os.environ['FILE_ROUTING_KEY']
         self.createRabbitConnection()
         self.createOrConnectQueue(self.fileQueue, self.fileRoute)
 
-        # S3 Configuration
-        self.s3Bucket = os.environ['FILE_BUCKET']
-        self.createS3Client()
 
     def runProcess(self):
         if self.process == 'weekly':
             startTimeStamp = datetime.utcnow() - timedelta(days=7)
@@ -105,6 +105,7 @@ def importOpenAccessRecords(self, count, customTimeStamp):
                         count += 1
 
                         logger.debug(f'Count for OP Access: {count}')
+            raise Exception
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
@@ -150,11 +151,14 @@ def importDigitizedRecords(self, count, customTimeStamp):
 
                         logger.debug(f'Count for Digitized: {count}')
 
+            raise Exception
+
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+
             return count
 
@@ -231,16 +235,13 @@ def storeEpubsInS3(self, record):
 
                 recordID = record.identifiers[0].split('|')[0]
 
-                flags = json.loads(flagStr)
-
-                if flags['download'] is True:
-                    bucketLocation = 'epubs/{}/{}.epub'.format(source, recordID)
-                    self.addEPUBManifest(
-                        record, itemNo, source, flagStr, mediaType, bucketLocation
-                    )
+                bucketLocation = 'epubs/{}/{}.epub'.format(source, recordID)
+                self.addEPUBManifest(
+                    record, itemNo, source, flagStr, mediaType, bucketLocation
+                )
 
-                    self.sendFileToProcessingQueue(uri, bucketLocation)
-                    break
+                self.sendFileToProcessingQueue(uri, bucketLocation)
+                break
     def createManifestInS3(self, manifestPath, manifestJSON):
         self.putObjectInBucket(
diff --git a/processes/s3Files.py b/processes/s3Files.py
index 1ccc74ce41..43726ce142 100644
--- a/processes/s3Files.py
+++ b/processes/s3Files.py
@@ -49,6 +49,8 @@ def storeFilesInS3():
     attempts = 1
     while True:
         msgProps, _, msgBody = rabbitManager.getMessageFromQueue(fileQueue)
+        print('TRUE')
+        print(msgProps, msgBody)
         if msgProps is None:
             if attempts <= 3:
                 sleep(30 * attempts)
@@ -64,14 +66,19 @@ def storeFilesInS3():
         filePath = fileMeta['bucketPath']
 
         try:
+            print('Try Storing 1')
             logger.info('Storing {}'.format(fileURL))
 
             epubB = S3Process.getFileContents(fileURL)
+            print('Try Storing 2')
 
             storageManager.putObjectInBucket(epubB, filePath, bucket)
+            print('Try Storing 3')
+
             if '.epub' in filePath:
                 fileRoot = '.'.join(filePath.split('.')[:-1])
+                print('Try Storing 4')
 
                 webpubManifest = S3Process.generateWebpub(
                     epubConverterURL, fileRoot, bucket
                 )

From 40e3609a2f270df1b135611f9e1c12e46867c397 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 29 Aug 2023 12:03:48 -0400
Subject: [PATCH 3/4] Modified mapping and process

---
 mappings/loc.py                | 83 ++++++++++++++++++++--------------
 processes/loc.py               | 54 +++++++++++-----------
 processes/s3Files.py           |  7 ---
 tests/unit/test_loc_mapping.py |  6 +--
 4 files changed, 80 insertions(+), 70 deletions(-)

diff --git a/mappings/loc.py b/mappings/loc.py
index 5be7b94dbc..9df8930fcb 100644
--- a/mappings/loc.py
+++ b/mappings/loc.py
@@ -10,13 +10,13 @@ def __init__(self, source):
     def createMapping(self):
         return {
             'title': ('title', '{0}'),
-            'alternative': [('other_title', '{0}')], #One other_title in items block and one outside of it
-            'medium': [('original_format', '{0}')],
+            'alternative': ('other_title', '{0}'),
+            'medium': ('original_format', '{0}'),
             'authors': ('contributor', '{0}|||true'),
             'dates': ('dates', '{0}|publication_date'),
             'publisher': ('item', '{0}'),
             'identifiers': [
-                ('number_lccn', '{0}|loc'),
+                ('number_lccn', '{0}|lccn'),
                 ('item', '{0}'),
             ],
             'contributors':
@@ -28,16 +28,24 @@ def createMapping(self):
             ,
         }
 
-    def applyFormatting(self):
+    def applyFormatting(self):
         self.record.has_part = []
         self.record.source = 'loc'
-        self.record.medium = self.record.medium[0]
+        if self.record.medium:
+            self.record.medium = self.record.medium[0]
+        if len(self.record.is_part_of) == 0:
+            self.record.is_part_of = None
+        if len(self.record.abstract) == 0:
+            self.record.abstract = None
 
         #Convert string repr of list to actual list
         itemList = ast.literal_eval(self.record.identifiers[1])
 
         self.record.identifiers[0], self.record.identifiers[1], self.record.source_id = self.formatIdentifierSourceID(itemList)
 
+        if self.record.identifiers[1] is None:
+            del self.record.identifiers[1]
+
         self.record.publisher, self.record.spatial = self.formatPubSpatial(itemList)
 
         self.record.extent = self.formatExtent(itemList)
@@ -51,41 +59,49 @@ def applyFormatting(self):
 
     #Identifier/SourceID Formatting
     def formatIdentifierSourceID(self, itemList):
         newIdentifier = itemList
-        newIdentifier['call_number'][0] = f'{newIdentifier["call_number"][0]}|call_number'
         lccnNumber = self.record.identifiers[0][0] #lccnNumber comes in as an array and we need the string inside the array
-        callNumber = newIdentifier['call_number'][0].strip(' ')
         sourceID = lccnNumber
+        if 'call_number' in newIdentifier.keys():
+            newIdentifier['call_number'][0] = f'{newIdentifier["call_number"][0]}|call_number'
+            callNumber = newIdentifier['call_number'][0].strip(' ')
+        else:
+            callNumber = None
         return (lccnNumber, callNumber, sourceID)
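A quick illustration of the call_number branch above, outside the diff; the item block and call number are hypothetical:

    # Hypothetical LOC item block; only the call-number path is exercised here,
    # since the LCCN is read from record.identifiers rather than this dict.
    itemList = {'call_number': ['Z663.28 .N38']}

    if 'call_number' in itemList.keys():
        itemList['call_number'][0] = f'{itemList["call_number"][0]}|call_number'
        callNumber = itemList['call_number'][0].strip(' ')
    else:
        callNumber = None

    print(callNumber)  # -> 'Z663.28 .N38|call_number'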
    #Publisher/Spatial Formatting
     def formatPubSpatial(self, itemList):
         pubArray = []
-        spatialArray = []
-        for elem in itemList['created_published']:
-            if ':' not in elem:
-                createdPublishedList = elem.split(',', 1)
-                pubLocation = createdPublishedList[0].strip(' ')
-                if ',' in createdPublishedList[1]:
-                    pubOnly = createdPublishedList[1].split(',')[0].strip(' ')
+        spatialString = None
+        if 'created_published' in itemList.keys():
+            for elem in itemList['created_published']:
+                if ':' not in elem:
+                    createdPublishedList = elem.split(',', 1)
+                    pubLocation = createdPublishedList[0].strip(' ')
+                    if ',' in createdPublishedList[1]:
+                        pubOnly = createdPublishedList[1].split(',')[0].strip(' ')
+                        pubArray.append(pubOnly)
+                        spatialString = pubLocation
+                else:
+                    pubLocatAndPubInfo = elem.split(':', 1)
+                    pubLocation = pubLocatAndPubInfo[0].strip()
+                    pubInfo = pubLocatAndPubInfo[1]
+                    pubOnly = pubInfo.split(',', 1)[0].strip()
                     pubArray.append(pubOnly)
-                    spatialArray.append(pubLocation)
-            else:
-                pubLocatAndPubInfo = elem.split(':', 1)
-                pubLocation = pubLocatAndPubInfo[0].strip()
-                pubInfo = pubLocatAndPubInfo[1]
-                pubOnly = pubInfo.split(',', 1)[0].strip()
-                pubArray.append(pubOnly)
-                spatialArray.append(pubLocation)
-        return (pubArray, spatialArray)
+                    spatialString = pubLocation
+            return (pubArray, spatialString)
+        else:
+            return ([], None)
 
     #Extent Formatting
     def formatExtent(self, itemList):
-        extentArray = []
+        extentString = ''
 
         if 'medium' in itemList:
-            extentArray.extend(itemList['medium'])
-
-        return extentArray
+            if itemList['medium']:
+                extentString = itemList['medium'][0]
+            return extentString
+
+        return None
 
     #Subjects Formatting
     def formatSubjects(self, itemList):
@@ -99,14 +115,15 @@ def formatSubjects(self, itemList):
 
     #Rights Formatting
     def formatRights(self, itemList):
-        rightsArray = []
+        rightsString = ''
 
         if 'rights_advisory' in itemList:
-            for elem in itemList['rights_advisory']:
-                rightsArray.append(f'loc|{elem}|||')
-
-        return rightsArray
-
+            if itemList['rights_advisory']:
+                rightsString = f'loc|{itemList["rights_advisory"][0]}|||'
+            return rightsString
+
+        return None
+
     #Languages Formatting
     def formatLanguages(self, itemList):
         languageArray = []
diff --git a/processes/loc.py b/processes/loc.py
index 1eafcfcd76..2661b6f429 100644
--- a/processes/loc.py
+++ b/processes/loc.py
@@ -1,4 +1,4 @@
-import json
+import time
 
 import os, requests
 from requests.exceptions import HTTPError, ConnectionError
@@ -11,8 +11,8 @@
 
 logger = createLog(__name__)
 
-LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
-LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=2&at=results&sp=1'
+LOC_ROOT_OPEN_ACCESS = 'https://www.loc.gov/collections/open-access-books/?fo=json&fa=access-restricted%3Afalse&c=50&at=results&sb=timestamp_desc'
+LOC_ROOT_DIGIT = 'https://www.loc.gov/collections/selected-digitized-books/?fo=json&fa=access-restricted%3Afalse&c=50&at=results&sb=timestamp_desc'
 
 class LOCProcess(CoreProcess):
 
@@ -96,26 +96,27 @@ def importOpenAccessRecords(self, count, customTimeStamp):
                         whileBreakFlag = True
                         break
 
-                    resources = metaDict['resources'][0]
-                    if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
-                        logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
-                        logger.debug(f"TITLE: {metaDict['title']}")
+                    if 'resources' in metaDict.keys():
+                        if metaDict['resources']:
+                            resources = metaDict['resources'][0]
+                            if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
+                                logger.debug(f'OPEN ACCESS URL: {openAccessURL}')
+                                logger.debug(f"TITLE: {metaDict['title']}")
 
-                        self.processLOCRecord(metaDict)
-                        count += 1
+                                self.processLOCRecord(metaDict)
+                                count += 1
 
-                        logger.debug(f'Count for OP Access: {count}')
-            raise Exception
+                                logger.debug(f'Count for OP Access: {count}')
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+            time.sleep(5)
 
-        except Exception or HTTPError as e:
-            if e == Exception:
-                logger.exception(e)
+        except (Exception, HTTPError, IndexError, KeyError) as e:
+            logger.exception(e)
 
         return count
 
@@ -141,30 +142,29 @@ def importDigitizedRecords(self, count, customTimeStamp):
                         whileBreakFlag = True
                         break
 
-                    resources = metaDict['resources'][0]
-                    if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
-                        logger.debug(f'DIGITIZED URL: {digitizedURL}')
-                        logger.debug(f"TITLE: {metaDict['title']}")
+                    if 'resources' in metaDict.keys():
+                        if metaDict['resources']:
+                            resources = metaDict['resources'][0]
+                            if 'pdf' in resources.keys() or 'epub_file' in resources.keys():
+                                logger.debug(f'DIGITIZED URL: {digitizedURL}')
+                                logger.debug(f"TITLE: {metaDict['title']}")
 
-                        self.processLOCRecord(metaDict)
-                        count += 1
+                                self.processLOCRecord(metaDict)
+                                count += 1
 
-                        logger.debug(f'Count for Digitized: {count}')
-
-            raise Exception
+                                logger.debug(f'Count for Digitized: {count}')
 
             if whileBreakFlag:
                 logger.debug('No new items added to collection')
                 break
 
             sp += 1
+            time.sleep(5)
 
-            return count
+        return count
 
-        except Exception or HTTPError as e:
-            if e == Exception:
-                logger.exception(e)
+        except (Exception, HTTPError, IndexError, KeyError) as e:
+            logger.exception(e)
 
     def processLOCRecord(self, record):
         try:
diff --git a/processes/s3Files.py b/processes/s3Files.py
index 43726ce142..1ccc74ce41 100644
--- a/processes/s3Files.py
+++ b/processes/s3Files.py
@@ -49,6 +49,8 @@ def storeFilesInS3():
     attempts = 1
     while True:
         msgProps, _, msgBody = rabbitManager.getMessageFromQueue(fileQueue)
-        print('TRUE')
-        print(msgProps, msgBody)
         if msgProps is None:
             if attempts <= 3:
                 sleep(30 * attempts)
@@ -64,14 +66,19 @@ def storeFilesInS3():
         filePath = fileMeta['bucketPath']
 
         try:
-            print('Try Storing 1')
             logger.info('Storing {}'.format(fileURL))
 
             epubB = S3Process.getFileContents(fileURL)
-            print('Try Storing 2')
 
             storageManager.putObjectInBucket(epubB, filePath, bucket)
-            print('Try Storing 3')
-
             if '.epub' in filePath:
                 fileRoot = '.'.join(filePath.split('.')[:-1])
-                print('Try Storing 4')
 
                 webpubManifest = S3Process.generateWebpub(
                     epubConverterURL, fileRoot, bucket
                 )
diff --git a/tests/unit/test_loc_mapping.py b/tests/unit/test_loc_mapping.py
index 28cd128447..da20b69ebd 100644
--- a/tests/unit/test_loc_mapping.py
+++ b/tests/unit/test_loc_mapping.py
@@ -74,9 +74,9 @@ def test_applyFormatting_standard(self, testMapping, testRecordStandard):
         ]
         assert testMapping.record.source_id == 'testLCCN|loc'
         assert testMapping.record.publisher == ['testPub']
-        assert testMapping.record.spatial == ['testPubLocation']
-        assert testMapping.record.extent == ['testExtent']
+        assert testMapping.record.spatial == 'testPubLocation'
+        assert testMapping.record.extent == 'testExtent'
         assert testMapping.record.subjects == ['testSubject1||', 'testSubject2||']
-        assert testMapping.record.rights == ['loc|testRights|||']
+        assert testMapping.record.rights == 'loc|testRights|||'
         assert testMapping.record.languages == ['||testLang']

From cc8398b7826398de30a4227ec46dbeeeac717867 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Tue, 29 Aug 2023 12:43:25 -0400
Subject: [PATCH 4/4] Added more comments to mapping methods

---
 mappings/loc.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mappings/loc.py b/mappings/loc.py
index 9df8930fcb..739ddcfb2f 100644
--- a/mappings/loc.py
+++ b/mappings/loc.py
@@ -56,7 +56,7 @@ def applyFormatting(self):
 
         self.record.languages = self.formatLanguages(itemList)
 
-    #Identifier/SourceID Formatting
+    #Identifier/SourceID Formatting to return (string, string, string)
     def formatIdentifierSourceID(self, itemList):
         newIdentifier = itemList
         lccnNumber = self.record.identifiers[0][0] #lccnNumber comes in as an array and we need the string inside the array
@@ -68,7 +68,7 @@ def formatIdentifierSourceID(self, itemList):
             callNumber = None
         return (lccnNumber, callNumber, sourceID)
 
-    #Publisher/Spatial Formatting
+    #Publisher/Spatial Formatting to return (array, string)
     def formatPubSpatial(self, itemList):
         pubArray = []
         spatialString = None
@@ -92,7 +92,7 @@ def formatPubSpatial(self, itemList):
         else:
             return ([], None)
 
-    #Extent Formatting
+    #Extent Formatting to return string
     def formatExtent(self, itemList):
         extentString = ''
 
@@ -103,7 +103,7 @@ def formatExtent(self, itemList):
 
         return None
 
-    #Subjects Formatting
+    #Subjects Formatting to return array
     def formatSubjects(self, itemList):
         subjectArray = []
 
@@ -113,7 +113,7 @@ def formatSubjects(self, itemList):
 
         return subjectArray
 
-    #Rights Formatting
+    #Rights Formatting to return string
     def formatRights(self, itemList):
         rightsString = ''
 
@@ -124,7 +124,7 @@ def formatRights(self, itemList):
 
         return None
 
-    #Languages Formatting
+    #Languages Formatting to return array
     def formatLanguages(self, itemList):
         languageArray = []
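As a closing reference, a hedged sketch of the return shapes these comments describe, traced by hand against a hypothetical item block shaped like the LOC JSON the mapping receives:

    # Hypothetical LOC item block; all field values are illustrative only.
    itemList = {
        'created_published': ['Washington, D.C. : U.S. G.P.O., 1998.'],
        'medium': ['1 online resource.'],
        'rights_advisory': ['No known restrictions on publication.'],
    }

    # Shapes per the comments above, assuming an LOCMapping instance `mapping`:
    # mapping.formatPubSpatial(itemList) -> (['U.S. G.P.O.'], 'Washington, D.C.')
    # mapping.formatExtent(itemList)     -> '1 online resource.'
    # mapping.formatRights(itemList)     -> 'loc|No known restrictions on publication.|||'
    # formatSubjects and formatLanguages return arrays, matching the
    # "to return array" comments.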