SFR-2327: Removing S3Manager as an Ancestor of CoreProcess (#451)
kylevillegas93 authored Nov 25, 2024
1 parent 147afa0 commit 76cced2
Showing 19 changed files with 61 additions and 87 deletions.
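
In short, this commit replaces inheritance with composition: CoreProcess no longer mixes in S3Manager, and each process that talks to S3 now owns an S3Manager instance. Below is a minimal sketch of that pattern, assuming the managers package exposes DBManager and S3Manager with the createS3Client and putObjectInBucket methods seen in the diff; the class bodies are illustrative only, not the repository's actual implementations.

    from managers import DBManager, S3Manager

    # Before this commit, CoreProcess inherited S3 behavior that many
    # subclasses never used:
    #
    #     class CoreProcess(DBManager, S3Manager): ...

    # After this commit, CoreProcess only extends DBManager.
    class CoreProcess(DBManager):
        def __init__(self, process, customFile, ingestPeriod, singleRecord, batchSize=500):
            super().__init__()
            self.process = process

    # A process that actually uploads to S3 composes its own manager.
    # (ExampleIngestProcess is a hypothetical subclass for illustration.)
    class ExampleIngestProcess(CoreProcess):
        def __init__(self, *args):
            super().__init__(*args)
            self.s3_manager = S3Manager()
            self.s3_manager.createS3Client()

        def store_manifest(self, content, path, bucket):
            # Calls that were previously inherited (self.putObjectInBucket)
            # now go through the composed manager.
            self.s3_manager.putObjectInBucket(content, path, bucket)
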
4 changes: 2 additions & 2 deletions processes/core.py
@@ -1,4 +1,4 @@
from managers import DBManager, S3Manager
from managers import DBManager
from model import Record

from logger import create_log
@@ -7,7 +7,7 @@
logger = create_log(__name__)


class CoreProcess(DBManager, S3Manager):
class CoreProcess(DBManager):
def __init__(self, process, customFile, ingestPeriod, singleRecord, batchSize=500):
super(CoreProcess, self).__init__()
self.process = process
7 changes: 4 additions & 3 deletions processes/file/covers.py
@@ -2,7 +2,7 @@
import os

from ..core import CoreProcess
from managers import CoverManager, RedisManager
from managers import CoverManager, RedisManager, S3Manager
from model import Edition, Link
from model.postgres.edition import EDITION_LINKS
from logger import create_log
@@ -20,7 +20,8 @@ def __init__(self, *args):
self.redis_manager = RedisManager()
self.redis_manager.createRedisClient()

self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()
self.fileBucket = os.environ['FILE_BUCKET']

self.ingestLimit = None
@@ -87,7 +88,7 @@ def storeFoundCover(self, manager, edition):
manager.coverFormat.lower()
)

self.putObjectInBucket(manager.coverContent, coverPath, self.fileBucket)
self.s3_manager.putObjectInBucket(manager.coverContent, coverPath, self.fileBucket)

coverLink = Link(
url='https://{}.s3.amazonaws.com/{}'.format(self.fileBucket, coverPath),
8 changes: 5 additions & 3 deletions processes/file/fulfill_url_manifest.py
@@ -5,6 +5,7 @@

from ..core import CoreProcess
from datetime import datetime, timedelta, timezone
from managers import S3Manager
from model import Link
from logger import create_log

@@ -24,7 +25,8 @@ def __init__(self, *args):
self.s3Bucket = os.environ['FILE_BUCKET']
self.host = os.environ['DRB_API_HOST']
self.prefix = 'manifests/UofM/'
self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

def runProcess(self):
if self.process == 'daily':
@@ -40,7 +42,7 @@ def runProcess(self):

def fetch_and_update_manifests(self, start_timestamp=None):

batches = self.load_batches(self.prefix, self.s3Bucket)
batches = self.s3_manager.load_batches(self.prefix, self.s3Bucket)
if start_timestamp:
#Using JMESPath to extract keys from the JSON batches
filtered_batch_keys = batches.search(f"Contents[?to_string(LastModified) > '\"{start_timestamp}\"'].Key")
@@ -88,7 +90,7 @@ def replace_manifest_object(self, metadata_json, metadata_json_copy, bucket_name
if metadata_json != metadata_json_copy:
try:
fulfill_manifest = json.dumps(metadata_json, ensure_ascii = False)
return self.s3Client.put_object(
return self.s3_manager.s3Client.put_object(
Bucket=bucket_name,
Key=curr_key,
Body=fulfill_manifest,
7 changes: 4 additions & 3 deletions processes/ingest/chicago_isac.py
@@ -3,7 +3,7 @@

from ..core import CoreProcess
from mappings.chicagoISAC import ChicagoISACMapping
from managers import WebpubManifest
from managers import S3Manager, WebpubManifest
from logger import create_log

logger = create_log(__name__)
@@ -17,7 +17,8 @@ def __init__(self, *args):
self.createSession()

self.s3Bucket = os.environ['FILE_BUCKET']
self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

def runProcess(self):
with open('ingestJSONFiles/chicagoISAC_metadata.json') as f:
@@ -56,7 +57,7 @@ def store_pdf_manifest(self, record):

manifest_json = self.generate_manifest(record, uri, manifest_url)

self.createManifestInS3(manifest_path, manifest_json)
self.s3_manager.createManifestInS3(manifest_path, manifest_json)

link_string = '|'.join([
item_no,
7 changes: 4 additions & 3 deletions processes/ingest/doab.py
@@ -9,7 +9,7 @@
from logger import create_log
from mappings.doab import DOABMapping
from mappings.base_mapping import MappingError
from managers import DOABLinkManager, RabbitMQManager
from managers import DOABLinkManager, RabbitMQManager, S3Manager
from model import get_file_message


@@ -35,7 +35,8 @@ def __init__(self, *args):
self.generateEngine()
self.createSession()

self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()
self.s3Bucket = os.environ['FILE_BUCKET']

self.fileQueue = os.environ['FILE_QUEUE']
@@ -75,7 +76,7 @@ def parseDOABRecord(self, oaiRec):

for manifest in linkManager.manifests:
manifestPath, manifestJSON = manifest
self.createManifestInS3(manifestPath, manifestJSON)
self.s3_manager.createManifestInS3(manifestPath, manifestJSON)

for epubLink in linkManager.ePubLinks:
ePubPath, ePubURI = epubLink
6 changes: 4 additions & 2 deletions processes/ingest/loc.py
@@ -5,7 +5,7 @@
from ..core import CoreProcess
from mappings.base_mapping import MappingError
from mappings.loc import LOCMapping
from managers import RabbitMQManager, WebpubManifest
from managers import RabbitMQManager, S3Manager, WebpubManifest
from model import get_file_message
from logger import create_log
from datetime import datetime, timedelta, timezone
@@ -28,6 +28,8 @@ def __init__(self, *args):
self.generateEngine()
self.createSession()

self.createS3Client()
self.s3_manager = S3Manager()
self.createS3Client()
self.s3Bucket = os.environ['FILE_BUCKET']

@@ -226,7 +228,7 @@ def storePDFManifest(self, record):

manifestJSON = self.generateManifest(record, uri, manifestURI)

self.createManifestInS3(manifestPath, manifestJSON)
self.s3_manager.createManifestInS3(manifestPath, manifestJSON)

linkString = '|'.join([
itemNo,
7 changes: 4 additions & 3 deletions processes/ingest/met.py
@@ -7,7 +7,7 @@
from ..core import CoreProcess
from mappings.base_mapping import MappingError
from mappings.met import METMapping
from managers import RabbitMQManager, WebpubManifest
from managers import RabbitMQManager, S3Manager, WebpubManifest
from model import get_file_message
from logger import create_log

@@ -42,7 +42,8 @@ def __init__(self, *args):
self.rabbitmq_manager.createOrConnectQueue(self.fileQueue, self.fileRoute)

self.s3Bucket = os.environ['FILE_BUCKET']
self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

def runProcess(self):
self.setStartTime()
@@ -173,7 +174,7 @@ def storePDFManifest(self, record):

manifestJSON = self.generateManifest(record, uri, manifestURI)

self.createManifestInS3(manifestPath, manifestJSON)
self.s3_manager.createManifestInS3(manifestPath, manifestJSON)

linkString = '|'.join([
itemNo,
7 changes: 4 additions & 3 deletions processes/ingest/muse.py
@@ -8,7 +8,7 @@

from ..core import CoreProcess
from mappings.muse import MUSEMapping
from managers import MUSEError, MUSEManager, RabbitMQManager
from managers import MUSEError, MUSEManager, RabbitMQManager, S3Manager
from model import get_file_message
from logger import create_log

@@ -27,7 +27,8 @@ def __init__(self, *args):
self.generateEngine()
self.createSession()

self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

self.fileQueue = os.environ['FILE_QUEUE']
self.fileRoute = os.environ['FILE_ROUTING_KEY']
@@ -70,7 +71,7 @@ def parseMuseRecord(self, marcRec):
museManager.addReadableLinks()

if museManager.pdfWebpubManifest:
self.putObjectInBucket(
self.s3_manager.putObjectInBucket(
museManager.pdfWebpubManifest.toJson().encode('utf-8'),
museManager.s3PDFReadPath,
museManager.s3Bucket
3 changes: 2 additions & 1 deletion processes/ingest/publisher_backlist.py
@@ -3,6 +3,7 @@

from ..core import CoreProcess
from logger import create_log
from managers import S3Manager

logger = create_log(__name__)

@@ -14,7 +15,7 @@ def __init__(self, *args):
self.offset = (len(args) >= 6 and args[5]) or None

self.s3_bucket = os.environ['FILE_BUCKET']
self.createS3Client()
self.s3_manager = S3Manager()

self.publisher_backlist_service = PublisherBacklistService()

11 changes: 6 additions & 5 deletions processes/ingest/u_of_m.py
@@ -7,7 +7,7 @@
from urllib.error import HTTPError
from mappings.base_mapping import MappingError
from mappings.UofM import UofMMapping
from managers import WebpubManifest
from managers import S3Manager, WebpubManifest
from logger import create_log

logger = create_log(__name__)
@@ -25,7 +25,8 @@ def __init__(self, *args):
self.createSession()

self.s3Bucket = os.environ['FILE_BUCKET']
self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

def runProcess(self):
with open('ingestJSONFiles/UofM_Updated_CSV.json') as f:
@@ -57,7 +58,7 @@ def addHasPartMapping(self, resultsRecord, record):

try:
#The get_object method is to make sure the object with a specific bucket and key exists in S3
self.s3Client.get_object(Bucket=bucket,
self.s3_manager.s3Client.get_object(Bucket=bucket,
Key=f'{resultsRecord["File ID 1"]}_060pct.pdf')
key = f'{resultsRecord["File ID 1"]}_060pct.pdf'
urlPDFObject = f'https://{bucket}.s3.amazonaws.com/{key}'
@@ -91,7 +92,7 @@ def addHasPartMapping(self, resultsRecord, record):
if not record.has_part:
try:
#The get_object method is to make sure the object with a specific bucket and key exists in S3
self.s3Client.get_object(Bucket= 'ump-pdf-repository',
self.s3_manager.s3Client.get_object(Bucket= 'ump-pdf-repository',
Key= f'{resultsRecord["File ID 1"]}_100pct.pdf')
key = f'{resultsRecord["File ID 1"]}_100pct.pdf'
urlPDFObject = f'https://{bucket}.s3.amazonaws.com/{key}'
@@ -138,7 +139,7 @@ def storePDFManifest(self, record):

manifestJSON = self.generateManifest(record, uri, manifestURI)

self.createManifestInS3(manifestPath, manifestJSON)
self.s3_manager.createManifestInS3(manifestPath, manifestJSON)

if 'in_copyright' in record.rights:
linkString = '|'.join([
7 changes: 4 additions & 3 deletions processes/ingest/u_of_sc.py
@@ -5,7 +5,7 @@
from ..core import CoreProcess
from mappings.base_mapping import MappingError
from mappings.UofSC import UofSCMapping
from managers import WebpubManifest
from managers import S3Manager, WebpubManifest
from logger import create_log

logger = create_log(__name__)
@@ -23,7 +23,8 @@ def __init__(self, *args):
self.createSession()

self.s3Bucket = os.environ['FILE_BUCKET']
self.createS3Client()
self.s3_manager = S3Manager()
self.s3_manager.createS3Client()

def runProcess(self):
with open('UofSC_metadata.json') as f:
@@ -61,7 +62,7 @@ def storePDFManifest(self, record):

manifestJSON = self.generateManifest(record, uri, manifestURI)

self.createManifestInS3(manifestPath, manifestJSON)
self.s3_manager.createManifestInS3(manifestPath, manifestJSON)

linkString = '|'.join([
itemNo,
4 changes: 2 additions & 2 deletions tests/unit/processes/file/test_cover_process.py
@@ -15,6 +15,7 @@ def __init__(self, *args):
self.batchSize = 3
self.runTime = datetime(1900, 1, 1)
self.redis_manager = mocker.MagicMock()
self.s3_manager = mocker.MagicMock(s3Client=mocker.MagicMock())

return TestCoverProcess()

@@ -182,7 +183,6 @@ def test_getEditionIdentifiers(self, testProcess, mocker):
])

def test_storeFoundCover(self, testProcess, mocker):
mockPut = mocker.patch.object(CoverProcess, 'putObjectInBucket')
mockSave = mocker.patch.object(CoverProcess, 'bulkSaveObjects')

mockFetcher = mocker.MagicMock(SOURCE='test', coverID=1)
Expand All @@ -197,4 +197,4 @@ def test_storeFoundCover(self, testProcess, mocker):
assert mockEdition.links[0].media_type == 'image/tst'
assert mockEdition.links[0].flags == {'cover': True}
assert mockSave.call_args[0][0] == set(['ed1', 'ed2', mockEdition])
mockPut.assert_called_once_with('testBytes', 'covers/test/1.tst', 'test_aws_bucket')
testProcess.s3_manager.putObjectInBucket.assert_called_once_with('testBytes', 'covers/test/1.tst', 'test_aws_bucket')
9 changes: 3 additions & 6 deletions tests/unit/processes/file/test_fulfill_manifest_process.py
@@ -17,7 +17,7 @@ def test_process(self, mocker):
class TestFulfill(FulfillURLManifestProcess):
def __init__(self):
self.s3Bucket = 'test_aws_bucket'
self.s3Client = mocker.MagicMock(s3Client='testS3Client')
self.s3_manager = mocker.MagicMock(s3Client=mocker.MagicMock())
self.session = mocker.MagicMock(session='testSession')
self.records = mocker.MagicMock(record='testRecord')
self.batchSize = mocker.MagicMock(batchSize='testBatchSize')
@@ -41,14 +41,11 @@ def test_runProcess(self, test_process, mocker):


def test_fetch_and_update_manifests(self, test_process, mocker):
process_mocks = mocker.patch.multiple(FulfillURLManifestProcess,
load_batches=mocker.DEFAULT,
update_metadata_object=mocker.DEFAULT
)
mocker.patch.multiple(FulfillURLManifestProcess, update_metadata_object=mocker.DEFAULT)

mock_timestamp = mocker.MagicMock(time_stamp='test_timestamp')

test_process.fetch_and_update_manifests(mock_timestamp)

process_mocks['load_batches'].assert_called_once_with('testPrefix','test_aws_bucket')
test_process.s3_manager.load_batches.assert_called_once_with('testPrefix','test_aws_bucket')

5 changes: 2 additions & 3 deletions tests/unit/processes/ingest/test_chicago_isac_process.py
@@ -18,7 +18,7 @@ def test_process(self, mocker):
class TestISAC(ChicagoISACProcess):
def __init__(self):
self.s3Bucket = 'test_aws_bucket'
self.s3_client = mocker.MagicMock(s3_client='test_s3_client')
self.s3_manager = mocker.MagicMock(s3Client=mocker.MagicMock())
self.session = mocker.MagicMock(session='test_session')
self.records = mocker.MagicMock(record='test_record')
self.batch_size = mocker.MagicMock(batch_size='test_batch_size')
@@ -71,15 +71,14 @@ def test_store_pdf_manifest(self, test_process, mocker):

mock_generate_man = mocker.patch.object(ChicagoISACProcess, 'generate_manifest')
mock_generate_man.return_value = 'test_json'
mock_create_man = mocker.patch.object(ChicagoISACProcess, 'createManifestInS3')

test_process.store_pdf_manifest(mock_record)

test_manifest_url = 'https://test_aws_bucket.s3.amazonaws.com/manifests/isac/1.json'
assert mock_record.has_part[0] == '1|{}|isac|application/webpub+json|{{}}'.format(test_manifest_url)

mock_generate_man.assert_called_once_with(mock_record, 'test_url', test_manifest_url)
mock_create_man.assert_called_once_with('manifests/isac/1.json', 'test_json')
test_process.s3_manager.createManifestInS3.assert_called_once_with('manifests/isac/1.json', 'test_json')

def test_generate_manifest(self, mocker):
mock_manifest = mocker.MagicMock(links=[])
