Skip to content

Commit

Permalink
SFR-2308_PubBacklistMapping
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitri committed Nov 25, 2024
1 parent 76cced2 commit 5918f02
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 16 deletions.
12 changes: 0 additions & 12 deletions mappings/UofM.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def formatSubjects(self):
return subjectList

def formatRights(self):
'''
The pipe delimiter is to separate the Rights table attributes into this format:
source|license|reason|statement|date
which makes it easy to place the right data into the columns when clustered
'''

if not self.record.rights:
return None

Expand All @@ -102,9 +96,3 @@ def formatRights(self):
return 'UofM|{}||{}|'.format('public_domain', 'Public Domain')

return None






105 changes: 105 additions & 0 deletions mappings/publisher_backlist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
from .json import JSONMapping

class PublisherBacklistMapping(JSONMapping):
    '''
    Mapping of a Publisher Backlist JSON record onto the record fields.

    Multi-attribute fields are stored as pipe-delimited strings (for example
    ``value|type`` for identifiers and ``source|license|reason|statement|date``
    for rights) so each attribute can be placed in its own column later on.
    '''

    # Rights status (first element of the mapped rights string, lower-cased)
    # -> (license code, human-readable rights statement).
    _RIGHTS_STATEMENTS = {
        'in copyright': ('in_copyright', 'In Copyright'),
        'public domain': ('public_domain', 'Public Domain'),
    }

    def __init__(self, source):
        '''Store the source record and build the field mapping.'''
        super().__init__(source, {})
        self.mapping = self.createMapping()

    def createMapping(self):
        '''
        Return the field mapping: record attribute -> (source key, format).

        A bare tuple describes a single source key; a list of tuples collects
        several source keys into one record attribute.
        '''
        return {
            'title': ('Title', '{0}'),
            'authors': ('Author(s)', '{0}'),
            'dates': [('Pub Date', '{0}|publication_date')],
            'publisher': [('Publisher (from Projects)', '{0}||')],
            'identifiers': [
                ('ISBN', '{0}|isbn'),
                ('OCLC', '{0}|oclc')
            ],
            'rights': ('DRB Rights Classification', '{0}||||'),
            'contributors': [('Contributors', '{0}|||contributor')],
            'subjects': ('Subject 1', '{0}'),
            'source': ('Projects', '{0}'),
            'publisher_project_source': ('Publisher (from Projects)', '{0}')
        }

    def apply_formatting(self):
        '''
        Normalize the mapped record in place.

        Shortens the source to its first word, formats authors, subjects,
        identifiers and rights, and derives ``source_id`` from the source
        and one of the identifiers.
        '''
        self.record.has_part = []

        if self.record.source:
            # Only the first space-separated word of the project name is kept
            # as the source (e.g. 'UofM Press' -> 'UofM').
            self.record.source = self.record.source.split(' ')[0]

        if self.record.publisher_project_source:
            # presumably a list-valued field; only its first entry is kept
            # -- TODO confirm against the incoming data
            self.record.publisher_project_source = self.record.publisher_project_source[0]

        if self.record.authors:
            self.record.authors = self.format_authors()

        if self.record.subjects:
            self.record.subjects = self.format_subjects()

        identifiers = self.record.identifiers
        if identifiers:
            # With a single identifier that one is used for the source id;
            # otherwise the second entry (the OCLC slot of the mapping) is.
            id_entry = identifiers[0] if len(identifiers) == 1 else identifiers[1]
            source_id = id_entry.split('|')[0]
            self.record.source_id = f'{self.record.source}_{source_id}'
            self.record.identifiers = self.format_identifiers()

        # Must run after the source has been shortened above, because the
        # source is embedded in the rights string.
        self.record.rights = self.format_rights()

    def format_authors(self):
        '''
        Return the authors as a list of ``name|||true`` strings.

        A string value is split on semicolons (surrounding whitespace is
        stripped); any other iterable is treated as one name per element.
        Empty names are dropped.
        '''
        authors = self.record.authors

        if isinstance(authors, str):
            names = authors.split(';')
        else:
            names = [str(name) for name in authors]

        return [f'{name.strip()}|||true' for name in names if name.strip()]

    def format_identifiers(self):
        '''
        Expand a semicolon-separated ISBN entry into one entry per ISBN.

        When the first identifier is an ISBN entry holding several values,
        return one ``value|isbn`` string per ISBN, followed by the second
        identifier if it is an OCLC entry. Otherwise return the identifiers
        unchanged.
        '''
        identifiers = self.record.identifiers

        if 'isbn' not in identifiers[0]:
            return identifiers

        isbn_string = identifiers[0].split('|')[0]
        if ';' not in isbn_string:
            return identifiers

        formatted = [
            f'{isbn.strip()}|isbn'
            for isbn in isbn_string.split(';')
            if isbn.strip()
        ]

        if len(identifiers) > 1 and 'oclc' in identifiers[1]:
            formatted.append(identifiers[1])

        return formatted

    def format_subjects(self):
        '''Return the pipe-separated subjects as a list of ``subject||`` strings.'''
        # str.split returns the whole string as a single element when no
        # delimiter is present, so one subject needs no special case.
        return [f'{subject}||' for subject in self.record.subjects.split('|')]

    def format_rights(self):
        '''
        Return the rights string ``source|license||statement|`` or None.

        The status is the first pipe-delimited element of the mapped rights
        value, compared case-insensitively. None is returned when there is
        no rights value or the status is not recognized.
        '''
        if not self.record.rights:
            return None

        rights_status = self.record.rights.split('|')[0].strip().lower()
        rights = self._RIGHTS_STATEMENTS.get(rights_status)

        if rights is None:
            return None

        license_code, statement = rights
        return f'{self.record.source}|{license_code}||{statement}|'
8 changes: 4 additions & 4 deletions services/sources/publisher_backlist_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import requests
import json
import urllib.parse
from typing import Optional, Dict
from typing import Optional

from logger import create_log
from mappings.UofM import UofMMapping
from mappings.publisher_backlist import PublisherBacklistMapping
from .source_service import SourceService

logger = create_log(__name__)
Expand All @@ -23,14 +23,14 @@ def get_records(
start_timestamp: datetime=None,
offset: Optional[int]=None,
limit: Optional[int]=None
) -> list[UofMMapping]:
) -> list[PublisherBacklistMapping]:
array_json_records = self.get_records_json(full_import, start_timestamp, offset, limit)

for json_dict in array_json_records:
for records_value in json_dict['records']:
try:
record_metadata_dict = records_value
record = UofMMapping(record_metadata_dict)
record = PublisherBacklistMapping(record_metadata_dict)
record.applyMapping()
except Exception:
logger.exception(f'Failed to process Publisher Backlist record')
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/processes/test_pub_backlist_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest

from mappings.publisher_backlist import PublisherBacklistMapping

class TestPublisherBacklistMapping:
    '''Unit tests for PublisherBacklistMapping.'''

    @pytest.fixture
    def testMapping(self):
        '''A mapping instance built without running the real constructor.'''
        class StubPublisherBacklistMapping(PublisherBacklistMapping):
            def __init__(self):
                self.mapping = None

        return StubPublisherBacklistMapping()

    @pytest.fixture
    def testRecordStandard(self, mocker):
        '''A record as it looks after mapping and before formatting.'''
        return mocker.MagicMock(
            title='testTitle',
            # Authors are mapped with the plain '{0}' format, so the value is
            # still a raw string at this point.
            authors='testAuthor',
            dates=['testDate|publication_date'],
            publisher=['testPublisher||'],
            identifiers=['testISBN|isbn', 'testOCLC|oclc'],
            rights='in copyright||||',
            contributors=['testContributor|||contributor'],
            subjects='testSubject',
            source='UofM Press',
            publisher_project_source=['University of Michigan']
        )

    def test_createMapping(self, testMapping):
        recordMapping = testMapping.createMapping()

        assert list(recordMapping.keys()) == [
            'title', 'authors', 'dates', 'publisher',
            'identifiers', 'rights', 'contributors', 'subjects',
            'source', 'publisher_project_source'
        ]
        assert recordMapping['title'] == ('Title', '{0}')

    def test_apply_formatting_standard(self, testMapping, testRecordStandard):
        testMapping.record = testRecordStandard

        testMapping.apply_formatting()

        assert testMapping.record.has_part == []
        assert testMapping.record.source == 'UofM'
        assert testMapping.record.identifiers == ['testISBN|isbn', 'testOCLC|oclc']
        assert testMapping.record.source_id == 'UofM_testOCLC'
        assert testMapping.record.publisher == ['testPublisher||']
        assert testMapping.record.publisher_project_source == 'University of Michigan'
        assert testMapping.record.subjects == ['testSubject||']
        assert len(testMapping.record.authors) == 1
        assert testMapping.record.authors[0].startswith('testAuthor|||true')
        # Only the license and statement portion of the rights string is
        # pinned here.
        assert testMapping.record.rights.endswith('|in_copyright||In Copyright|')

0 comments on commit 5918f02

Please sign in to comment.