Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2308_PubBacklistMapping #460

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions mappings/UofM.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def formatSubjects(self):
return subjectList

def formatRights(self):
'''
The pipe delimiter is to separate the Rights table attributes into this format:
source|license|reason|statement|date
which makes it easy to place the right data into the columns when clustered
'''

if not self.record.rights:
return None

Expand All @@ -102,9 +96,3 @@ def formatRights(self):
return 'UofM|{}||{}|'.format('public_domain', 'Public Domain')

return None






97 changes: 97 additions & 0 deletions mappings/publisher_backlist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from .json import JSONMapping

class PublisherBacklistMapping(JSONMapping):
    """Mapping from a Publisher Backlist JSON record to pipeline record fields.

    The mapping dictionary is built by ``createMapping`` and installed on the
    instance after the parent ``JSONMapping`` has been initialised with an
    empty mapping. ``applyFormatting`` then normalises the mapped values
    into the pipe-delimited strings used by the rest of the record.
    """

    def __init__(self, source):
        """Initialise the parent mapping with ``source`` and install the field mapping."""
        super().__init__(source, {})
        self.mapping = self.createMapping()

    def createMapping(self):
        """Return the field mapping for Publisher Backlist records.

        Each key is a record attribute; each value is a
        ``(source field name, format string)`` tuple, or a list of such
        tuples when several source fields feed one attribute.
        """
        return {
            'title': ('Title', '{0}'),
            'authors': ('Author(s)', '{0}'),
            'dates': [('Pub Date', '{0}|publication_date')],
            'publisher': [('Publisher (from Projects)', '{0}||')],
            'identifiers': [
                ('ISBN', '{0}|isbn'),
                ('OCLC', '{0}|oclc'),
            ],
            'rights': ('DRB Rights Classification', '{0}||||'),
            'contributors': [('Contributors', '{0}|||contributor')],
            'subjects': ('Subject 1', '{0}'),
            'source': ('Project Name (from Projects)', '{0}'),
            'source_id': ('DRB Record_ID', '{0}'),
            'publisher_project_source': ('Publisher (from Projects)', '{0}'),
        }

    def applyFormatting(self):
        """Normalise the mapped record attributes in place.

        ``source`` must be reduced before ``format_rights`` runs, because the
        rights string embeds the record's source.
        """
        self.record.has_part = []

        if self.record.source:
            # The first element is used, and only its first space-separated
            # word is kept (e.g. 'UofMichigan Backlist' -> 'UofMichigan').
            self.record.source = self.record.source[0].split(' ')[0]

        if self.record.publisher_project_source:
            self.record.publisher_project_source = (
                self.record.publisher_project_source[0]
            )

        if self.record.authors:
            self.record.authors = self.format_authors()

        if self.record.subjects:
            self.record.subjects = self.format_subjects()

        if self.record.identifiers:
            self.record.identifiers = self.format_identifiers()

        self.record.rights = self.format_rights()

    def format_authors(self):
        """Return the authors as a list of ``'<name>|||true'`` strings.

        A value containing ``;`` is split into one entry per author, with
        surrounding whitespace removed and empty segments dropped; any other
        value is wrapped as a single entry.
        """
        authors = self.record.authors

        if ';' in authors:
            return [
                f'{author.strip()}|||true'
                for author in authors.split(';')
                if author.strip()
            ]

        return [f'{authors}|||true']

    def format_identifiers(self):
        """Expand a ``;``-separated ISBN entry into one identifier per ISBN.

        Only the first identifier is inspected for multiple ISBNs. When it is
        expanded, the second identifier is kept only if it is an OCLC entry.
        In every other case the identifiers are returned unchanged.
        """
        identifiers = self.record.identifiers

        if 'isbn' not in identifiers[0]:
            return identifiers

        isbn_string = identifiers[0].split('|')[0]
        if ';' not in isbn_string:
            return identifiers

        formatted = [
            f'{isbn.strip()}|isbn'
            for isbn in isbn_string.split(';')
            if isbn.strip()
        ]

        if len(identifiers) > 1 and 'oclc' in identifiers[1]:
            formatted.append(f'{identifiers[1]}')

        return formatted

    def format_subjects(self):
        """Return the subjects as a list of ``'<subject>||'`` strings.

        A value containing ``|`` is split into one entry per subject, with
        surrounding whitespace removed and empty segments dropped; any other
        value is wrapped as a single entry.
        """
        subjects = self.record.subjects

        if '|' in subjects:
            return [
                f'{subject.strip()}||'
                for subject in subjects.split('|')
                if subject.strip()
            ]

        return [f'{subjects}||']

    def format_rights(self):
        """Return the rights string for the record, or ``None`` if unknown.

        The result is pipe-delimited as ``source|license|reason|statement|date``
        with the reason and date left empty. Only the statuses
        ``'in copyright'`` and ``'public domain'`` are recognised (compared
        case-insensitively, ignoring surrounding whitespace).
        """
        if not self.record.rights:
            return None

        rights_status = self.record.rights.split('|')[0].strip().lower()

        if rights_status == 'in copyright':
            return '{}|{}||{}|'.format(
                self.record.source, 'in_copyright', 'In Copyright'
            )

        if rights_status == 'public domain':
            return '{}|{}||{}|'.format(
                self.record.source, 'public_domain', 'Public Domain'
            )

        return None
1 change: 0 additions & 1 deletion processes/ingest/publisher_backlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def runProcess(self):
else:
logger.warning(f'Unknown Publisher Backlist ingestion process type {self.process}')
return

for record in records:
self.addDCDWToUpdateList(record)

Expand Down
11 changes: 5 additions & 6 deletions services/sources/publisher_backlist_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import requests
import json
import urllib.parse
from typing import Optional, Dict
from typing import Optional

from logger import create_log
from mappings.UofM import UofMMapping
from mappings.publisher_backlist import PublisherBacklistMapping
from .source_service import SourceService

logger = create_log(__name__)
Expand All @@ -23,14 +23,14 @@ def get_records(
start_timestamp: datetime=None,
offset: Optional[int]=None,
limit: Optional[int]=None
) -> list[UofMMapping]:
) -> list[PublisherBacklistMapping]:
array_json_records = self.get_records_json(full_import, start_timestamp, offset, limit)

for json_dict in array_json_records:
for records_value in json_dict['records']:
try:
record_metadata_dict = records_value
record = UofMMapping(record_metadata_dict)
record_metadata_dict = records_value['fields']
record = PublisherBacklistMapping(record_metadata_dict)
record.applyMapping()
except Exception:
logger.exception(f'Failed to process Publisher Backlist record')
Expand Down Expand Up @@ -89,7 +89,6 @@ def get_records_array(self,
pub_backlist_records_response = requests.get(url, headers=headers)
pub_backlist_records_response_json = pub_backlist_records_response.json()
array_json = [pub_backlist_records_response_json]

while 'offset' in pub_backlist_records_response_json:
next_page_url = url + f"&offset={pub_backlist_records_response_json['offset']}"
pub_backlist_records_response = requests.get(next_page_url, headers=headers)
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_pub_backlist_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest

from mappings.publisher_backlist import PublisherBacklistMapping

class TestPublisherBacklistMapping:
    """Unit tests for the field mapping and formatting of PublisherBacklistMapping."""

    @pytest.fixture
    def testMapping(self):
        """Return a mapping instance whose constructor skips the parent setup."""
        class TestPublisherBacklistMapping(PublisherBacklistMapping):
            def __init__(self):
                self.mapping = None

        return TestPublisherBacklistMapping()

    @pytest.fixture
    def testRecordStandard(self, mocker):
        """Return a mock record populated with one value per mapped field."""
        return mocker.MagicMock(
            title='testTitle',
            authors=['testAuthor|||true'],
            dates=['testDate|publication_date'],
            publisher=['testPublisher||'],
            identifiers=['testISBN|isbn', 'testOCLC|oclc'],
            rights='in copyright||||',
            # Attribute name matches the 'contributors' key of the mapping.
            contributors=['testContributor|||contributor'],
            subjects='testSubject',
            source=['UofMichigan Backlist'],
            source_id='testSourceID',
            publisher_project_source=['University of Michigan Press'],
        )

    def test_createMapping(self, testMapping):
        """The mapping exposes the expected keys, in order."""
        recordMapping = testMapping.createMapping()

        assert list(recordMapping.keys()) == [
            'title', 'authors', 'dates', 'publisher',
            'identifiers', 'rights', 'contributors', 'subjects',
            'source', 'source_id', 'publisher_project_source'
        ]
        assert recordMapping['title'] == ('Title', '{0}')

    def test_applyFormatting_standard(self, testMapping, testRecordStandard):
        """Formatting a standard record normalises each attribute as expected."""
        testMapping.record = testRecordStandard

        testMapping.applyFormatting()

        assert testMapping.record.has_part == []
        assert testMapping.record.source == 'UofMichigan'
        assert testMapping.record.identifiers == ['testISBN|isbn', 'testOCLC|oclc']
        assert testMapping.record.source_id == 'testSourceID'
        assert testMapping.record.publisher == ['testPublisher||']
        assert testMapping.record.publisher_project_source == 'University of Michigan Press'
        assert testMapping.record.subjects == ['testSubject||']
        # Only the license and statement portion of the rights string is pinned.
        assert testMapping.record.rights.endswith('|in_copyright||In Copyright|')
Loading