Skip to content

Commit

Permalink
SFR-2308_PubBacklistMapping (#460)
Browse files Browse the repository at this point in the history
* SFR-2308_PubBacklistMapping

* Fixed method name

* Fixed failed test

* Fixed failed test

* Added source_id to createMapping and retrieved source name from Airtable

* Changed some variables to snake case

---------

Co-authored-by: Dmitri <[email protected]>
  • Loading branch information
mitri-slory and Dmitri authored Nov 26, 2024
1 parent f97ca6f commit 6efb5b6
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 19 deletions.
12 changes: 0 additions & 12 deletions mappings/UofM.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def formatSubjects(self):
return subjectList

def formatRights(self):
'''
The pipe delimiter is to separate the Rights table attributes into this format:
source|license|reason|statement|date
which makes it easy to place the right data into the columns when clustered
'''

if not self.record.rights:
return None

Expand All @@ -102,9 +96,3 @@ def formatRights(self):
return 'UofM|{}||{}|'.format('public_domain', 'Public Domain')

return None






96 changes: 96 additions & 0 deletions mappings/publisher_backlist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from .json import JSONMapping

class PublisherBacklistMapping(JSONMapping):
    """Map a Publisher Backlist source record onto the pipeline's record fields.

    ``createMapping`` declares which source field feeds each record attribute;
    ``applyFormatting`` then normalizes the mapped values into the
    pipe-delimited strings built by the ``format_*`` helpers below.
    """

    def __init__(self, source):
        """Set up the mapping for a single source record.

        Args:
            source: The raw record data, handed to ``JSONMapping`` unchanged.
        """
        # The parent is given an empty mapping; the real one is attached right after.
        super().__init__(source, {})
        self.mapping = self.createMapping()

    def createMapping(self):
        """Return the source-field to record-attribute mapping table.

        Each entry pairs a source field name with a format string; ``{0}`` is
        the placeholder for the source value. How tuple entries and list
        entries are applied is decided by ``JSONMapping`` (not shown here).
        """
        return {
            'title': ('Title', '{0}'),
            'authors': ('Author(s)', '{0}'),
            'dates': [('Pub Date', '{0}|publication_date')],
            'publisher': [('Publisher (from Projects)', '{0}||')],
            'identifiers': [
                ('ISBN', '{0}|isbn'),
                ('OCLC', '{0}|oclc')
            ],
            'rights': ('DRB Rights Classification', '{0}||||'),
            'contributors': [('Contributors', '{0}|||contributor')],
            'subjects': ('Subject 1', '{0}'),
            'source': ('Project Name (from Projects)', '{0}'),
            'source_id': ('DRB Record_ID', '{0}'),
            'publisher_project_source': ('Publisher (from Projects)', '{0}')
        }

    def applyFormatting(self):
        """Normalize the mapped record in place.

        ``source`` is reduced before ``format_rights`` runs because the rights
        string embeds the reduced source name.
        """
        self.record.has_part = []

        if self.record.source:
            # Keep only the first space-delimited token of the first entry.
            self.record.source = self.record.source[0].split(' ')[0]

        if self.record.publisher_project_source:
            self.record.publisher_project_source = self.record.publisher_project_source[0]

        if self.record.authors:
            self.record.authors = self.format_authors()

        if self.record.subjects:
            self.record.subjects = self.format_subjects()

        if self.record.identifiers:
            self.record.identifiers = self.format_identifiers()

        # Unconditional: format_rights itself returns None when rights is empty.
        self.record.rights = self.format_rights()

    def format_authors(self):
        """Return the authors as a list of ``'<name>|||true'`` strings.

        A semicolon-separated value yields one entry per name, with
        surrounding whitespace trimmed and empty segments dropped; any other
        value yields a single entry.
        """
        authors = self.record.authors

        if ';' in authors:
            # Split on the bare semicolon and trim, so both "A;B" and "A; B" work.
            return [
                f'{name.strip()}|||true'
                for name in authors.split(';')
                if name.strip()
            ]

        return [f'{authors}|||true']

    def format_identifiers(self):
        """Expand a multi-ISBN first identifier into one entry per ISBN.

        When the first identifier is an ISBN entry whose value holds several
        semicolon-separated ISBNs, returns one ``'<isbn>|isbn'`` string per
        ISBN, followed by the second identifier if it is an OCLC entry.
        In every other case the identifier list is returned unchanged.
        """
        identifiers = self.record.identifiers
        first_identifier = identifiers[0]

        if 'isbn' not in first_identifier:
            return identifiers

        isbn_string = first_identifier.split('|')[0]

        if ';' not in isbn_string:
            return identifiers

        # Split on the bare semicolon and trim, so "a;b" and "a; b" both split.
        formatted_isbns = [
            f'{isbn.strip()}|isbn'
            for isbn in isbn_string.split(';')
            if isbn.strip()
        ]

        if len(identifiers) > 1 and 'oclc' in identifiers[1]:
            formatted_isbns.append(f'{identifiers[1]}')

        return formatted_isbns

    def format_subjects(self):
        """Return the subjects as a list of ``'<subject>||'`` strings.

        A pipe-separated value yields one entry per segment; any other value
        yields a single entry.
        """
        subjects = self.record.subjects

        if '|' in subjects:
            return [f'{subject}||' for subject in subjects.split('|')]

        return [f'{subjects}||']

    def format_rights(self):
        """Build the rights string ``source|license||statement|``.

        Returns:
            The formatted string when the first pipe-delimited segment of
            ``record.rights`` is exactly ``'in copyright'`` or
            ``'public domain'``; otherwise ``None`` (including when no rights
            value is present).
        """
        if not self.record.rights:
            return None

        rights_status = self.record.rights.split('|')[0]

        # Use the record's actual source value, not the literal text
        # 'self.record.source', as the first segment.
        if rights_status == 'in copyright':
            return '{}|{}||{}|'.format(self.record.source, 'in_copyright', 'In Copyright')

        if rights_status == 'public domain':
            return '{}|{}||{}|'.format(self.record.source, 'public_domain', 'Public Domain')

        return None
1 change: 0 additions & 1 deletion processes/ingest/publisher_backlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def runProcess(self):
else:
logger.warning(f'Unknown Publisher Backlist ingestion process type {self.process}')
return

for record in records:
self.addDCDWToUpdateList(record)

Expand Down
11 changes: 5 additions & 6 deletions services/sources/publisher_backlist_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import requests
import json
import urllib.parse
from typing import Optional, Dict
from typing import Optional

from logger import create_log
from mappings.UofM import UofMMapping
from mappings.publisher_backlist import PublisherBacklistMapping
from .source_service import SourceService

logger = create_log(__name__)
Expand All @@ -23,14 +23,14 @@ def get_records(
start_timestamp: datetime=None,
offset: Optional[int]=None,
limit: Optional[int]=None
) -> list[UofMMapping]:
) -> list[PublisherBacklistMapping]:
array_json_records = self.get_records_json(full_import, start_timestamp, offset, limit)

for json_dict in array_json_records:
for records_value in json_dict['records']:
try:
record_metadata_dict = records_value
record = UofMMapping(record_metadata_dict)
record_metadata_dict = records_value['fields']
record = PublisherBacklistMapping(record_metadata_dict)
record.applyMapping()
except Exception:
logger.exception(f'Failed to process Publisher Backlist record')
Expand Down Expand Up @@ -89,7 +89,6 @@ def get_records_array(self,
pub_backlist_records_response = requests.get(url, headers=headers)
pub_backlist_records_response_json = pub_backlist_records_response.json()
array_json = [pub_backlist_records_response_json]

while 'offset' in pub_backlist_records_response_json:
next_page_url = url + f"&offset={pub_backlist_records_response_json['offset']}"
pub_backlist_records_response = requests.get(next_page_url, headers=headers)
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_pub_backlist_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest

from mappings.publisher_backlist import PublisherBacklistMapping

class TestPublisherBacklistMapping:
    """Unit tests for the PublisherBacklistMapping table and its formatting step."""

    @pytest.fixture
    def test_mapping(self):
        # Subclass that skips the real constructor so no source record is needed.
        class TestPublisherBacklistMapping(PublisherBacklistMapping):
            def __init__(self):
                self.mapping = None

        return TestPublisherBacklistMapping()

    @pytest.fixture
    def testRecordStandard(self, mocker):
        # Attribute names mirror the keys returned by createMapping. A MagicMock
        # silently invents any attribute it is asked for, so a misspelled name
        # here would go unnoticed; 'contributors' must be plural to match.
        return mocker.MagicMock(
            title='testTitle',
            authors=['testAuthor|||true'],
            dates=['testDate|publication_date'],
            publisher=['testPublisher||'],
            identifiers=['testISBN|isbn', 'testOCLC|oclc'],
            rights='in copyright||||',
            contributors=['testContributor|||contributor'],
            subjects='testSubject',
            source=['UofMichigan Backlist'],
            source_id='testSourceID',
            publisher_project_source=['University of Michigan Press']
        )

    def test_createMapping(self, test_mapping):
        """The mapping exposes the expected keys, in order, with the title entry intact."""
        record_mapping = test_mapping.createMapping()

        assert list(record_mapping.keys()) == [
            'title', 'authors', 'dates', 'publisher',
            'identifiers', 'rights', 'contributors', 'subjects',
            'source', 'source_id', 'publisher_project_source'
        ]
        assert record_mapping['title'] == ('Title', '{0}')

    def test_applyFormatting_standard(self, test_mapping, testRecordStandard):
        """applyFormatting reduces source/publisher source and formats subjects."""
        test_mapping.record = testRecordStandard

        test_mapping.applyFormatting()

        assert test_mapping.record.has_part == []
        assert test_mapping.record.source == 'UofMichigan'
        assert test_mapping.record.identifiers == ['testISBN|isbn', 'testOCLC|oclc']
        assert test_mapping.record.source_id == 'testSourceID'
        assert test_mapping.record.publisher == ['testPublisher||']
        assert test_mapping.record.publisher_project_source == 'University of Michigan Press'
        assert test_mapping.record.subjects == ['testSubject||']

0 comments on commit 6efb5b6

Please sign in to comment.