Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

importer: handle different import media types #920

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions cds_ils/importer/XMLRecordToJson.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from invenio_records.api import Record

from cds_ils.importer import marc21
from cds_ils.importer.errors import LossyConversion, RecordModelMissing
from cds_ils.importer.errors import LossyConversion, UnrecognisedImportMediaType
from cds_ils.importer.handlers import xml_import_handlers


Expand Down Expand Up @@ -54,14 +54,17 @@ def dump(self):
is_deletable = False

init_fields = {}
if "im" in marc_record.get("leader", []):
leader_tag = marc_record.get("leader", [])
if "am" in leader_tag:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch was added so that the importer fails with an explicit error when an unknown media type is provided, while still handling `am` as an e-book.

init_fields.update({"_eitem": {"_type": "e-book"}})
elif "im" in leader_tag or "jm" in leader_tag:
init_fields.update({"_eitem": {"_type": "audiobook"}})
elif "gm" in marc_record.get("leader", []):
elif "gm" in leader_tag:
init_fields.update(
{"document_type": "MULTIMEDIA", "_eitem": {"_type": "video"}}
)
else:
init_fields.update({"_eitem": {"_type": "e-book"}})
raise UnrecognisedImportMediaType(leader_tag)
# MARCXML -> JSON fields translation
val = self.dojson_model.do(
marc_record,
Expand Down
6 changes: 6 additions & 0 deletions cds_ils/importer/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ class RecordModelMissing(CDSImporterException):
message = "[Record did not match any available model]"


class UnrecognisedImportMediaType(CDSImporterException):
    """Unrecognised record media type exception.

    Raised by the XML-to-JSON conversion when a record's MARC leader contains
    none of the media type codes it knows how to map (``am``, ``im``, ``jm``,
    ``gm``). The offending leader value is passed as the exception argument.
    """

    # Class-level message text; how it is surfaced to the user depends on
    # CDSImporterException, which is not visible here.
    message = "Record media type is not recognised."


class UnexpectedValue(CDSImporterException):
"""The corresponding value is unexpected."""

Expand Down
139 changes: 139 additions & 0 deletions tests/importer/data/safari_audiobook2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<record xmlns="http://www.loc.gov/MARC21/slim">
<leader>00000njm a22000007i 4500</leader>
<controlfield tag="001">on1417409648</controlfield>
<controlfield tag="003">OCoLC</controlfield>
<controlfield tag="005">20240125213018.0</controlfield>
<controlfield tag="006">m o h </controlfield>
<controlfield tag="007">sz zunnnnnuneu</controlfield>
<controlfield tag="007">cr nnannnuuuuu</controlfield>
<controlfield tag="008">240117s2024 xx nnnn o z n eng d</controlfield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">ORMDA</subfield>
<subfield code="b">eng</subfield>
<subfield code="e">rda</subfield>
<subfield code="e">pn</subfield>
<subfield code="c">ORMDA</subfield>
<subfield code="d">OCLCO</subfield>
</datafield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">9781663731913</subfield>
<subfield code="q">(electronic audio bk.)</subfield>
</datafield>
<datafield tag="020" ind1=" " ind2=" ">
<subfield code="a">1663731918</subfield>
<subfield code="q">(electronic audio bk.)</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)1417409648</subfield>
</datafield>
<datafield tag="037" ind1=" " ind2=" ">
<subfield code="a">9781663731913</subfield>
<subfield code="b">O'Reilly Media</subfield>
</datafield>
<datafield tag="050" ind1=" " ind2="4">
<subfield code="a">BF575.A6</subfield>
</datafield>
<datafield tag="082" ind1="0" ind2="4">
<subfield code="a">152.4/6</subfield>
<subfield code="2">23/eng/20240117</subfield>
</datafield>
<datafield tag="049" ind1=" " ind2=" ">
<subfield code="a">MAIN</subfield>
</datafield>
<datafield tag="245" ind1="0" ind2="0">
<subfield code="a">Managing your anxiety /</subfield>
<subfield code="c">Harvard Business Review.</subfield>
</datafield>
<datafield tag="250" ind1=" " ind2=" ">
<subfield code="a">[First edition].</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="a">[Place of publication not identified] :</subfield>
<subfield code="b">Ascent Audio,</subfield>
<subfield code="c">2024.</subfield>
</datafield>
<datafield tag="300" ind1=" " ind2=" ">
<subfield code="a">1 online resource (1 sound file (1 hr., 27 min.))</subfield>
</datafield>
<datafield tag="306" ind1=" " ind2=" ">
<subfield code="a">012700</subfield>
</datafield>
<datafield tag="336" ind1=" " ind2=" ">
<subfield code="a">spoken word</subfield>
<subfield code="b">spw</subfield>
<subfield code="2">rdacontent</subfield>
</datafield>
<datafield tag="337" ind1=" " ind2=" ">
<subfield code="a">computer</subfield>
<subfield code="b">c</subfield>
<subfield code="2">rdamedia</subfield>
</datafield>
<datafield tag="338" ind1=" " ind2=" ">
<subfield code="a">online resource</subfield>
<subfield code="b">cr</subfield>
<subfield code="2">rdacarrier</subfield>
</datafield>
<datafield tag="344" ind1=" " ind2=" ">
<subfield code="a">digital</subfield>
<subfield code="2">rdatr</subfield>
</datafield>
<datafield tag="347" ind1=" " ind2=" ">
<subfield code="a">audio file</subfield>
<subfield code="2">rdaft</subfield>
</datafield>
<datafield tag="511" ind1="0" ind2=" ">
<subfield code="a">Read by Steve Marvel, Teri Schnaubelt.</subfield>
</datafield>
<datafield tag="520" ind1=" " ind2=" ">
<subfield code="a">Make anxiety work for you. Work is stressful: We race to meet deadlines. We extend ourselves to return favors for colleagues. We set ambitious goals for ourselves and our teams. We measure ourselves against metrics, our competitors, and sometimes, our colleagues. Some of us even go beyond tangible metrics to internalize stress and fear of missing the mark-ruminating over presentations that didn't go according to plan, imagining worst-case scenarios, or standing frozen, paralyzed by perfectionism. But hypervigilance, worry, and catastrophizing don't have to hold you back at work. When channeled thoughtfully, anxiety can motivate us to be more resourceful, productive, and creative. It can break down barriers and create new bonds with our colleagues. Managing Your Anxiety will help you distinguish stress from anxiety, learn what anxiety looks like for you, understand it, and respond to it with self-compassion at work. With the latest psychological research and practical advice from leading experts, you'll learn how to recognize how your anxiety manifests itself; manage it in small, day-to-day moments and in more challenging times; experiment and find a mindfulness practice that works for you; and build a support infrastructure to help you manage your anxiety over the long term.</subfield>
</datafield>
<datafield tag="588" ind1=" " ind2=" ">
<subfield code="a">Online resource; title from title details screen (O'Reilly, viewed January 17, 2024).</subfield>
</datafield>
<datafield tag="590" ind1=" " ind2=" ">
<subfield code="a">O'Reilly</subfield>
<subfield code="b">O'Reilly Online Learning Platform: Academic Edition (SAML SSO Access)</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Anxiety.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Job stress.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="0">
<subfield code="a">Self-care, Health.</subfield>
</datafield>
<datafield tag="650" ind1=" " ind2="6">
<subfield code="a">Autothérapie.</subfield>
</datafield>
<datafield tag="655" ind1=" " ind2="7">
<subfield code="a">Audiobooks.</subfield>
<subfield code="2">lcgft</subfield>
</datafield>
<datafield tag="655" ind1=" " ind2="7">
<subfield code="a">Livres audio.</subfield>
<subfield code="2">rvmgf</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Marvel, Steve,</subfield>
<subfield code="e">narrator.</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Schnaubelt, Teri,</subfield>
<subfield code="e">narrator.</subfield>
</datafield>
<datafield tag="710" ind1="2" ind2=" ">
<subfield code="a">Harvard Business Review Press,</subfield>
<subfield code="e">issuing body.</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2="0">
<subfield code="u">https://learning.oreilly.com/library/view/-/9781663731913/?ar</subfield>
</datafield>
<datafield tag="994" ind1=" " ind2=" ">
<subfield code="a">92</subfield>
<subfield code="b">CHCER</subfield>
</datafield>
</record>
</collection>
152 changes: 152 additions & 0 deletions tests/importer/data/safari_record_broken_mediatype.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd">
<marc:record>
<marc:leader>01538nbla a2200397 a 4500</marc:leader>
<marc:controlfield tag="001">9780814415467</marc:controlfield>
<marc:controlfield tag="003">OCoLC</marc:controlfield>
<marc:controlfield tag="005">20200404234619.1</marc:controlfield>
<marc:controlfield tag="006">m o d </marc:controlfield>
<marc:controlfield tag="007">cr zn|||||||||</marc:controlfield>
<marc:controlfield tag="008">110609s2009 enka ob 001 0 eng d</marc:controlfield>
<marc:datafield tag="019" ind1=" " ind2=" ">
<marc:subfield code="a">765144159</marc:subfield>
<marc:subfield code="a">961649869</marc:subfield>
<marc:subfield code="a">962617255</marc:subfield>
</marc:datafield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9780814415467</marc:subfield>
</marc:datafield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="z">9781801073141 (pbk.)</marc:subfield>
<marc:subfield code="q">electronic publication</marc:subfield>
</marc:datafield>
<marc:datafield tag="020" ind1=" " ind2=" ">
<marc:subfield code="a">9780814415474</marc:subfield>
<marc:subfield code="q">(electronic bk. ;</marc:subfield>
<marc:subfield code="q">oBook)</marc:subfield>
</marc:datafield>
<marc:datafield tag="035" ind1=" " ind2=" ">
<marc:subfield code="a">(OCoLC)9780814415467</marc:subfield>
<marc:subfield code="z">(OCoLC)765144159</marc:subfield>
</marc:datafield>
<marc:datafield tag="040" ind1=" " ind2=" ">
<marc:subfield code="a">CtWfDGI</marc:subfield>
<marc:subfield code="b">eng</marc:subfield>
<marc:subfield code="e">pn</marc:subfield>
<marc:subfield code="c">STF</marc:subfield>
</marc:datafield>
<marc:datafield tag="041" ind1="0" ind2=" ">
<marc:subfield code="a">eng</marc:subfield>
</marc:datafield>
<marc:datafield tag="041" ind1="0" ind2=" ">
<marc:subfield code="a">ita</marc:subfield>
</marc:datafield>
<marc:datafield tag="050" ind1=" " ind2="4">
<marc:subfield code="a">HD69.P75</marc:subfield>
<marc:subfield code="b">M868 2009eb</marc:subfield>
</marc:datafield>
<marc:datafield tag="050" ind1="1" ind2="4">
<marc:subfield code="a">HD69.P86</marc:subfield>
</marc:datafield>
<marc:datafield tag="082" ind1="0" ind2="4">
<marc:subfield code="a">658.4/04</marc:subfield>
<marc:subfield code="2">22</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">Falcone, Paul,</marc:subfield>
<marc:subfield code="e">author.</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">Murray, Andy.</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="1" ind2="0">
<marc:subfield code="a">101 Sample Write-Ups for Documenting Employee Performance Problems: </marc:subfield>
<marc:subfield code="b">A Guide to Progressive Discipline and Termination</marc:subfield>
<marc:subfield code="h">[electronic resource] /</marc:subfield>
<marc:subfield code="c">Falcone, Paul.</marc:subfield>
</marc:datafield>
<marc:datafield tag="250" ind1=" " ind2=" ">
<marc:subfield code="a">2nd edition</marc:subfield>
</marc:datafield>
<marc:datafield tag="260" ind1=" " ind2=" ">
<marc:subfield code="a">London :</marc:subfield>
<marc:subfield code="b">TSO (The Stationary Office),</marc:subfield>
<marc:subfield code="c">2009.</marc:subfield>
</marc:datafield>
<marc:datafield tag="300" ind1=" " ind2=" ">
<marc:subfield code="a">1 online resource (399 pages)</marc:subfield>
</marc:datafield>
<marc:datafield tag="336" ind1=" " ind2=" ">
<marc:subfield code="a">text</marc:subfield>
<marc:subfield code="b">txt</marc:subfield>
<marc:subfield code="2">rdacontent</marc:subfield>
</marc:datafield>
<marc:datafield tag="337" ind1=" " ind2=" ">
<marc:subfield code="a">computer</marc:subfield>
<marc:subfield code="b">c</marc:subfield>
<marc:subfield code="2">rdamedia</marc:subfield>
</marc:datafield>
<marc:datafield tag="338" ind1=" " ind2=" ">
<marc:subfield code="a">online resource</marc:subfield>
<marc:subfield code="b">cr</marc:subfield>
<marc:subfield code="2">rdacarrier</marc:subfield>
</marc:datafield>
<marc:datafield tag="347" ind1=" " ind2=" ">
<marc:subfield code="a">text file</marc:subfield>
</marc:datafield>
<marc:datafield tag="365" ind1=" " ind2=" ">
<marc:subfield code="b">35.00</marc:subfield>
</marc:datafield>
<marc:datafield tag="490" ind1="1" ind2=" ">
<marc:subfield code="a">--For dummies</marc:subfield>
<marc:subfield code="v">1</marc:subfield>
</marc:datafield>
<marc:datafield tag="520" ind1=" " ind2=" ">
<marc:subfield code="a">A complete tool kit for handling disciplinary problems in a fair, responsible, and legally defensible way.</marc:subfield>
</marc:datafield>
<marc:datafield tag="533" ind1=" " ind2=" ">
<marc:subfield code="a">Electronic reproduction.</marc:subfield>
<marc:subfield code="b">Boston, MA :</marc:subfield>
<marc:subfield code="c">Safari,</marc:subfield>
<marc:subfield code="n">Available via World Wide Web.</marc:subfield>
<marc:subfield code="d">2010.</marc:subfield>
</marc:datafield>
<marc:datafield tag="538" ind1=" " ind2=" ">
<marc:subfield code="a">Mode of access: World Wide Web.</marc:subfield>
</marc:datafield>
<marc:datafield tag="542" ind1=" " ind2=" ">
<marc:subfield code="g">2010</marc:subfield>
</marc:datafield>
<marc:datafield tag="550" ind1=" " ind2=" ">
<marc:subfield code="a">Made available through: Safari, an O'Reilly Media Company.</marc:subfield>
</marc:datafield>
<marc:datafield tag="588" ind1="0" ind2="#">
<marc:subfield code="a">Online resource; Title from title page (viewed March 31, 2010)</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="0">
<marc:subfield code="a">Project management.</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="6">
<marc:subfield code="a">Gestion de projet.</marc:subfield>
</marc:datafield>
<marc:datafield tag="650" ind1=" " ind2="7">
<marc:subfield code="a">Project other.</marc:subfield>
<marc:subfield code="2">fast</marc:subfield>
<marc:subfield code="0">(OCoLC)fst01078797</marc:subfield>
</marc:datafield>
<marc:datafield tag="655" ind1=" " ind2="7">
<marc:subfield code="a">Electronic books.</marc:subfield>
<marc:subfield code="2">local</marc:subfield>
</marc:datafield>
<marc:datafield tag="700" ind1="1" ind2=" ">
<marc:subfield code="a">Bennett, Nigel.</marc:subfield>
<marc:subfield code="4">aut</marc:subfield>
</marc:datafield>
<marc:datafield tag="710" ind1="2" ind2=" ">
<marc:subfield code="a">Safari, an O'Reilly Media Company.</marc:subfield>
</marc:datafield>
<marc:datafield tag="856" ind1="4" ind2="0">
<marc:subfield code="z">Connect to this resource online</marc:subfield>
<marc:subfield code="u">https://learning.oreilly.com/library/view/-/9780814415467/?ar</marc:subfield>
</marc:datafield>
</marc:record>
</marc:collection>
Loading