Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip language info that is not three letters long #2072

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 19 additions & 7 deletions src/main/resources/alma/fix/otherFields.fix
Original file line number Diff line number Diff line change
@@ -1,24 +1,36 @@
# 008 - Fixed-Length Data Elements-General Information (NR) - No subfields
# 041 - Language Code (R)

set_array("@language")
copy_field("008", "@008-lang")
substring("@008-lang", "35", "3")
copy_field("@008-lang", "@language.$append")
copy_field("041[ 01] .[adj]", "@language.$append")
copy_field("@008-lang", "@language.$append.id")
copy_field("@008-lang", "@language.$last.label")
do list(path: "041[ 01] ","var":"$i")
copy_field("$i.[adj]", "@language.$append.id")
copy_field("$i.[adj]", "@language.$last.label")
end

uniq("@language")
lookup("@language.*.label","ISO639-2-to-GND", delete:"true")

set_array("language[]")
do list(path:"@language", "var":"$i")
unless any_match("$i","zxx|mul|sgn|und|.*[\\|\\#].*|\\s*")
copy_field("$i", "language[].$append.id")
copy_field("$i", "language[].$last.label")
unless any_match("$i.id","zxx|mul|sgn|und|.*[\\|\\#].*|\\s*")
if exists("$i.label")
copy_field("$i.id", "language[].$append.id")
copy_field("$i.label", "language[].$last.label")
elsif any_match("$i.id","[dD]eutsch")
add_field("language[].$append.id","ger")
add_field("language[].$last.label","Deutsch")
end
end
end

lookup("language[].*.label","ISO639-2-to-GND")

prepend("language[].*.id", "http://id.loc.gov/vocabulary/iso639-2/")

uniq("language[]")

# 300 - Physical Description (R)
# We reuse the introx transformation here.

Expand Down
136 changes: 136 additions & 0 deletions src/test/resources/alma-fix/990126426530206441.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
{
"@context" : "http://lobid.org/resources/context.jsonld",
"id" : "http://lobid.org/resources/990126426530206441#!",
"type" : [ "BibliographicResource", "Article" ],
"medium" : [ {
"label" : "Print",
"id" : "http://rdaregistry.info/termList/RDAproductionMethod/1010"
} ],
"title" : "Gute Chancen. Stand und Perspektiven der Kombikraftwerkstechnik mit Kohlevergasung",
"almaMmsId" : "990126426530206441",
"hbzId" : "TT000252730",
"deprecatedUri" : "http://lobid.org/resources/TT000252730#!",
"oclcNumber" : [ "1046441573" ],
"publication" : [ {
"startDate" : "1996",
"type" : [ "PublicationEvent" ]
} ],
"describedBy" : {
"id" : "http://lobid.org/resources/990126426530206441",
"label" : "Webseite der hbz-Ressource 990126426530206441",
"type" : [ "BibliographicDescription" ],
"inDataset" : {
"id" : "http://lobid.org/resources/dataset#!",
"label" : "lobid-resources – Der hbz-Verbundkatalog als Linked Open Data"
},
"resultOf" : {
"type" : [ "CreateAction" ],
"endTime" : "0000-00-00T00:00:00",
"instrument" : {
"id" : "https://github.com/hbz/lobid-resources",
"type" : [ "SoftwareApplication" ],
"label" : "Software lobid-resources"
},
"object" : {
"id" : "https://lobid.org/marcxml/990126426530206441",
"dateCreated" : "2021-04-05",
"dateModified" : "2024-08-14",
"type" : [ "DataFeedItem" ],
"label" : "hbz-Ressource 990126426530206441 im Exportformat MARC21 XML",
"inDataset" : {
"id" : "https://datahub.io/dataset/hbz_unioncatalog",
"label" : "hbz_unioncatalog"
},
"sourceOrganization" : {
"id" : "http://lobid.org/organisations/DE-Bm3#!",
"label" : "Bibliothek des Ruhrgebiets"
},
"provider" : {
"id" : "http://lobid.org/organisations/DE-Bm3#!",
"label" : "Bibliothek des Ruhrgebiets"
},
"modifiedBy" : [ {
"id" : "http://lobid.org/organisations/DE-605#!",
"label" : "hbz - Hochschulbibliothekszentrum des Landes Nordrhein-Westfalen"
} ]
}
},
"license" : [ {
"id" : "http://creativecommons.org/publicdomain/zero/1.0",
"label" : "Creative Commons-Lizenz CC0 1.0 Universal"
} ]
},
"sameAs" : [ {
"id" : "https://hub.culturegraph.org/resource/(DE-605)990126426530206441",
"label" : "Culturegraph Ressource"
}, {
"id" : "http://worldcat.org/oclc/1046441573",
"label" : "OCLC Ressource"
} ],
"containedIn" : [ {
"id" : "http://lobid.org/resources/ZDB-626249-1#!",
"label" : "lobid Ressource"
} ],
"inCollection" : [ {
"id" : "https://nrw.digibib.net/search/hbzvk/",
"label" : "DigiBib hbz Verbundkatalog",
"type" : [ "Collection" ]
}, {
"id" : "http://lobid.org/organisations/DE-655#!",
"label" : "hbz - Hochschulbibliothekszentrum des Landes Nordrhein-Westfalen, Netzwerkzone",
"type" : [ "Collection" ]
} ],
"language" : [ {
"id" : "http://id.loc.gov/vocabulary/iso639-2/ger",
"label" : "Deutsch"
} ],
"note" : [ "In: Energie Spektr.. - 11 (1996) Nr. 3 S. 18/21 : Abb.; 2 Lit." ],
"bibliographicCitation" : "Energie Spektr.. - 11 (1996) Nr. 3 S. 18/21 : Abb.; 2 Lit.",
"hasItem" : [ {
"label" : "lobid Bestandsressource",
"type" : [ "Item", "PhysicalObject" ],
"serialNumber" : "61992-10",
"currentLibrary" : "BR003",
"currentLocation" : "S",
"heldBy" : {
"isil" : "DE-Bm3-3",
"id" : "http://lobid.org/organisations/DE-Bm3-3#!",
"label" : "Bibliothek des Ruhrgebiets, Bestandsabteilung Bergbau-Bücherei und Ruhrgebiet"
},
"inCollection" : [ {
"id" : "http://lobid.org/organisations/DE-Bm3#!",
"label" : "Bibliothek des Ruhrgebiets"
} ],
"id" : "http://lobid.org/items/990126426530206441:DE-Bm3-3:2310018530007508#!"
} ],
"bibliographicLevel" : {
"label" : "Monographic component part",
"id" : "https://www.loc.gov/marc/bibliographic/bdleader.html#Monographic_component_part"
},
"contribution" : [ {
"agent" : {
"label" : "Kuske, E.",
"type" : [ "Person" ]
},
"role" : {
"id" : "http://id.loc.gov/vocabulary/relators/aut",
"label" : "Autor/in"
},
"type" : [ "Contribution" ]
}, {
"agent" : {
"gndIdentifier" : "117215481",
"id" : "https://d-nb.info/gnd/117215481",
"label" : "Schellberg, Wilhelm",
"type" : [ "Person" ],
"dateOfBirth" : "1880",
"dateOfDeath" : "1937",
"altLabel" : [ "Schellberg, Wilh.", "Schellberg, W." ]
},
"role" : {
"id" : "http://id.loc.gov/vocabulary/relators/aut",
"label" : "Autor/in"
},
"type" : [ "Contribution" ]
} ]
}
156 changes: 156 additions & 0 deletions src/test/resources/alma-fix/990126426530206441.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<?xml version="1.0" encoding="UTF-8"?>
<record>
<leader>00810naa#a2200253#c#4500</leader>
<controlfield tag="005">20210408144638.0</controlfield>
<controlfield tag="007">tu</controlfield>
<controlfield tag="008">020507|1996####xx############|||#|#####c</controlfield>
<controlfield tag="003">DE-605</controlfield>
<controlfield tag="001">990126426530206441</controlfield>
<datafield tag="016" ind1="7" ind2=" ">
<subfield code="a">1046441573</subfield>
<subfield code="2">OCoLC</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-605)TT000252730</subfield>
</datafield>
<datafield tag="040" ind1=" " ind2=" ">
<subfield code="a">Bm 3</subfield>
<subfield code="b">ger</subfield>
<subfield code="c">Bm 3</subfield>
<subfield code="e">rakwb</subfield>
<subfield code="d">DE-605</subfield>
</datafield>
<datafield tag="041" ind1=" " ind2=" ">
<subfield code="a">deutsch</subfield>
</datafield>
<datafield tag="100" ind1="1" ind2=" ">
<subfield code="a">Kuske, E.</subfield>
<subfield code="4">aut</subfield>
</datafield>
<datafield tag="245" ind1="1" ind2="0">
<subfield code="a">Gute Chancen. Stand und Perspektiven der Kombikraftwerkstechnik mit Kohlevergasung.</subfield>
</datafield>
<datafield tag="264" ind1=" " ind2="1">
<subfield code="c">1996</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(OCoLC)1046441573</subfield>
</datafield>
<datafield tag="964" ind1="0" ind2="s">
<subfield code="F">030</subfield>
<subfield code="A">a|1uc||||||17</subfield>
</datafield>
<datafield tag="964" ind1="0" ind2="s">
<subfield code="F">050</subfield>
<subfield code="A">a|||||||||||||</subfield>
</datafield>
<datafield tag="964" ind1="0" ind2="s">
<subfield code="F">051</subfield>
<subfield code="A">a|||||||</subfield>
</datafield>
<datafield tag="700" ind1="1" ind2=" ">
<subfield code="a">Schellberg, Wilhelm</subfield>
<subfield code="d">1880-1937</subfield>
<subfield code="0">(DE-588)117215481</subfield>
<subfield code="4">aut</subfield>
<subfield code="0">https://d-nb.info/gnd/117215481</subfield>
<subfield code="0">http://viaf.org/viaf/100223623</subfield>
<subfield code="B">GND-117215481</subfield>
</datafield>
<datafield tag="773" ind1="0" ind2="8">
<subfield code="i">Enthalten in</subfield>
<subfield code="d"> 1996</subfield>
<subfield code="w">(DE-600)626249-1</subfield>
</datafield>
<datafield tag="500" ind1=" " ind2=" ">
<subfield code="a">In: Energie Spektr.. - 11 (1996) Nr. 3 S. 18/21 : Abb.; 2 Lit.</subfield>
<subfield code="9">F:525</subfield>
</datafield>
<datafield tag="035" ind1=" " ind2=" ">
<subfield code="a">(DE-599)HBZTT000252730</subfield>
</datafield>
<datafield tag="MBD" ind1=" " ind2=" ">
<subfield code="M">49HBZ_NETWORK</subfield>
<subfield code="i">990126426530206441</subfield>
<subfield code="n">HBZ Network</subfield>
</datafield>
<datafield tag="MBD" ind1=" " ind2=" ">
<subfield code="M">49HBZ_BRIDGE_BDR</subfield>
<subfield code="i">9925154707508</subfield>
<subfield code="n">Bibliothek des Ruhrgebiets, Bochum</subfield>
</datafield>
<datafield tag="MNG" ind1=" " ind2=" ">
<subfield code="c">System</subfield>
<subfield code="f">OTHER</subfield>
<subfield code="i">marc21</subfield>
<subfield code="k">01</subfield>
<subfield code="e">false</subfield>
<subfield code="d">2024-08-14 04:06:09 Europe/Berlin</subfield>
<subfield code="g">012642653-HBZ01</subfield>
<subfield code="j">60</subfield>
<subfield code="a">import</subfield>
<subfield code="b">2021-04-05 07:59:18 Europe/Berlin</subfield>
</datafield>
<datafield tag="H52" ind1="0" ind2=" ">
<subfield code="b">BR003</subfield>
<subfield code="c">S</subfield>
<subfield code="8">2210018630007508</subfield>
</datafield>
<datafield tag="HOL" ind1=" " ind2=" ">
<subfield code="d">2023-02-16 01:48:14</subfield>
<subfield code="8">2210018630007508</subfield>
<subfield code="b">2023-02-16 01:48:14</subfield>
<subfield code="M">49HBZ_BRIDGE_BDR</subfield>
<subfield code="g">false</subfield>
<subfield code="a">System</subfield>
<subfield code="c">System</subfield>
</datafield>
<datafield tag="ITM" ind1=" " ind2=" ">
<subfield code="H">2210018630007508</subfield>
<subfield code="x">S</subfield>
<subfield code="f">BOOK</subfield>
<subfield code="v">S</subfield>
<subfield code="p">32</subfield>
<subfield code="X">System</subfield>
<subfield code="Y">2002-05-07 02:00:00 Europe/Berlin</subfield>
<subfield code="M">49HBZ_BRIDGE_BDR</subfield>
<subfield code="s">1</subfield>
<subfield code="d">0</subfield>
<subfield code="V">System</subfield>
<subfield code="b">61992-10</subfield>
<subfield code="a">2310018530007508</subfield>
<subfield code="D">00000000</subfield>
<subfield code="W">2023-02-16 02:48:15 Europe/Berlin</subfield>
<subfield code="u">BR003</subfield>
<subfield code="w">BR003</subfield>
</datafield>
<datafield tag="GPN" ind1="1" ind2=" ">
<subfield code="a">Schellberg, Wilh.</subfield>
<subfield code="d">1880-1937</subfield>
<subfield code="A">GND</subfield>
<subfield code="B">GND-117215481</subfield>
<subfield code="C">400</subfield>
</datafield>
<datafield tag="GPN" ind1="1" ind2=" ">
<subfield code="a">Schellberg, W.</subfield>
<subfield code="d">1880-1937</subfield>
<subfield code="A">GND</subfield>
<subfield code="B">GND-117215481</subfield>
<subfield code="C">400</subfield>
</datafield>
<datafield tag="GSI" ind1="7" ind2=" ">
<subfield code="a">117215481</subfield>
<subfield code="0">http://d-nb.info/gnd/117215481</subfield>
<subfield code="2">gnd</subfield>
<subfield code="A">GND</subfield>
<subfield code="B">GND-117215481</subfield>
<subfield code="C">024</subfield>
</datafield>
<datafield tag="GSI" ind1="7" ind2=" ">
<subfield code="a">5110475</subfield>
<subfield code="2">geprishisp</subfield>
<subfield code="A">GND</subfield>
<subfield code="B">GND-117215481</subfield>
<subfield code="C">024</subfield>
</datafield>
</record>
Loading
Loading