Skip to content

Commit

Permalink
Pre-Release Data Preparation Tool 3.5.0 (GND4C-Testing).
Browse files Browse the repository at this point in the history
  • Loading branch information
olivergoetze committed Jul 19, 2022
1 parent 9726dcc commit dff6936
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 24 deletions.
23 changes: 18 additions & 5 deletions modules/connectors/gnd4c_nds/gnd4c_nds.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@


def parse_xml_content(session_data, xml_findbuch_in, input_type, input_file, error_status, propagate_logging, administrative_data, provider_rights, serializer):
# Aggregiertes Logging vorbereiten
logfile_in_multiple_date_elements = open("log_multiple_date_elements.txt", "a", encoding="utf-8")

namespaces = {"gnd4c": "http://gnd4c.digicult-verbund.de"}
xml_result = None

Expand Down Expand Up @@ -207,6 +210,11 @@ def parse_xml_content(session_data, xml_findbuch_in, input_type, input_file, err
if get_compare_value(geographic_area_code_uri_element) != "":
object_metadata["geographic_area_code"].append(get_compare_value(geographic_area_code_uri_element))

geographic_area_code_label_element = geographic_area_code_element.find("gnd4c:label", namespaces)
if geographic_area_code_label_element is not None:
if get_compare_value(geographic_area_code_label_element) != "":
object_metadata["geographic_area_code"].append(get_compare_value(geographic_area_code_label_element))

# gnd4c:person/gnd4c:placeOfBirth|placeOfDeath|associatedPlace
object_metadata["place_of_birth"] = []
object_metadata["place_of_death"] = []
Expand Down Expand Up @@ -273,15 +281,20 @@ def parse_xml_content(session_data, xml_findbuch_in, input_type, input_file, err
object_metadata["period_of_activity"].append(date_single)

if len(object_metadata["date_of_birth"]) > 1:
logger.warning("Für Datensatz-ID {} sind {} dateOfBirth-Elemente vorhanden. Bitte prüfen, ob Geburts- und Sterbedatum korrekt zusammengesetzt wurden.".format(object_metadata["record_id"], len(object_metadata["date_of_birth"])))
if len(object_metadata["date_of_death"]) > 1:
logger.warning("Für Datensatz-ID {} sind {} dateOfDeath-Elemente vorhanden. Bitte prüfen, ob Geburts- und Sterbedatum korrekt zusammengesetzt wurden.".format(object_metadata["record_id"], len(object_metadata["date_of_death"])))
log_message = "Für Datensatz-ID {} sind {} dateOfBirth-Elemente vorhanden. Bitte prüfen, ob Geburts- und Sterbedatum korrekt zusammengesetzt wurden. (Datei: {})".format(object_metadata["record_id"], len(object_metadata["date_of_birth"]), input_file)
logger.warning(log_message)
logfile_in_multiple_date_elements.write("{}\n".format(log_message))


if len(object_metadata["date_of_death"]) > 1:
log_message = "Für Datensatz-ID {} sind {} dateOfDeath-Elemente vorhanden. Bitte prüfen, ob Geburts- und Sterbedatum korrekt zusammengesetzt wurden. (Datei: {})".format(object_metadata["record_id"], len(object_metadata["date_of_death"]), input_file)
logger.warning(log_message)
logfile_in_multiple_date_elements.write("{}\n".format(log_message))

if serializer == "marcxml":
xml_result = map2marcxml.serialize_metadata(session_data, object_id, object_level, object_type, object_parent_id, object_metadata, object_rights, object_binaries, administrative_data, xml_base=xml_result)

if serializer == "marcxml":
xml_result = map2marcxml.serialize_metadata(session_data, object_id, object_level, object_type, object_parent_id, object_metadata, object_rights, object_binaries, administrative_data, input_file, xml_base=xml_result)

logfile_in_multiple_date_elements.close()
if xml_result is not None:
return xml_result
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
<?xml version='1.0' encoding='UTF-8'?>
<provider_script_set><provider id="GND4C-NDS" range="global"><set id="0c59f30406904f9c9a563a66f53b3d5f"><name>GND4C-NDS-Format in MarcXML umwandeln</name><description></description><modules><module><module_provider>common</module_provider><module_name>maintenance_function.py</module_name><module_config>{'maintenance_type': 'mapping_definition'}</module_config></module><module><module_provider>GND4C</module_provider><module_name>split_marcxml_records_from_gnd4c_connector.py</module_name><module_config/></module></modules></set></provider></provider_script_set>
<provider_script_set><provider id="GND4C-NDS" range="global"><set id="0c59f30406904f9c9a563a66f53b3d5f"><name>GND4C-NDS-Format in MarcXML umwandeln</name><description></description><modules><module><module_provider>common</module_provider><module_name>maintenance_function.py</module_name><module_config>{'maintenance_type': 'mapping_definition'}</module_config></module><module><module_provider>GND4C</module_provider><module_name>set_authentication_code_gnd3.py</module_name><module_config/></module><module><module_provider>GND4C</module_provider><module_name>set_general_note_labw.py</module_name><module_config/></module><module><module_provider>GND4C</module_provider><module_name>set_other_classification_number_from_config.py</module_name><module_config/></module><module><module_provider>GND4C</module_provider><module_name>set_source_data_found_from_config.py</module_name><module_config/></module><module><module_provider>GND4C</module_provider><module_name>set_teilbestandskennzeichen_d.py</module_name><module_config/></module><module><module_provider>GND4C</module_provider><module_name>sort_marcxml_fields.py</module_name><module_config/></module></modules></set></provider></provider_script_set>
34 changes: 34 additions & 0 deletions modules/provider_specific/GND4C/sort_marcxml_fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from lxml import etree
from loguru import logger


def get_sort_key_from_tag(element):
    """Return the sort key for a control- or datafield element.

    The key is the value of the element's ``tag`` attribute. Elements
    without a ``tag`` attribute get the fixed fallback key "00000000".
    """
    return element.attrib.get("tag", "00000000")

def get_sort_key_from_code(element):
    """Return the sort key for a subfield element.

    The key is based on the element's ``code`` attribute. Numeric codes are
    prefixed with "zzzzzzzz" so that they sort behind codes made of letters;
    every other code is returned unchanged. Elements without a ``code``
    attribute get the fixed fallback key "00000000".
    """
    code = element.attrib.get("code")
    if code is None:
        return "00000000"
    # Push numeric subfield codes to the end; letter codes come first.
    return "zzzzzzzz{}".format(code) if code.isnumeric() else code


def parse_xml_content(xml_findbuch_in, input_type, input_file):
    """Sort the control-, data- and subfields of all MARC21 records in place.

    For every ``record`` element (MARC21 slim namespace) found below the root
    of ``xml_findbuch_in``:

    * the direct children of the record are reordered by the key returned
      from ``get_sort_key_from_tag``;
    * the children of each direct ``datafield`` child are reordered by the
      key returned from ``get_sort_key_from_code``.

    ``sorted`` is stable, so children with equal keys keep their relative
    order.

    Args:
        xml_findbuch_in: XML tree or element containing the records; it is
            modified in place.
        input_type: Not used by this function.
        input_file: Not used by this function.

    Returns:
        The same ``xml_findbuch_in`` object that was passed in.
    """
    marc_ns = "{http://www.loc.gov/MARC21/slim}"

    # Use the relative descendant search ".//" instead of the absolute "//".
    # On an ElementTree the absolute form is rewritten to ".//" anyway (with a
    # FutureWarning), and on a plain Element it raises SyntaxError. The
    # relative form yields the same matches and works for both input kinds.
    # As before, only descendants are matched, not the root element itself.
    for record_element in xml_findbuch_in.findall(".//{}record".format(marc_ns)):
        # Sort controlfields and datafields by their tag.
        record_element[:] = sorted(record_element, key=get_sort_key_from_tag)

        for datafield_element in record_element.findall("{}datafield".format(marc_ns)):
            # Sort the subfields of this datafield by their code.
            datafield_element[:] = sorted(datafield_element, key=get_sort_key_from_code)

    return xml_findbuch_in
Loading

0 comments on commit dff6936

Please sign in to comment.