diff --git a/build_standalone_dist.py b/build_standalone_dist.py index a38f60d..ba63709 100644 --- a/build_standalone_dist.py +++ b/build_standalone_dist.py @@ -10,6 +10,7 @@ timer_start = datetime.datetime.now() # Variablen für den Build-Prozess (64 bit Python-Umgebung): +include_providerspecific_modules = False qt_lib_path = "C:\\Users\\OGoetze\\venv\\build\\ddbmappings_build\\Lib\\site-packages\\PyQt5\\Qt\\bin" msvc_path = "C:\\Program Files (x86)\\Windows Kits\\10\\Redist\\ucrt\\DLLs\\x64" icon_path = "gui_components/ui_templates/resources/datapreparationtool.ico" @@ -30,7 +31,7 @@ rmtree("build") # Ausführen des PyInstaller-Skripts als Subprozess: -execute_string = 'pyinstaller --clean --onefile -p "{}" -p "{}" --name "datapreparationtool" --noconsole --icon "{}" main_gui.py'.format(qt_lib_path, msvc_path, icon_path) +execute_string = 'pyinstaller --clean --onefile -p "{}" -p "{}" --name "datapreparationtool" --icon "{}" main_gui.py'.format(qt_lib_path, msvc_path, icon_path) logger.info("Führe PyInstaller-Script aus: {execute_string}", execute_string=execute_string) subprocess.call(execute_string) @@ -45,6 +46,7 @@ os.makedirs("dist/modules/common/provider_metadata") os.makedirs("dist/modules/analysis/enrichment") os.makedirs("dist/modules/serializers/eadddb") +os.makedirs("dist/modules/provider_specific") logger.info("Kopieren der gui_session Daten ...") copyfile("gui_session/templates/processing_status.xml", "dist/gui_session/templates/processing_status.xml") @@ -57,7 +59,16 @@ copyfile("gui_components/ui_templates/resources/list.png", "dist/gui_components/ui_templates/resources/list.png") logger.info("Kopieren der providerspezifischen Anpassungen, inkl. modules/provider_specific/aggregator_mapping.xml ...") -copytree("modules/provider_specific", "dist/modules/provider_specific") +if include_providerspecific_modules: + copytree("modules/provider_specific", "dist/modules/provider_specific") + os.remove("dist/modules/provider_specific/handle_provider_aggregator_mapping.py") + os.remove("dist/modules/provider_specific/handle_provider_rights.py") + os.remove("dist/modules/provider_specific/handle_provider_scripts.py") + if os.path.isdir("dist/modules/provider_specific/__pycache__"): + rmtree("dist/modules/provider_specific/__pycache__") +else: + copyfile("modules/provider_specific/aggregator_mapping.xml", + "dist/modules/provider_specific/aggregator_mapping.xml") logger.info("Kopieren des Templates zur METS/MODS-Generierung ...") copyfile("modules/ead2mets/mets_template.xml", "dist/modules/ead2mets/mets_template.xml") diff --git a/gui_session/version.xml b/gui_session/version.xml index 9ca209d..20e4c05 100644 --- a/gui_session/version.xml +++ b/gui_session/version.xml @@ -1,6 +1,6 @@ - 3.0.0 + 3.0.1 stable - 32ad5ca0 + 6bfc54be diff --git a/main_gui.py b/main_gui.py index b35b188..4994545 100644 --- a/main_gui.py +++ b/main_gui.py @@ -535,7 +535,10 @@ def finished_validation(): self.validationStatusDialog_ui.frame_validation_error.setVisible(True) self.statusbar.showMessage("Validierung abgeschlossen.") - self.enable_processing_controls() + if len(self.comboBox_provider.currentText()) == 0: + self.enable_processing_controls(is_first_launch=True) + else: + self.enable_processing_controls() self.validationStatusDialog_ui.stackedWidget.setCurrentIndex(1) @@ -859,22 +862,12 @@ def get_provider_list(self): self.comboBox_provider.clear() self.comboBox_provider.addItems(self.provider_list) - # Wenn noch keine Datengeber vorhanden sind: Einstellungs-TabWidget, Buttons für Transformation und Analyse, Menüpunkte "Tools" und "Validierung" sowie "Datei" -> "Dateien per OAI-PMH laden ..." deaktivieren + # Wenn noch keine Datengeber vorhanden sind, Funktionen zur Prozessierung deaktivieren. if len(self.comboBox_provider.currentText()) == 0: - self.tabWidget.setEnabled(False) - self.pushButton_startTransformation.setEnabled(False) - self.pushButton_startAnalyse.setEnabled(False) - self.menuTools.setEnabled(False) - self.menuValidierung.setEnabled(False) - self.action_fetch_from_oai.setEnabled(False) + self.disable_processing_controls(is_first_launch=True) self.stackedWidget.setCurrentIndex(1) else: - self.tabWidget.setEnabled(True) - self.pushButton_startTransformation.setEnabled(True) - self.pushButton_startAnalyse.setEnabled(True) - self.menuTools.setEnabled(True) - self.menuValidierung.setEnabled(True) - self.action_fetch_from_oai.setEnabled(True) + self.enable_processing_controls() self.stackedWidget.setCurrentIndex(0) def set_provider_from_list(self, current_index): @@ -1294,23 +1287,27 @@ def save_mets_settings(self): def open_in_browser(target_url): webbrowser.open(target_url) - def disable_processing_controls(self): + def disable_processing_controls(self, is_first_launch=False): """GUI-Elemente zur Steuerung von Prozessierungen deaktivieren, während bereits eine Prozessierung läuft.""" self.tabWidget.setEnabled(False) self.pushButton_startTransformation.setEnabled(False) self.pushButton_startAnalyse.setEnabled(False) self.menuTools.setEnabled(False) - self.menuValidierung.setEnabled(False) + if not is_first_launch: + self.menuValidierung.setEnabled(False) self.action_fetch_from_oai.setEnabled(False) - def enable_processing_controls(self): + def enable_processing_controls(self, is_first_launch=False): """GUI-Elemente zur Steuerung von Prozessierungen aktivieren, nachdem eine Prozessierung abgeschlossen wurde.""" - self.tabWidget.setEnabled(True) - self.pushButton_startTransformation.setEnabled(True) - self.pushButton_startAnalyse.setEnabled(True) - self.menuTools.setEnabled(True) - self.menuValidierung.setEnabled(True) - self.action_fetch_from_oai.setEnabled(True) + if not is_first_launch: + self.tabWidget.setEnabled(True) + self.pushButton_startTransformation.setEnabled(True) + self.pushButton_startAnalyse.setEnabled(True) + self.menuTools.setEnabled(True) + self.action_fetch_from_oai.setEnabled(True) + self.menuValidierung.setEnabled(True) + else: + self.menuValidierung.setEnabled(True) def exit_application(self): handle_session_data.save_to_xml(self.session_data) # Speichern der Sitzungsdaten beim Beenden diff --git a/modules/analysis/enrichment/helpers/process_repeatable_elements.py b/modules/analysis/enrichment/helpers/process_repeatable_elements.py index 75a778c..71d58c1 100644 --- a/modules/analysis/enrichment/helpers/process_repeatable_elements.py +++ b/modules/analysis/enrichment/helpers/process_repeatable_elements.py @@ -49,13 +49,15 @@ def merge_repeatable_elements(source_value_multiple, compare_with_existing_eleme for existing_element in element_exists_in_target_doc: existing_element_p = existing_element.findall("{urn:isbn:1-931666-22-9}p") existing_element_text = None - if len(existing_element_p) == 0 and (len(existing_element.text) > 0 or len(existing_element) > 0): # abstract (ohne p-Subelement) berücksichtigen + existing_element_text_compare_value = get_compare_value(existing_element) + + if len(existing_element_p) == 0 and existing_element_text_compare_value != "": # abstract (ohne p-Subelement) berücksichtigen existing_element_text = existing_element if len(existing_element_p) > 0: if get_compare_value(value[1]) == get_compare_value(existing_element_p[0]): target_possible_duplicate = True if existing_element_text is not None: - if get_compare_value(value[1]) == get_compare_value(existing_element_text): + if get_compare_value(value[1]) == existing_element_text_compare_value: target_possible_duplicate = True if not target_possible_duplicate: diff --git a/transformation_p1.py b/transformation_p1.py index d680a33..51159b8 100644 --- a/transformation_p1.py +++ b/transformation_p1.py @@ -116,116 +116,121 @@ def run_transformation_p1(root_path, session_data=None, is_gui_session=False): # Aufruf des allg. Software-Skripts ext = [".xml", ".XML"] - input_file_count = len(os.listdir('.')) - input_file_i = 1 - for input_file in os.listdir('.'): - if input_file.endswith(tuple(ext)) and input_file != "provider.xml": - if handle_thread_actions.load_from_xml("stop_thread", root_path) is True: - break - + input_files = [] + for input_file_candidate in os.listdir("."): + if input_file_candidate.endswith(tuple(ext)) and input_file_candidate != "provider.xml": + input_files.append(input_file_candidate) + input_files_count = len(input_files) + + for input_file_i, input_file in enumerate(input_files): + if handle_thread_actions.load_from_xml("stop_thread", root_path) is True: + break + + transformation_progress = int((input_file_i / input_files_count) * 100) + + try: + xml_findbuch_in = etree.parse(input_file) + except etree.XMLSyntaxError as e: + logger.warning("Verarbeitung der XML-Datei übersprungen (Fehler beim Parsen): {}".format(e)) + error_status = 1 + write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) + continue + + # Bestimmen von input_type (Findbuch oder Tektonik). Kann kein Wert ermittelt werden, so erfolgt ein Fallback auf den Standardwert "findbuch" + archdesc_type = xml_findbuch_in.findall('//{urn:isbn:1-931666-22-9}archdesc[@level="collection"]') + if len(archdesc_type) == 1: + if "type" in archdesc_type[0].attrib: + input_type = archdesc_type[0].attrib["type"].lower() + + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Verarbeite Softwaremodul für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) + + provider_args = [xml_findbuch_in, input_path, input_file, output_path, provider_isil, provider_id, provider_name, provider_software, provider_archivtyp, provider_state, provider_addressline_strasse, provider_addressline_ort, provider_addressline_mail, provider_website, provider_tektonik_url, input_type, mdb_override] # Übergabe der Parameter an die Software-Skripte + + try: + if provider_software == "eadddb": + xml_findbuch_in = eadddb.parse_xml_content(*provider_args) + + except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: + traceback_string = traceback.format_exc() + logger.warning("Softwareskript konnte für die Datei {} nicht angewandt werden; Fehlermeldung: {}.\n {}".format(input_file, e, traceback_string)) + error_status = 1 + write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) + + # Anwenden der Mapping-Definiton: + mapping_definition_args = [xml_findbuch_in, input_type, input_file, + error_status] # Parameter zur Übergabe an die Mapping-Definition + administrative_data = {"provider_isil": provider_isil, "provider_id": provider_id, "provider_name": provider_name, "provider_archivtyp": provider_archivtyp, "provider_state": provider_state, "provider_addressline_strasse": provider_addressline_strasse, "provider_addressline_ort": provider_addressline_ort, "provider_addressline_mail": provider_addressline_mail, "provider_website": provider_website, "provider_tektonik_url": provider_tektonik_url} + if apply_mapping_definition: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Anwenden der Mapping-Definition für {}: {} (Datei {}/{})".format( + input_type, input_file, input_file_i, input_files_count), error_status=error_status) try: - xml_findbuch_in = etree.parse(input_file) - except etree.XMLSyntaxError as e: - logger.warning("Verarbeitung der XML-Datei übersprungen (Fehler beim Parsen): {}".format(e)) + xml_findbuch_in = mapping_definition.apply_mapping(session_data, administrative_data, *mapping_definition_args) + except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: + traceback_string = traceback.format_exc() + logger.warning("Anwenden der Mapping-Definition für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) error_status = 1 write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - continue - # Bestimmen von input_type (Findbuch oder Tektonik). Kann kein Wert ermittelt werden, so erfolgt ein Fallback auf den Standardwert "findbuch" - archdesc_type = xml_findbuch_in.findall('//{urn:isbn:1-931666-22-9}archdesc[@level="collection"]') - if len(archdesc_type) == 1: - if "type" in archdesc_type[0].attrib: - input_type = archdesc_type[0].attrib["type"].lower() - write_processing_status(root_path=root_path, processing_step=10, status_message="Verarbeite Softwaremodul für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - - provider_args = [xml_findbuch_in, input_path, input_file, output_path, provider_isil, provider_id, provider_name, provider_software, provider_archivtyp, provider_state, provider_addressline_strasse, provider_addressline_ort, provider_addressline_mail, provider_website, provider_tektonik_url, input_type, mdb_override] # Übergabe der Parameter an die Software-Skripte + # Aufruf providerspezifischer Skripte: + provider_module_args = [root_path, xml_findbuch_in, input_type, input_file, + error_status] # Parameter zur Übergabe an die providerspezifischen Anpassungen + if is_gui_session is True: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Verarbeite providerspezifische Anpassungen für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) + xml_findbuch_in, error_status = handle_provider_scripts.parse_xml_content(*provider_module_args) + + # Anziehen der Binaries (falls "fetch_and_link_binaries = True" in transformation_p1) + if process_binaries: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Lade Binaries für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) + xml_findbuch_in = fetch_and_link_binaries.parse_xml_content(xml_findbuch_in, input_file, output_path, + input_type, input_path) + + # Generierung von METS-Dateien (falls "enable_mets_generation = True" in transformation_p1) + if enable_mets_generation: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Generiere METS-Dateien für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) + xml_findbuch_in = create_mets_files.parse_xml_content(xml_findbuch_in, input_file, output_path, + input_type, input_path, session_data) + + # Anreicherung der Rechte- und Lizenzinformation + if enrich_rights_info: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Anreichern der Rechteinformation für {}: {} (Datei {}/{})".format(input_type, input_file,input_file_i, input_files_count), error_status=error_status) + try: + xml_findbuch_in = handle_provider_rights.parse_xml_content(xml_findbuch_in, input_file, input_type) + except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: + traceback_string = traceback.format_exc() + logger.warning("Anreichern der Rechteinformation für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) + error_status = 1 + write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) + # Anreicherung der Aggregator-Zuordnung + if enrich_aggregator_info: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="Anreichern der Aggregatorinformation für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) try: - if provider_software == "eadddb": - xml_findbuch_in = eadddb.parse_xml_content(*provider_args) + xml_findbuch_in = handle_provider_aggregator_mapping.parse_xml_content(xml_findbuch_in, input_file, input_type) + except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: + traceback_string = traceback.format_exc() + logger.warning( + "Anreichern der Aggregator-Zuordnung für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) + error_status = 1 + write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) + + # Vorprozessierung für die DDB2017-Transformation + if enable_ddb2017_preprocessing: + write_processing_status(root_path=root_path, processing_step=transformation_progress, status_message="DDB2017-Vorprozessierung für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_files_count), error_status=error_status) + try: + xml_findbuch_in = ddb2017_preprocessing.parse_xml_content(xml_findbuch_in, input_file, input_type, provider_isil) except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: traceback_string = traceback.format_exc() - logger.warning("Softwareskript konnte für die Datei {} nicht angewandt werden; Fehlermeldung: {}.\n {}".format(input_file, e, traceback_string)) + logger.warning("DDB2017-Vorprozessierung für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) error_status = 1 write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - # Anwenden der Mapping-Definiton: - mapping_definition_args = [xml_findbuch_in, input_type, input_file, - error_status] # Parameter zur Übergabe an die Mapping-Definition - administrative_data = {"provider_isil": provider_isil, "provider_id": provider_id, "provider_name": provider_name, "provider_archivtyp": provider_archivtyp, "provider_state": provider_state, "provider_addressline_strasse": provider_addressline_strasse, "provider_addressline_ort": provider_addressline_ort, "provider_addressline_mail": provider_addressline_mail, "provider_website": provider_website, "provider_tektonik_url": provider_tektonik_url} - if apply_mapping_definition: - write_processing_status(root_path=root_path, processing_step=20, status_message="Anwenden der Mapping-Definition für {}: {} (Datei {}/{})".format( - input_type, input_file, input_file_i, input_file_count), error_status=error_status) - try: - xml_findbuch_in = mapping_definition.apply_mapping(session_data, administrative_data, *mapping_definition_args) - except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: - traceback_string = traceback.format_exc() - logger.warning("Anwenden der Mapping-Definition für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) - error_status = 1 - write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - - - # Aufruf providerspezifischer Skripte: - provider_module_args = [root_path, xml_findbuch_in, input_type, input_file, - error_status] # Parameter zur Übergabe an die providerspezifischen Anpassungen - if is_gui_session is True: - write_processing_status(root_path=root_path, processing_step=30, status_message="Verarbeite providerspezifische Anpassungen für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - xml_findbuch_in, error_status = handle_provider_scripts.parse_xml_content(*provider_module_args) - - # Anziehen der Binaries (falls "fetch_and_link_binaries = True" in transformation_p1) - if process_binaries: - write_processing_status(root_path=root_path, processing_step=60, status_message="Lade Binaries für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - xml_findbuch_in = fetch_and_link_binaries.parse_xml_content(xml_findbuch_in, input_file, output_path, - input_type, input_path) - - # Generierung von METS-Dateien (falls "enable_mets_generation = True" in transformation_p1) - if enable_mets_generation: - write_processing_status(root_path=root_path, processing_step=80, status_message="Generiere METS-Dateien für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - xml_findbuch_in = create_mets_files.parse_xml_content(xml_findbuch_in, input_file, output_path, - input_type, input_path, session_data) - - # Anreicherung der Rechte- und Lizenzinformation - if enrich_rights_info: - write_processing_status(root_path=root_path, processing_step=90, status_message="Anreichern der Rechteinformation für {}: {} (Datei {}/{})".format(input_type, input_file,input_file_i, input_file_count), error_status=error_status) - try: - xml_findbuch_in = handle_provider_rights.parse_xml_content(xml_findbuch_in, input_file, input_type) - except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: - traceback_string = traceback.format_exc() - logger.warning("Anreichern der Rechteinformation für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) - error_status = 1 - write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - - # Anreicherung der Aggregator-Zuordnung - if enrich_aggregator_info: - write_processing_status(root_path=root_path, processing_step=92, status_message="Anreichern der Aggregatorinformation für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - try: - xml_findbuch_in = handle_provider_aggregator_mapping.parse_xml_content(xml_findbuch_in, input_file, input_type) - except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: - traceback_string = traceback.format_exc() - logger.warning( - "Anreichern der Aggregator-Zuordnung für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) - error_status = 1 - write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - - - # Vorprozessierung für die DDB2017-Transformation - if enable_ddb2017_preprocessing: - write_processing_status(root_path=root_path, processing_step=95, status_message="DDB2017-Vorprozessierung für {}: {} (Datei {}/{})".format(input_type, input_file, input_file_i, input_file_count), error_status=error_status) - try: - xml_findbuch_in = ddb2017_preprocessing.parse_xml_content(xml_findbuch_in, input_file, input_type, provider_isil) - except (IndexError, TypeError, AttributeError, KeyError, SyntaxError) as e: - traceback_string = traceback.format_exc() - logger.warning("DDB2017-Vorprozessierung für {} {} fehlgeschlagen; Fehlermeldung: {}.\n {}".format(input_type, input_file, e, traceback_string)) - error_status = 1 - write_processing_status(root_path=root_path, processing_step=None, status_message=None, error_status=error_status) - - - serialize_xml_result(xml_findbuch_in, input_file, output_path, input_type, mdb_override) - - input_file_i += 1 - os.chdir('data_input/' + input_folder_name) # Zurücksetzen des CWD (current working directory) für das Einlesen der nächsten Datei + + serialize_xml_result(xml_findbuch_in, input_file, output_path, input_type, mdb_override) + + input_file_i += 1 + os.chdir('data_input/' + input_folder_name) # Zurücksetzen des CWD (current working directory) für das Einlesen der nächsten Datei write_processing_status(root_path=root_path, processing_step=100, status_message="Transformation abgeschlossen.", error_status=error_status) os.chdir("../..")