Skip to content

Commit

Permalink
Release Data Preparation Tool 3.0.0 (first public release)
Browse files Browse the repository at this point in the history
  • Loading branch information
olivergoetze committed Mar 20, 2020
1 parent 91d05da commit 39c53b4
Show file tree
Hide file tree
Showing 304 changed files with 135,132 additions and 3 deletions.
44 changes: 42 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ dist/
downloads/
eggs/
.eggs/
lib/
# lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
Expand All @@ -38,12 +40,14 @@ pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

Expand All @@ -55,6 +59,7 @@ coverage.xml
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
Expand All @@ -72,11 +77,26 @@ target/
# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py
Expand All @@ -102,3 +122,23 @@ venv.bak/

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Jetbrains-specific
.idea/

# Project-specific
session.log
session.log.*
gui_session/processing_status.xml
gui_session/session.xml
gui_session/thread_actions.xml
data_input/
data_output/
utils/validation_results/
utils/xml_enriched_with_uuids/
utils/ddb_id_lists
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you
may consider it more useful to permit linking proprietary applications with
the library. If this is what you want to do, use the GNU Lesser General
Public License instead of this License. But first, please read
<https://www.gnu.org/licenses/why-not-lgpl.html>.
<https://www.gnu.org/licenses/why-not-lgpl.html>.
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
![Data Preparation Tool](https://github.com/olivergoetze/datapreparationtool/raw/master/dpt_screenshot.png "Data Preparation Tool")

![PyPI - Python Version](https://img.shields.io/pypi/pyversions/validify)

Das DDB Data Preparation Tool ist eine Open-Source-Anwendung zur Aufbereitung von Daten im [EAD(DDB)-Format](https://wiki.deutsche-digitale-bibliothek.de/pages/viewpage.action?pageId=19010180) für den Ingest in die Deutsche Digitale Bibliothek und das Archivportal-D. Es wird vornehmlich zur Datenanalyse und -anpassung in der Fachstelle Archiv verwendet, soll aber auch der Validierung von Exportdateien durch Datengeber, Schnittstellenentwickler und Softwarehersteller dienen.

### Installation
#### Windows
Sie können die aktuelle Version des Data Preparation Tools unter [Releases](https://github.com/Deutsche-Digitale-Bibliothek/ddblabs-datapreparationtool/releases) herunterladen. Das Tool läuft ohne zusätzliche Software-Installation. Es wird in einer 32- und 64-bit-Version bereitgestellt; letztere Version sollte bevorzugt verwendet werden.

#### Linux und macOS
##### Voraussetzungen
- Python 3.5+
- [PyQt5](https://pypi.org/project/PyQt5/)
- [PyQtWebEngine](https://pypi.org/project/PyQtWebEngine/)
- [lxml](https://pypi.org/project/lxml/)
- [requests](https://pypi.org/project/requests/)
- [loguru](https://pypi.org/project/loguru/)
- [validify](https://pypi.org/project/validify/)
- [pandas](https://pypi.org/project/pandas/)

```
git clone https://github.com/Deutsche-Digitale-Bibliothek/ddblabs-datapreparationtool.git .
python3 main_gui.py
```


### Weiterführende Informationen ...
... finden Sie [hier](https://wiki.deutsche-digitale-bibliothek.de/display/DFD/DDB+Data+Preparation+Tool).

Bei Fragen können Sie sich gerne an die [Fachstelle Archiv der Deutschen Digitalen Bibliothek](https://pro.deutsche-digitale-bibliothek.de/fachstelle-archiv) wenden.
82 changes: 82 additions & 0 deletions build_standalone_dist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import subprocess
import datetime
import struct
from shutil import copyfile
from shutil import rmtree
from shutil import copytree
from loguru import logger

# Build script: bundles main_gui.py into a single Windows executable with
# PyInstaller and then copies the data files that the executable expects to
# find next to it into the 'dist' folder.
timer_start = datetime.datetime.now()

# Build settings for a 64-bit Python environment (the default):
qt_lib_path = "C:\\Users\\OGoetze\\venv\\build\\ddbmappings_build\\Lib\\site-packages\\PyQt5\\Qt\\bin"
msvc_path = "C:\\Program Files (x86)\\Windows Kits\\10\\Redist\\ucrt\\DLLs\\x64"
icon_path = "gui_components/ui_templates/resources/datapreparationtool.ico"
data_files = []

# Pointer size in bits tells us whether the running interpreter is a 32- or
# 64-bit build; a 32-bit interpreter needs the matching Qt and UCRT paths.
python_arch = struct.calcsize("P") * 8
if python_arch == 32:  # 32 bit Python
    qt_lib_path = "C:\\Users\\OGoetze\\venv\\build\\ddbmappings_build_32bit\\Lib\\site-packages\\PyQt5\\Qt\\bin"
    msvc_path = "C:\\Program Files (x86)\\Windows Kits\\10\\Redist\\ucrt\\DLLs\\x86"
logger.info("Baue stand-alone Distribution für Windows ({architecture} bit).", architecture=str(python_arch))

# Clean up the output of a previous build, if any:
logger.info("Entferne Verzeichnisse 'dist' und 'build', falls aus vorherigem Build-Prozess vorhanden.")
for stale_dir in ("dist", "build"):
    if os.path.isdir(stale_dir):
        rmtree(stale_dir)

# Run PyInstaller as a subprocess. The command is passed as an argument list
# so that paths containing spaces need no manual quoting; execute_string is
# kept only as the human-readable form for the log.
pyinstaller_args = [
    "pyinstaller", "--clean", "--onefile",
    "-p", qt_lib_path,
    "-p", msvc_path,
    "--name", "datapreparationtool",
    "--noconsole",
    "--icon", icon_path,
    "main_gui.py",
]
execute_string = 'pyinstaller --clean --onefile -p "{}" -p "{}" --name "datapreparationtool" --noconsole --icon "{}" main_gui.py'.format(qt_lib_path, msvc_path, icon_path)
logger.info("Führe PyInstaller-Script aus: {execute_string}", execute_string=execute_string)
return_code = subprocess.call(pyinstaller_args)
if return_code != 0:
    # Do not continue with a half-finished 'dist' folder and a misleading
    # success message when PyInstaller itself failed.
    logger.error("PyInstaller wurde mit Rückgabewert {return_code} beendet. Build-Prozess wird abgebrochen.", return_code=return_code)
    raise SystemExit(return_code)

# Copy the data files the frozen application needs at runtime:
logger.info("Kopiere benötigte Data-Files ...")

logger.info("Erstellen von Unterzeichnissen ...")
for target_dir in (
    "dist/gui_session/templates",
    "dist/utils/xml_enriched_with_uuids",
    "dist/modules/xsl_transform",
    "dist/modules/ead2mets",
    "dist/modules/common/provider_metadata",
    "dist/modules/analysis/enrichment",
    "dist/modules/serializers/eadddb",
):
    os.makedirs(target_dir)

logger.info("Kopieren der gui_session Daten ...")
copyfile("gui_session/templates/processing_status.xml", "dist/gui_session/templates/processing_status.xml")
copyfile("gui_session/templates/session.xml", "dist/gui_session/templates/session.xml")
copyfile("gui_session/templates/thread_actions.xml", "dist/gui_session/templates/thread_actions.xml")
copyfile("gui_session/version.xml", "dist/gui_session/version.xml")

logger.info("Kopieren der UI-Ressourcen ...")
# copytree creates the missing parent directories, so it must run before the
# copyfile call that places list.png into the same 'resources' folder.
copytree("gui_components/ui_templates/resources/html", "dist/gui_components/ui_templates/resources/html")
copyfile("gui_components/ui_templates/resources/list.png", "dist/gui_components/ui_templates/resources/list.png")

logger.info("Kopieren der providerspezifischen Anpassungen, inkl. modules/provider_specific/aggregator_mapping.xml ...")
copytree("modules/provider_specific", "dist/modules/provider_specific")

logger.info("Kopieren des Templates zur METS/MODS-Generierung ...")
copyfile("modules/ead2mets/mets_template.xml", "dist/modules/ead2mets/mets_template.xml")

logger.info("Kopieren der externen Ressourcen der Common-Skripte ...")
copyfile("modules/common/provider_metadata/provider_template.xml", "dist/modules/common/provider_metadata/provider_template.xml")

logger.info("Kopieren der externen Ressourcen der Analyse-Skripte ...")
copyfile("modules/analysis/enrichment/fake_tektonik_template.xml", "dist/modules/analysis/enrichment/fake_tektonik_template.xml")
copytree("modules/analysis/previews/helpers/templates", "dist/modules/analysis/previews/helpers/templates")
copytree("modules/analysis/statistics/helpers/templates", "dist/modules/analysis/statistics/helpers/templates")
copytree("modules/analysis/validation/helpers/templates", "dist/modules/analysis/validation/helpers/templates")

logger.info("Kopieren der externen Ressourcen der Connector- und Serialisierungs-Skripte ...")
copyfile("modules/serializers/eadddb/ead_template_findbuch.xml", "dist/modules/serializers/eadddb/ead_template_findbuch.xml")
copyfile("modules/serializers/eadddb/ead_template_tektonik.xml", "dist/modules/serializers/eadddb/ead_template_tektonik.xml")


logger.info("Build-Prozess abgeschlossen. Ausgabe im Ordner 'dist'.")
timer_end = datetime.datetime.now()
processing_duration = timer_end - timer_start
logger.info("Prozessierungsdauer: {processing_duration}", processing_duration=processing_duration)
Binary file modified dpt_screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added gui_components/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions gui_components/get_module_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from lxml import etree
import os, json


def fetch_providerspecific_modules():
    """Collect all provider-specific adjustment modules, grouped by provider.

    Every sub-directory of ``modules/provider_specific`` whose name does not
    start with ``__`` is treated as one provider; its ISIL is the directory
    name with underscores replaced by hyphens. Inside each provider directory
    every ``*.py`` file except ``__init__*`` counts as a module
    (e.g. ``DE_2410/append_unitid_to_unittitle.py``).

    Returns:
        A list with one single-key dict per provider, mapping the ISIL to a
        list of ``[file_name, description]`` pairs.
    """
    base_dir = "modules/provider_specific"

    isils = [
        entry.replace("_", "-")
        for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry)) and not entry.startswith("__")
    ]

    collected = []
    for isil in isils:
        # Map the ISIL back to the on-disk directory name.
        module_dir = "modules/provider_specific/" + isil.replace("-", "_")
        described_modules = [
            [file_name, get_module_description(file_name, isil)]
            for file_name in os.listdir(module_dir)
            if file_name.endswith(".py") and not file_name.startswith("__init__")
        ]
        collected.append({isil: described_modules})

    return collected


def get_module_description(provider_module, provider_isil):
    """Return the description of a provider-specific module for the GUI.

    The description is read from the first ``<description>`` element below
    the root of
    ``modules/provider_specific/<ISIL with underscores>/metadata/<module>.xml``.

    Args:
        provider_module: File name of the module including its ``.py``
            suffix; the last three characters are stripped to build the
            metadata file name.
        provider_isil: ISIL of the provider; hyphens are replaced by
            underscores to obtain the directory name.

    Returns:
        The description text, or an empty string when the metadata file is
        missing, unreadable, not well-formed, or has no (non-empty)
        ``<description>`` element.
    """
    module_metadata_path = "modules/provider_specific/{}/metadata/{}.xml".format(
        provider_isil.replace("-", "_"), provider_module[:-3]
    )

    try:
        module_metadata_in = etree.parse(module_metadata_path)
    except (OSError, etree.XMLSyntaxError):
        # Best effort: a missing or broken metadata file must not prevent
        # the module itself from being listed.
        return ""

    # ".//description" is what lxml rewrites the former "//description" path
    # to (with a FutureWarning); findtext yields the default when no such
    # element exists and "" when the element has no text.
    return module_metadata_in.findtext(".//description", default="")

if __name__ == '__main__':
    # Manual check: dump the discovered provider modules as indented JSON.
    print(json.dumps(fetch_providerspecific_modules(), indent=2))
Empty file.
117 changes: 117 additions & 0 deletions gui_components/ui_templates/about_dialog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'about_dialog.ui'
#
# Created by: PyQt5 UI code generator 5.14.1
#
# WARNING! All changes made in this file will be lost!


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_aboutDialog(object):
    # Widget setup for the "about" dialog, generated by pyuic5 from
    # 'about_dialog.ui'. Per the generator's warning in the file header,
    # manual changes are lost when the file is regenerated.

    def setupUi(self, aboutDialog):
        # Builds all child widgets and layouts on the given dialog and stores
        # them as attributes on this object.

        # Dialog window: initial and minimum size are both 400x446.
        aboutDialog.setObjectName("aboutDialog")
        aboutDialog.setEnabled(True)
        aboutDialog.resize(400, 446)
        aboutDialog.setMinimumSize(QtCore.QSize(400, 446))
        aboutDialog.setAutoFillBackground(False)
        aboutDialog.setSizeGripEnabled(False)
        aboutDialog.setModal(False)

        # Outer grid (owned by the dialog) wrapping the inner content grid.
        self.gridLayout_2 = QtWidgets.QGridLayout(aboutDialog)
        self.gridLayout_2.setObjectName("gridLayout_2")
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")

        # Version label (row 2, column 0); its text is set in retranslateUi.
        self.label_about_dialog_version = QtWidgets.QLabel(aboutDialog)
        self.label_about_dialog_version.setScaledContents(True)
        self.label_about_dialog_version.setAlignment(QtCore.Qt.AlignCenter)
        self.label_about_dialog_version.setWordWrap(False)
        self.label_about_dialog_version.setObjectName("label_about_dialog_version")
        self.gridLayout.addWidget(self.label_about_dialog_version, 2, 0, 1, 1)

        # Fixed-size (131x51) image label showing 'apd-logo.png' (row 2, column 1).
        self.label_7 = QtWidgets.QLabel(aboutDialog)
        self.label_7.setMinimumSize(QtCore.QSize(131, 51))
        self.label_7.setMaximumSize(QtCore.QSize(131, 51))
        self.label_7.setText("")
        self.label_7.setPixmap(QtGui.QPixmap(":/about-dialog/apd-logo.png"))
        self.label_7.setScaledContents(True)
        self.label_7.setAlignment(QtCore.Qt.AlignCenter)
        self.label_7.setObjectName("label_7")
        self.gridLayout.addWidget(self.label_7, 2, 1, 1, 1, QtCore.Qt.AlignHCenter|QtCore.Qt.AlignVCenter)

        # Fixed-size (151x151) image label showing 'list.png' (row 0, column 0).
        self.label = QtWidgets.QLabel(aboutDialog)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.label.sizePolicy().hasHeightForWidth())
        self.label.setSizePolicy(sizePolicy)
        self.label.setMinimumSize(QtCore.QSize(151, 151))
        self.label.setMaximumSize(QtCore.QSize(151, 151))
        self.label.setText("")
        self.label.setPixmap(QtGui.QPixmap(":/about-dialog/list.png"))
        self.label.setScaledContents(True)
        self.label.setAlignment(QtCore.Qt.AlignCenter)
        self.label.setOpenExternalLinks(False)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)

        # Fixed 20x20 spacer in row 1, column 1.
        spacerItem = QtWidgets.QSpacerItem(20, 20, QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
        self.gridLayout.addItem(spacerItem, 1, 1, 1, 1)

        # Vertical layout (row 3, column 1) holding the fixed-size (111x51)
        # image label that shows 'ddb-logo.png'.
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.label_6 = QtWidgets.QLabel(aboutDialog)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Preferred)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.label_6.sizePolicy().hasHeightForWidth())
        self.label_6.setSizePolicy(sizePolicy)
        self.label_6.setMinimumSize(QtCore.QSize(111, 51))
        self.label_6.setMaximumSize(QtCore.QSize(111, 51))
        self.label_6.setText("")
        self.label_6.setPixmap(QtGui.QPixmap(":/about-dialog/ddb-logo.png"))
        self.label_6.setScaledContents(True)
        self.label_6.setObjectName("label_6")
        self.verticalLayout.addWidget(self.label_6, 0, QtCore.Qt.AlignHCenter|QtCore.Qt.AlignVCenter)
        self.gridLayout.addLayout(self.verticalLayout, 3, 1, 1, 1)

        # Word-wrapped rich-text label (row 0, column 1); text set in retranslateUi.
        self.label_2 = QtWidgets.QLabel(aboutDialog)
        self.label_2.setScaledContents(True)
        self.label_2.setAlignment(QtCore.Qt.AlignCenter)
        self.label_2.setWordWrap(True)
        self.label_2.setObjectName("label_2")
        self.gridLayout.addWidget(self.label_2, 0, 1, 1, 1)

        # Push button for the open-source components (row 4, column 0).
        self.pushButton_opensource_components = QtWidgets.QPushButton(aboutDialog)
        self.pushButton_opensource_components.setObjectName("pushButton_opensource_components")
        self.gridLayout.addWidget(self.pushButton_opensource_components, 4, 0, 1, 1)

        # Revision label (row 3, column 0); text set in retranslateUi.
        self.label_about_dialog_revision = QtWidgets.QLabel(aboutDialog)
        self.label_about_dialog_revision.setAlignment(QtCore.Qt.AlignCenter)
        self.label_about_dialog_revision.setObjectName("label_about_dialog_revision")
        self.gridLayout.addWidget(self.label_about_dialog_revision, 3, 0, 1, 1)

        # GitHub push button (row 5, column 0); no signal is connected here.
        self.buttonOpenGithub = QtWidgets.QPushButton(aboutDialog)
        self.buttonOpenGithub.setEnabled(True)
        self.buttonOpenGithub.setAutoDefault(False)
        self.buttonOpenGithub.setDefault(False)
        self.buttonOpenGithub.setFlat(False)
        self.buttonOpenGithub.setObjectName("buttonOpenGithub")
        self.gridLayout.addWidget(self.buttonOpenGithub, 5, 0, 1, 1)

        # Nest the content grid into the dialog's outer grid.
        self.gridLayout_2.addLayout(self.gridLayout, 1, 0, 1, 1)

        # Apply the display texts, then let Qt connect slots by object name.
        self.retranslateUi(aboutDialog)
        QtCore.QMetaObject.connectSlotsByName(aboutDialog)

    def retranslateUi(self, aboutDialog):
        # Sets the window title and all widget texts through Qt's translate
        # function, using the "aboutDialog" translation context.
        _translate = QtCore.QCoreApplication.translate
        aboutDialog.setWindowTitle(_translate("aboutDialog", "Über Data Preparation Tool"))
        # NOTE(review): the version and revision texts below look like
        # designer placeholders that are presumably overwritten by the
        # caller at runtime - confirm.
        self.label_about_dialog_version.setText(_translate("aboutDialog", "Version x.y (Pre-Release)"))
        self.label_2.setText(_translate("aboutDialog", "<html><head/><body><p><span style=\" font-weight:600;\">Data Preparation Tool</span></p><p>Ein Werkzeug zur Analyse und Aufbereitung von EAD(DDB)-Dateien für die Lieferung an DDB und Archivportal-D. </p><p>Entwickelt und zur Verfügung gestellt durch die DDB-Fachstelle Archiv.</p></body></html>"))
        self.pushButton_opensource_components.setText(_translate("aboutDialog", "Open-Source-Komponenten"))
        self.label_about_dialog_revision.setText(_translate("aboutDialog", "Revision: f28a591"))
        self.buttonOpenGithub.setText(_translate("aboutDialog", "Fork me on Github"))

from gui_components.ui_templates import resources_rc

if __name__ == "__main__":
    # Stand-alone preview: show the dialog in its own Qt application and
    # exit with the event loop's return code.
    import sys
    app = QtWidgets.QApplication(sys.argv)
    aboutDialog = QtWidgets.QDialog()
    ui = Ui_aboutDialog()
    ui.setupUi(aboutDialog)
    aboutDialog.show()
    sys.exit(app.exec_())
Loading

0 comments on commit 39c53b4

Please sign in to comment.