diff --git a/README.md b/README.md index 097eb98ce..7d00fcf7b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # QA catalogue
a metadata quality assessment tool for library catalogue records (MARC, PICA) +[![Java CI with Maven](https://github.com/pkiraly/qa-catalogue/actions/workflows/maven.yml/badge.svg)](https://github.com/pkiraly/qa-catalogue/actions/workflows/maven.yml) +[![Coverage Status](https://coveralls.io/repos/github/pkiraly/metadata-qa-marc/badge.svg?branch=main)](https://coveralls.io/github/pkiraly/metadata-qa-marc?branch=main) + QA catalogue is a set of software packages for bibliographical record quality assessment. It reads MARC or PICA files (in different formats), analyses some quality dimensions, and saves the results into CSV files. These CSV files could be used in different context, we provide a lightweight, web-based [user interface](#user-interface) for that. Some of the functionalities are available as a [web service](https://github.com/pkiraly/metadata-qa-marc-ws), so the validation could be built into a cataloguing/quality assessment workflow. ![Output sample](https://github.com/pkiraly/metadata-qa-marc-web/raw/gh-pages/img/issues-v1.gif) @@ -209,11 +212,11 @@ If you do not want to #### run ```bash -catalogues/[your script] [command] +catalogues/[your script] [command(s)] ``` or ```bash -./qa-catalogue --params="[options]" [command] +./qa-catalogue --params="[options]" [command(s)] ``` The following commands are supported: diff --git a/common-script b/common-script index 2ee21cbe7..b291c8500 100755 --- a/common-script +++ b/common-script @@ -180,11 +180,11 @@ do_network_analysis() { untrace - cat network-pairs.csv | sort | uniq -c | sort -nr > network-pairs-uniq-with-count.csv + sort network-pairs.csv | uniq -c | sort -nr > network-pairs-uniq-with-count.csv awk '{print $2 " " $3}' network-pairs-uniq-with-count.csv > network-pairs-all.csv log "ziping output" - PWD=`pdw` + PWD=$(pwd) cd ${OUTPUT_DIR} zip network-input network-nodes.csv network-nodes-???.csv network-pairs-???.csv network-by-concepts-tags.csv cd $PWD @@ -453,8 +453,8 @@ EOF } do_all_analyses() { - 
tasks=$(echo "${ANALYSES}" | tr , ' ') - for task in $tasks; do + analysis_tasks=$(echo "${ANALYSES}" | tr , ' ') + for task in $analysis_tasks; do declare -F "do_$task" > /dev/null || fatal "unknown analysis task: $task" done for task in $(echo "${ANALYSES}" | tr , ' '); do @@ -482,7 +482,7 @@ ME=$0 cat < + ${ME} [VARIABLES] Commands: validate record validation @@ -538,7 +538,6 @@ Environmental variables: more info: https://github.com/pkiraly/qa-catalogue END - exit 1 } config() { @@ -587,10 +586,18 @@ else fi ANALYSES=${ANALYSES:-$ALL_ANALYSES} -# check directories for processing commands -if [[ ! "${1:-help}" =~ ^(help|config|export-schema-files)$ ]]; then - cmd=$1 +tasks="${1:-help}" +datatask= +# Check whether data is going to be processed +for task in ${tasks//,/ }; do + if [[ ! "$task" =~ ^(help|config|export-schema-files)$ ]]; then + datatask=true + fi +done + +# check directories for processing commands +if [[ "$datatask" = true ]]; then mkdir -p $PREFIX mkdir -p $OUTPUT_DIR @@ -600,47 +607,49 @@ if [[ ! "${1:-help}" =~ ^(help|config|export-schema-files)$ ]]; then ls ${MARC_DIR}/${MASK} &> /dev/null || fatal "Missing input files: ${MARC_DIR}/${MASK}!\n" - if [[ ! 
-z "${UPDATE:-}" ]]; then + if [[ -n "${UPDATE:-}" ]]; then log "update: $UPDATE" echo "${UPDATE}" > "${OUTPUT_DIR}/last-update.csv" fi fi -case "${1:-help}" in - validate) do_validate ; do_validate_sqlite ;; - validate-sqlite) do_validate_sqlite ;; - prepare-solr) do_prepare_solr ;; - index) do_index ;; - postprocess_solr) do_postprocess_solr ;; - completeness) do_completeness ; do_completeness_sqlite ;; - completeness-sqlite) do_completeness_sqlite ;; - classifications) do_classifications ;; - authorities) do_authorities ;; - tt-completeness) do_tt_completeness ;; - shelf-ready-completeness) do_shelf_ready_completeness ;; - bl-classification) do_bl_classification ;; - serial-score) do_serial_score ;; - format) do_format ;; - functional-analysis) do_functional_analysis ;; - network-analysis) do_network_analysis ;; - pareto) do_pareto ;; - marc-history) do_marc_history ;; - record-patterns) do_record_patterns ;; - mysql) do_mysql ;; - export-schema-files) do_export_schema_files ;; - shacl4bib) do_shacl4bib ;; - all-analyses) do_all_analyses ;; - all-solr) do_all_solr ;; - all) do_all_analyses ; do_all_solr ;; - version-link) do_version_link ;; - config) config ;; - help) help ;; - *) fatal "unknown command: $1" -esac +for task in ${tasks//,/ }; do + case $task in + validate) do_validate ; do_validate_sqlite ;; + validate-sqlite) do_validate_sqlite ;; + prepare-solr) do_prepare_solr ;; + index) do_index ;; + postprocess_solr) do_postprocess_solr ;; + completeness) do_completeness ; do_completeness_sqlite ;; + completeness-sqlite) do_completeness_sqlite ;; + classifications) do_classifications ;; + authorities) do_authorities ;; + tt-completeness) do_tt_completeness ;; + shelf-ready-completeness) do_shelf_ready_completeness ;; + bl-classification) do_bl_classification ;; + serial-score) do_serial_score ;; + format) do_format ;; + functional-analysis) do_functional_analysis ;; + network-analysis) do_network_analysis ;; + pareto) do_pareto ;; + marc-history) 
do_marc_history ;; + record-patterns) do_record_patterns ;; + mysql) do_mysql ;; + export-schema-files) do_export_schema_files ;; + shacl4bib) do_shacl4bib ;; + all-analyses) do_all_analyses ;; + all-solr) do_all_solr ;; + all) do_all_analyses ; do_all_solr ;; + version-link) do_version_link ;; + config) config ;; + help) help ;; + *) fatal "unknown command: $task" + esac +done untrace -if [ ! -z "${cmd:-}" ]; then +if [[ "$datatask" = true ]]; then sec=$SECONDS log "DONE in $(printf '%02d:%02d:%02d\n' $((sec/3600)) $((sec%3600/60)) $((sec%60)))" fi diff --git a/qa-catalogue b/qa-catalogue index c0c8a3148..e7fce8fcc 100755 --- a/qa-catalogue +++ b/qa-catalogue @@ -3,7 +3,7 @@ set -ueo pipefail usage() { cat << EOF -Usage: $0 [options] +Usage: $0 [options] QA catalogue for analysing library data