Skip to content

Commit

Permalink
Merge branch 'main' of github.com:pkiraly/qa-catalogue
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Nov 1, 2023
2 parents 1207a33 + ab53d93 commit c57a545
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 45 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# QA catalogue<br/>a metadata quality assessment tool for library catalogue records (MARC, PICA)

[![Java CI with Maven](https://github.com/pkiraly/qa-catalogue/actions/workflows/maven.yml/badge.svg)](https://github.com/pkiraly/qa-catalogue/actions/workflows/maven.yml)
[![Coverage Status](https://coveralls.io/repos/github/pkiraly/metadata-qa-marc/badge.svg?branch=main)](https://coveralls.io/github/pkiraly/metadata-qa-marc?branch=main)

QA catalogue is a set of software packages for bibliographical record quality assessment. It reads MARC or PICA files (in different formats), analyses some quality dimensions, and saves the results into CSV files. These CSV files can be used in different contexts; we provide a lightweight, web-based [user interface](#user-interface) for that. Some of the functionalities are available as a [web service](https://github.com/pkiraly/metadata-qa-marc-ws), so the validation could be built into a cataloguing/quality assessment workflow.

![Output sample](https://github.com/pkiraly/metadata-qa-marc-web/raw/gh-pages/img/issues-v1.gif)
Expand Down Expand Up @@ -209,11 +212,11 @@ If you do not want to
#### run

```bash
catalogues/[your script] [command]
catalogues/[your script] [command(s)]
```
or
```bash
./qa-catalogue --params="[options]" [command]
./qa-catalogue --params="[options]" [command(s)]
```

The following commands are supported:
Expand Down
93 changes: 51 additions & 42 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,11 @@ do_network_analysis() {

untrace

cat network-pairs.csv | sort | uniq -c | sort -nr > network-pairs-uniq-with-count.csv
sort network-pairs.csv | uniq -c | sort -nr > network-pairs-uniq-with-count.csv
awk '{print $2 " " $3}' network-pairs-uniq-with-count.csv > network-pairs-all.csv

log "zipping output"
PWD=`pdw`
PWD=$(pwd)
cd ${OUTPUT_DIR}
zip network-input network-nodes.csv network-nodes-???.csv network-pairs-???.csv network-by-concepts-tags.csv
cd $PWD
Expand Down Expand Up @@ -453,8 +453,8 @@ EOF
}

do_all_analyses() {
tasks=$(echo "${ANALYSES}" | tr , ' ')
for task in $tasks; do
analysis_tasks=$(echo "${ANALYSES}" | tr , ' ')
for task in $analysis_tasks; do
declare -F "do_$task" > /dev/null || fatal "unknown analysis task: $task"
done
for task in $(echo "${ANALYSES}" | tr , ' '); do
Expand Down Expand Up @@ -482,7 +482,7 @@ ME=$0
cat <<END
Run QA catalogue analyses
${ME} [VARIABLES] <COMMAND>
${ME} [VARIABLES] <COMMAND[,COMMAND...]>
Commands:
validate record validation
Expand Down Expand Up @@ -538,7 +538,6 @@ Environmental variables:
more info: https://github.com/pkiraly/qa-catalogue
END
exit 1
}

config() {
Expand Down Expand Up @@ -587,10 +586,18 @@ else
fi
ANALYSES=${ANALYSES:-$ALL_ANALYSES}

# check directories for processing commands
if [[ ! "${1:-help}" =~ ^(help|config|export-schema-files)$ ]]; then
cmd=$1
tasks="${1:-help}"
datatask=

# Check whether data is going to be processed
# Decide whether any of the requested tasks actually processes data.
# help/config/export-schema-files are meta commands and need no input
# files or output directories; anything else flips $datatask on.
for task in ${tasks//,/ }; do
  case $task in
    help|config|export-schema-files) ;;  # meta command — no data involved
    *) datatask=true ;;
  esac
done

# check directories for processing commands
if [[ "$datatask" = true ]]; then
mkdir -p $PREFIX
mkdir -p $OUTPUT_DIR

Expand All @@ -600,47 +607,49 @@ if [[ ! "${1:-help}" =~ ^(help|config|export-schema-files)$ ]]; then

ls ${MARC_DIR}/${MASK} &> /dev/null || fatal "Missing input files: ${MARC_DIR}/${MASK}!\n"

if [[ ! -z "${UPDATE:-}" ]]; then
if [[ -n "${UPDATE:-}" ]]; then
log "update: $UPDATE"
echo "${UPDATE}" > "${OUTPUT_DIR}/last-update.csv"
fi
fi

case "${1:-help}" in
validate) do_validate ; do_validate_sqlite ;;
validate-sqlite) do_validate_sqlite ;;
prepare-solr) do_prepare_solr ;;
index) do_index ;;
postprocess_solr) do_postprocess_solr ;;
completeness) do_completeness ; do_completeness_sqlite ;;
completeness-sqlite) do_completeness_sqlite ;;
classifications) do_classifications ;;
authorities) do_authorities ;;
tt-completeness) do_tt_completeness ;;
shelf-ready-completeness) do_shelf_ready_completeness ;;
bl-classification) do_bl_classification ;;
serial-score) do_serial_score ;;
format) do_format ;;
functional-analysis) do_functional_analysis ;;
network-analysis) do_network_analysis ;;
pareto) do_pareto ;;
marc-history) do_marc_history ;;
record-patterns) do_record_patterns ;;
mysql) do_mysql ;;
export-schema-files) do_export_schema_files ;;
shacl4bib) do_shacl4bib ;;
all-analyses) do_all_analyses ;;
all-solr) do_all_solr ;;
all) do_all_analyses ; do_all_solr ;;
version-link) do_version_link ;;
config) config ;;
help) help ;;
*) fatal "unknown command: $1"
esac
# Execute each requested command, in the order given.
# $tasks is the comma-separated command list taken from $1 (default: help).
# Each command maps onto one or more do_* analysis functions.
for task in ${tasks//,/ }; do
  case $task in
    validate) do_validate ; do_validate_sqlite ;;
    validate-sqlite) do_validate_sqlite ;;
    prepare-solr) do_prepare_solr ;;
    index) do_index ;;
    postprocess_solr) do_postprocess_solr ;;
    completeness) do_completeness ; do_completeness_sqlite ;;
    completeness-sqlite) do_completeness_sqlite ;;
    classifications) do_classifications ;;
    authorities) do_authorities ;;
    tt-completeness) do_tt_completeness ;;
    shelf-ready-completeness) do_shelf_ready_completeness ;;
    bl-classification) do_bl_classification ;;
    serial-score) do_serial_score ;;
    format) do_format ;;
    functional-analysis) do_functional_analysis ;;
    network-analysis) do_network_analysis ;;
    pareto) do_pareto ;;
    marc-history) do_marc_history ;;
    record-patterns) do_record_patterns ;;
    mysql) do_mysql ;;
    export-schema-files) do_export_schema_files ;;
    shacl4bib) do_shacl4bib ;;
    all-analyses) do_all_analyses ;;
    all-solr) do_all_solr ;;
    all) do_all_analyses ; do_all_solr ;;
    version-link) do_version_link ;;
    config) config ;;
    help) help ;;
    # BUG FIX: report the offending task, not $1 — with a comma-separated
    # command list, $1 is the whole list, which obscures which entry failed.
    *) fatal "unknown command: $task" ;;
  esac
done

untrace

if [ ! -z "${cmd:-}" ]; then
if [[ "$datatask" = true ]]; then
sec=$SECONDS
log "DONE in $(printf '%02d:%02d:%02d\n' $((sec/3600)) $((sec%3600/60)) $((sec%60)))"
fi
2 changes: 1 addition & 1 deletion qa-catalogue
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -ueo pipefail

usage() {
cat << EOF
Usage: $0 [options] <command>
Usage: $0 [options] <command[,command...]>
QA catalogue for analysing library data
Expand Down

0 comments on commit c57a545

Please sign in to comment.