From 178fd6a5ebee0c687c47a35bdd78a56d3f20193c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= Date: Tue, 24 Oct 2023 21:14:49 +0200 Subject: [PATCH] Allow to run solr-indexing without an existing collection #289 --- catalogues/k10plus_pica_grouped.sh | 2 + index | 66 ++++++++++++++++-------------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/catalogues/k10plus_pica_grouped.sh b/catalogues/k10plus_pica_grouped.sh index bc39101c1..606bf72fb 100755 --- a/catalogues/k10plus_pica_grouped.sh +++ b/catalogues/k10plus_pica_grouped.sh @@ -9,6 +9,8 @@ TYPE_PARAMS="$TYPE_PARAMS --groupListFile src/main/resources/k10plus-libraries-b TYPE_PARAMS="$TYPE_PARAMS --ignorableFields 001@,001E,001L,001U,001U,001X,001X,002V,003C,003G,003Z,008G,017N,020F,027D,031B,037I,039V,042@,046G,046T,101@,101E,101U,102D,201E,201U,202D,1...,2..." #TYPE_PARAMS="$TYPE_PARAMS --ignorableIssueTypes undefinedField" TYPE_PARAMS="$TYPE_PARAMS --allowableRecords base64:"$(echo '002@.0 !~ "^L" && 002@.0 !~ "^..[iktN]" && (002@.0 !~ "^.v" || 021A.a?)' | base64 -w 0) +TYPE_PARAMS="$TYPE_PARAMS --solrForScoresUrl http://localhost:8983/solr/k10plus_pica_grouped_validation" +TYPE_PARAMS="$TYPE_PARAMS --indexWithTokenizedField" # MASK=sample.pica MASK=${MASK:=pica-with-holdings-info-1K.dat} # if not set in setdir.sh diff --git a/index b/index index 5af14d506..e8f913a7a 100755 --- a/index +++ b/index @@ -45,6 +45,7 @@ usage: -J, --groupBy group the results by the value of this data element (e.g. the ILN of library) -B, --validationCore the Solr collection used in the validation task -t, --outputDir the directory to write the file listing the parameters + -3, --groupListFile the file which contains a list of ILN codes -C, --indexWithTokenizedField -s, --status status information -p, --purge delete all records from a core @@ -75,36 +76,39 @@ ignorableFields="" groupBy="" validationCore="" outputDir="" -indexWithTokenizedField="" +PARAMS="" -GETOPT=$(getopt -o b:p:m:ws::xard:hSpv:l:i:g:A:F:f:z:J:B:t:C: \ - --long db:,file-path:,file-mask:,no-delete,solrFieldType:,marcxml,alephseq,trimId,defaultRecordType,help,status,purge,marcVersion:,limit:,ignorableRecords:,defaultEncoding:,alephseqLineType:,schemaType:,marcFormat:,ignorableFields:,groupBy:,validationCore:,outputDir:,outputDir,indexWithTokenizedField \ +GETOPT=$(getopt -o b:p:m:ws::xard:hSpv:l:i:g:A:F:f:z:J:B:t:C:3:c:4: \ + --long db:,file-path:,file-mask:,no-delete,solrFieldType:,marcxml,alephseq,trimId,defaultRecordType,help,status,purge,marcVersion:,limit:,ignorableRecords:,defaultEncoding:,alephseqLineType:,schemaType:,marcFormat:,ignorableFields:,groupBy:,validationCore:,outputDir:,outputDir,indexWithTokenizedField,groupListFile:,allowableRecords:,solrForScoresUrl: \ -n ${ME} -- "$@") eval set -- "$GETOPT" while true ; do case "$1" in - -b|--db) DB=$2 ; shift 2;; - -p|--file-path) FILE_PATH=$2 ; shift 2;; - -m|--file-mask) FILE_MASK=$2 ; shift 2;; - -w|--no-delete) DELETE=0 ; shift;; - -s|--solrFieldType) solrFieldType=$2 ; shift 2;; - -d|--defaultRecordType) defaultRecordType=$2 ; shift 2;; - -v|--marcVersion) marcVersion=$2 ; shift 2;; - -l|--limit) limit="--limit $2"; shift 2;; - -i|--ignorableRecords) ignorableRecords="--ignorableRecords $2"; shift 2;; - -x|--marcxml) marcxml="--marcxml" ; shift;; - -a|--alephseq) alephseq="--alephseq" ; shift;; - -r|--trimId) trimId="--trimId" ; shift;; - -g|--defaultEncoding) defaultEncoding="--defaultEncoding $2" ; shift 2;; - -A|--alephseqLineType) alephseqLineType="--alephseqLineType $2" ; shift 2;; - -F|--schemaType) schemaType="--schemaType $2" ; shift 2;; - -f|--marcFormat) marcFormat="--marcFormat $2" ; shift 2;; - -z|--ignorableFields) ignorableFields="--ignorableFields $2" ; shift 2;; - -J|--groupBy) groupBy="--groupBy $2" ; shift 2;; - -B|--validationCore) validationCore=$2 ; shift 2;; - -t|--outputDir) outputDir="--outputDir $2" ; shift 2;; - -C|--indexWithTokenizedField) indexWithTokenizedField="--indexWithTokenizedField" ; shift ;; + -b|--db) DB=$2 ; shift 2 ;; + -p|--file-path) FILE_PATH=$2 ; shift 2 ;; + -m|--file-mask) FILE_MASK=$2 ; shift 2 ;; + -w|--no-delete) DELETE=0 ; shift ;; + -s|--solrFieldType) solrFieldType=$2 ; shift 2 ;; + -d|--defaultRecordType) defaultRecordType=$2 ; shift 2 ;; + -v|--marcVersion) marcVersion=$2 ; shift 2 ;; + -l|--limit) limit="--limit $2"; shift 2 ;; + -i|--ignorableRecords) ignorableRecords="--ignorableRecords $2"; shift 2 ;; + -x|--marcxml) marcxml="--marcxml" ; shift ;; + -a|--alephseq) alephseq="--alephseq" ; shift ;; + -r|--trimId) trimId="--trimId" ; shift ;; + -g|--defaultEncoding) defaultEncoding="--defaultEncoding $2" ; shift 2 ;; + -A|--alephseqLineType) alephseqLineType="--alephseqLineType $2" ; shift 2 ;; + -F|--schemaType) schemaType="--schemaType $2" ; shift 2 ;; + -f|--marcFormat) marcFormat="--marcFormat $2" ; shift 2 ;; + -z|--ignorableFields) ignorableFields="--ignorableFields $2" ; shift 2 ;; + -J|--groupBy) groupBy="--groupBy $2" ; shift 2 ;; + -B|--validationCore) validationCore=$2 ; shift 2 ;; + -t|--outputDir) outputDir="--outputDir $2" ; shift 2 ;; + -3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;; + -c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;; + -C|--indexWithTokenizedField) PARAMS="$PARAMS --indexWithTokenizedField" ; shift ;; + -4|--solrForScoresUrl) PARAMS="$PARAMS --solrForScoresUrl $2" ; shift 2 ;; -S|--status) status ; shift ;; -p|--purge) purge_and_exit $DB ; shift ;; -h|--help) show_usage ; shift ;; @@ -120,11 +124,11 @@ CORE=${DB}_dev SOLR_DB_URL="${SOLR_HOST}/solr/${CORE}" -if [[ "${validationCore}" != "" ]]; then - VALIDATION_PARAMS="--validationUrl ${SOLR_HOST}/solr/${validationCore}" -else - VALIDATION_PARAMS="" -fi +# if [[ "${validationCore}" != "" ]]; then +# VALIDATION_PARAMS="--validationUrl ${SOLR_HOST}/solr/${validationCore}" +# else +# VALIDATION_PARAMS="" +# fi echo "SOLR URL: $SOLR_DB_URL" @@ -161,7 +165,7 @@ running the command $ignorableFields \ $groupBy \ $outputDir \ - $indexWithTokenizedField \ + $PARAMS \ ${FILE_PATH}/${FILE_MASK} ---END EOT @@ -172,7 +176,7 @@ EOT --marcVersion $marcVersion \ ${VALIDATION_PARAMS} \ $limit $trimId $marcxml $alephseq $ignorableRecords $defaultEncoding $alephseqLineType $schemaType \ - $marcFormat $ignorableFields $groupBy $outputDir $indexWithTokenizedField \ + $marcFormat $ignorableFields $groupBy $outputDir $PARAMS \ ${FILE_PATH}/${FILE_MASK} # echo "Start optimizing"