From 178fd6a5ebee0c687c47a35bdd78a56d3f20193c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A9ter=20Kir=C3=A1ly?= <pkiraly@gwdg.de>
Date: Tue, 24 Oct 2023 21:14:49 +0200
Subject: [PATCH] Allow running Solr indexing without an existing collection
 #289

---
 catalogues/k10plus_pica_grouped.sh |  2 +
 index                              | 66 ++++++++++++++++--------------
 2 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/catalogues/k10plus_pica_grouped.sh b/catalogues/k10plus_pica_grouped.sh
index bc39101c1..606bf72fb 100755
--- a/catalogues/k10plus_pica_grouped.sh
+++ b/catalogues/k10plus_pica_grouped.sh
@@ -9,6 +9,8 @@ TYPE_PARAMS="$TYPE_PARAMS --groupListFile src/main/resources/k10plus-libraries-b
 TYPE_PARAMS="$TYPE_PARAMS --ignorableFields 001@,001E,001L,001U,001U,001X,001X,002V,003C,003G,003Z,008G,017N,020F,027D,031B,037I,039V,042@,046G,046T,101@,101E,101U,102D,201E,201U,202D,1...,2..."
 #TYPE_PARAMS="$TYPE_PARAMS --ignorableIssueTypes undefinedField"
 TYPE_PARAMS="$TYPE_PARAMS --allowableRecords base64:"$(echo '002@.0 !~ "^L" && 002@.0 !~ "^..[iktN]" && (002@.0 !~ "^.v" || 021A.a?)' | base64 -w 0)
+TYPE_PARAMS="$TYPE_PARAMS --solrForScoresUrl http://localhost:8983/solr/k10plus_pica_grouped_validation"
+TYPE_PARAMS="$TYPE_PARAMS --indexWithTokenizedField"
 # MASK=sample.pica
 MASK=${MASK:=pica-with-holdings-info-1K.dat} # if not set in setdir.sh
 
diff --git a/index b/index
index 5af14d506..e8f913a7a 100755
--- a/index
+++ b/index
@@ -45,6 +45,7 @@ usage:
  -J, --groupBy          group the results by the value of this data element (e.g. the ILN of  library)
  -B, --validationCore   the Solr collection used in the validation task
  -t, --outputDir        the directory to write the file listing the parameters
+ -3, --groupListFile    the file which contains a list of ILN codes
  -C, --indexWithTokenizedField
  -s, --status           status information
  -p, --purge            delete all records from a core
@@ -75,36 +76,39 @@ ignorableFields=""
 groupBy=""
 validationCore=""
 outputDir=""
-indexWithTokenizedField=""
+PARAMS=""
 
-GETOPT=$(getopt -o b:p:m:ws::xard:hSpv:l:i:g:A:F:f:z:J:B:t:C: \
-  --long db:,file-path:,file-mask:,no-delete,solrFieldType:,marcxml,alephseq,trimId,defaultRecordType,help,status,purge,marcVersion:,limit:,ignorableRecords:,defaultEncoding:,alephseqLineType:,schemaType:,marcFormat:,ignorableFields:,groupBy:,validationCore:,outputDir:,outputDir,indexWithTokenizedField \
+GETOPT=$(getopt -o b:p:m:ws::xard:hSpv:l:i:g:A:F:f:z:J:B:t:C3:c:4: \
+  --long db:,file-path:,file-mask:,no-delete,solrFieldType:,marcxml,alephseq,trimId,defaultRecordType:,help,status,purge,marcVersion:,limit:,ignorableRecords:,defaultEncoding:,alephseqLineType:,schemaType:,marcFormat:,ignorableFields:,groupBy:,validationCore:,outputDir:,indexWithTokenizedField,groupListFile:,allowableRecords:,solrForScoresUrl: \
   -n ${ME} -- "$@")
 eval set -- "$GETOPT"
 
 while true ; do
   case "$1" in
-    -b|--db) DB=$2 ; shift 2;;
-    -p|--file-path) FILE_PATH=$2 ; shift 2;;
-    -m|--file-mask) FILE_MASK=$2 ; shift 2;;
-    -w|--no-delete) DELETE=0 ; shift;;
-    -s|--solrFieldType) solrFieldType=$2 ; shift 2;;
-    -d|--defaultRecordType) defaultRecordType=$2 ; shift 2;;
-    -v|--marcVersion) marcVersion=$2 ; shift 2;;
-    -l|--limit) limit="--limit $2"; shift 2;;
-    -i|--ignorableRecords) ignorableRecords="--ignorableRecords $2"; shift 2;;
-    -x|--marcxml) marcxml="--marcxml" ; shift;;
-    -a|--alephseq) alephseq="--alephseq" ; shift;;
-    -r|--trimId) trimId="--trimId" ; shift;;
-    -g|--defaultEncoding) defaultEncoding="--defaultEncoding $2" ; shift 2;;
-    -A|--alephseqLineType) alephseqLineType="--alephseqLineType $2" ; shift 2;;
-    -F|--schemaType) schemaType="--schemaType $2" ; shift 2;;
-    -f|--marcFormat) marcFormat="--marcFormat $2" ; shift 2;;
-    -z|--ignorableFields) ignorableFields="--ignorableFields $2" ; shift 2;;
-    -J|--groupBy) groupBy="--groupBy $2" ; shift 2;;
-    -B|--validationCore) validationCore=$2 ; shift 2;;
-    -t|--outputDir) outputDir="--outputDir $2" ; shift 2;;
-    -C|--indexWithTokenizedField) indexWithTokenizedField="--indexWithTokenizedField" ; shift ;;
+    -b|--db) DB=$2 ; shift 2 ;;
+    -p|--file-path) FILE_PATH=$2 ; shift 2 ;;
+    -m|--file-mask) FILE_MASK=$2 ; shift 2 ;;
+    -w|--no-delete) DELETE=0 ; shift ;;
+    -s|--solrFieldType) solrFieldType=$2 ; shift 2 ;;
+    -d|--defaultRecordType) defaultRecordType=$2 ; shift 2 ;;
+    -v|--marcVersion) marcVersion=$2 ; shift 2 ;;
+    -l|--limit) limit="--limit $2"; shift 2 ;;
+    -i|--ignorableRecords) ignorableRecords="--ignorableRecords $2"; shift 2 ;;
+    -x|--marcxml) marcxml="--marcxml" ; shift ;;
+    -a|--alephseq) alephseq="--alephseq" ; shift ;;
+    -r|--trimId) trimId="--trimId" ; shift ;;
+    -g|--defaultEncoding) defaultEncoding="--defaultEncoding $2" ; shift 2 ;;
+    -A|--alephseqLineType) alephseqLineType="--alephseqLineType $2" ; shift 2 ;;
+    -F|--schemaType) schemaType="--schemaType $2" ; shift 2 ;;
+    -f|--marcFormat) marcFormat="--marcFormat $2" ; shift 2 ;;
+    -z|--ignorableFields) ignorableFields="--ignorableFields $2" ; shift 2 ;;
+    -J|--groupBy) groupBy="--groupBy $2" ; shift 2 ;;
+    -B|--validationCore) validationCore=$2 ; shift 2 ;;
+    -t|--outputDir) outputDir="--outputDir $2" ; shift 2 ;;
+    -3|--groupListFile) PARAMS="$PARAMS --groupListFile $2" ; shift 2 ;;
+    -c|--allowableRecords) PARAMS="$PARAMS --allowableRecords $2" ; shift 2 ;;
+    -C|--indexWithTokenizedField) PARAMS="$PARAMS --indexWithTokenizedField" ; shift ;;
+    -4|--solrForScoresUrl) PARAMS="$PARAMS --solrForScoresUrl $2" ; shift 2 ;;
     -S|--status) status ; shift ;;
     -p|--purge) purge_and_exit $DB ; shift ;;
     -h|--help) show_usage ; shift ;;
@@ -120,11 +124,11 @@ CORE=${DB}_dev
 
 SOLR_DB_URL="${SOLR_HOST}/solr/${CORE}"
 
-if [[ "${validationCore}" != "" ]]; then
-  VALIDATION_PARAMS="--validationUrl ${SOLR_HOST}/solr/${validationCore}"
-else
-  VALIDATION_PARAMS=""
-fi
+# Validation against a Solr collection is switched off so that indexing can
+# run without an existing collection (#289): validationCore is no longer
+# turned into a --validationUrl parameter here. The command below still
+# expands ${VALIDATION_PARAMS}, so define it instead of inheriting it.
+VALIDATION_PARAMS=""
 
 echo "SOLR URL: $SOLR_DB_URL"
 
@@ -161,7 +165,7 @@ running the command
   $ignorableFields \
   $groupBy \
   $outputDir \
-  $indexWithTokenizedField \
+  $PARAMS \
   ${FILE_PATH}/${FILE_MASK}
 ---END
 EOT
@@ -172,7 +176,7 @@ EOT
   --marcVersion $marcVersion \
   ${VALIDATION_PARAMS} \
   $limit $trimId $marcxml $alephseq $ignorableRecords $defaultEncoding $alephseqLineType $schemaType \
-  $marcFormat $ignorableFields $groupBy $outputDir $indexWithTokenizedField \
+  $marcFormat $ignorableFields $groupBy $outputDir $PARAMS \
   ${FILE_PATH}/${FILE_MASK}
 
 # echo "Start optimizing"