From bb87a69fb0da253f8802f42a2d44a581a7340769 Mon Sep 17 00:00:00 2001 From: Winona Salesky Date: Thu, 1 Oct 2020 14:01:16 -0400 Subject: [PATCH] Update browse. Use Lucene fields to precompute browse title and authors. --- modules/lib/browse.xqm | 2 +- modules/lib/data.xqm | 158 ++++++++++++++++++++++------------------- modules/lib/facets.xql | 75 +++++++++++++++---- search-config.xml | 8 ++- 4 files changed, 153 insertions(+), 90 deletions(-) diff --git a/modules/lib/browse.xqm b/modules/lib/browse.xqm index bc43246f8..306151732 100644 --- a/modules/lib/browse.xqm +++ b/modules/lib/browse.xqm @@ -38,7 +38,7 @@ declare variable $browse:perpage {request:get-parameter('perpage', 25) cast as x : @param $facets facet xml file name, relative to collection directory :) declare function browse:get-all($node as node(), $model as map(*), $collection as xs:string*, $element as xs:string?, $facets as xs:string?){ - let $collectionPath := + let $collectionPath := if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root) else if($collection != '') then concat('/',$collection) else () diff --git a/modules/lib/data.xqm b/modules/lib/data.xqm index 15bab5924..8aed480d0 100644 --- a/modules/lib/data.xqm +++ b/modules/lib/data.xqm @@ -128,85 +128,93 @@ declare function data:get-records($collection as xs:string*, $element as xs:stri let $sort := if(request:get-parameter('sort', '') != '') then request:get-parameter('sort', '') else if(request:get-parameter('sort-element', '') != '') then request:get-parameter('sort-element', '') - else () + else () let $collection-path := - if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root) - else if($collection != '') then concat('/',$collection) - else () + if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root) + else if($collection != '') then 
concat('/',$collection) + else () let $get-series-idno := if(config:collection-vars($collection)/@collection-URI != '') then string(config:collection-vars($collection)/@collection-URI) else () - (: let $hits := - concat("collection(",$config:data-root,$collection-path,")//tei:title[@level='m'][. = '", config:collection-vars($collection)/@title,"']/ancestor::tei:TEI/") - :) - let $hits := if($collection = 'bhse' or $collection = 'bible') then - data:sort-element(collection($config:data-root || $collection-path)//tei:title[@level='m'][. = config:collection-vars($collection)/@title]/ancestor::tei:TEI, $element, request:get-parameter('lang', '')) - else if($collection = 'nhsl') then - data:sort-element(collection($config:data-root || $collection-path)//tei:TEI, $element, request:get-parameter('lang', '')) - else if($get-series-idno != '') then - data:sort-element(collection($config:data-root || $collection-path)//tei:idno[. = $get-series-idno][ancestor::tei:seriesStmt]/ancestor::tei:TEI, $element, request:get-parameter('lang', '')) - else data:sort-element(collection($config:data-root || $collection-path)//tei:TEI, $element, request:get-parameter('lang', '')) - return - - (: Bibl browse :) - if($collection = 'bibl' and not(request:get-parameter('view', ''))) then - for $hit in $hits[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')] - let $root := $hit/ancestor-or-self::tei:TEI - where $hit[matches(substring(global:build-sort-string(.,''),1,1),global:get-alpha-filter(),'i')] - order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('view', '') = 'A-Z') then - for $hit in $hits[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')] - let $root := $hit/ancestor-or-self::tei:TEI - let $sort := 
global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') - where $hit[matches(substring(global:build-sort-string($root,''),1,1),global:get-alpha-filter(),'i')] - order by $sort collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('view', '') = 'ܐ-ܬ') then - for $hit in $hits[matches(.,'\p{IsSyriac}','i')] - let $root := $hit/ancestor-or-self::tei:TEI - order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('view', '') = 'ا-ي') then - for $hit in $hits[matches(.,'\p{IsArabic}','i')] - let $root := $hit/ancestor-or-self::tei:TEI - order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'ar') collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('view', '') = 'other') then - for $hit in $hits[not(matches(substring(global:build-sort-string(.,''),1,1),'\p{IsSyriac}|\p{IsArabic}|\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}|\p{IsLatinExtendedAdditional}','i'))] - let $root := $hit/ancestor-or-self::tei:TEI - order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('view', '') = 'all') then - for $hit in $hits - let $root := $hit/ancestor-or-self::tei:TEI - order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA' - return $root - else if(request:get-parameter('alpha-filter', '') = ('ALL','all')) then - for $hit in $hits - let $root := $hit/ancestor-or-self::tei:TEI - let $sort := global:build-sort-string($hit,'') - let $id := 
$root/descendant::tei:publicationStmt/tei:idno[1] - group by $facet-grp := $id - order by $sort[1] collation 'http://www.w3.org/2013/collation/UCA' - return $root[1] - else if(request:get-parameter('alpha-filter', '') != '') then - for $hit in $hits - let $root := $hit/ancestor-or-self::tei:TEI - let $sort := global:build-sort-string($hit,'') - (:let $id := $root/descendant::tei:publicationStmt/tei:idno[1] - group by $facet-grp := $id:) - order by $sort collation 'http://www.w3.org/2013/collation/UCA' - where matches($sort,global:get-alpha-filter()) - return $root - else - for $hit in $hits - let $root := $hit/ancestor-or-self::tei:TEI - let $sort := global:build-sort-string($hit,'') - (:let $id := $root/descendant::tei:publicationStmt/tei:idno[1] - group by $facet-grp := $id:) - order by $sort collation 'http://www.w3.org/2013/collation/UCA' - return $root + let $eval-string := concat(data:build-collection-path($collection),'//tei:body[ft:query(., (),map { "fields": ("title","titleSyriac","titleArabic", "author") })]',data:element-filter($element)) + let $hits := util:eval($eval-string) + return + if($collection = 'bibl' and not(request:get-parameter('view', ''))) then + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where $s[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')] + where matches($s,global:get-alpha-filter()) + return $root + else if(request:get-parameter('view', '') = 'A-Z') then + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else 
if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where $s[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')] + where matches($s,global:get-alpha-filter()) + return $root + else if(request:get-parameter('view', '') = 'ܐ-ܬ') then + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where $s[matches(.,'\p{IsSyriac}','i')] + where matches($s,global:get-alpha-filter()) + return $root + else if(request:get-parameter('view', '') = 'ا-ي') then + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where $s[matches(.,'\p{IsArabic}','i')] + where matches($s,global:get-alpha-filter()) + return $root + else if(request:get-parameter('view', '') = 'other') then + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else 
ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where $s[not(matches(substring(global:build-sort-string(.,''),1,1),'\p{IsSyriac}|\p{IsArabic}|\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}|\p{IsLatinExtendedAdditional}','i'))] + where matches($s,global:get-alpha-filter()) + return $root + else if(not(request:get-parameter('alpha-filter', '') = ('','All','ALL','all'))) then (: negated "=" on purpose; "!=" against a sequence is true whenever any item differs, i.e. always :) + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + let $s := + if(contains($sort, 'author')) then ft:field($hit, "author")[1] + else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1] + else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1] + else ft:field($hit, "title") + order by $s collation 'http://www.w3.org/2013/collation/UCA' + where matches($s,global:get-alpha-filter()) + return $root + else (: default browse, reached when alpha-filter is empty or an "all" variant; each sort key carries its own collation :) + for $hit in $hits + let $root := $hit/ancestor-or-self::tei:TEI + order by ft:field($hit, "title") collation 'http://www.w3.org/2013/collation/UCA', ft:field($hit, "author")[1] collation 'http://www.w3.org/2013/collation/UCA' + return $root }; (:~ diff --git a/modules/lib/facets.xql b/modules/lib/facets.xql index 08bc1d8e7..3090114e0 100644 --- a/modules/lib/facets.xql +++ b/modules/lib/facets.xql @@ -52,18 +52,19 @@ declare function sf:build-index(){ else - let $fields := - for $f in collection($config:app-root)//*:search-config/*:field - let $path := document-uri(root($f)) - group by $field-grp := $f/@name - where $field-grp != 'keyword' and $field-grp != 'fullText' - return - if($f[1]/@function != '') then - - else - + let $fields := + for $f in collection($config:app-root)//*:search-config/*:field + let $path := document-uri(root($f)) + group by $field-grp := $f/@name + where $field-grp != 'keyword' and $field-grp != 'fullText' + return + if($f[1]/@function != '') then + + else + + return - ($facets(:,$fields :)) + ($facets,$fields) } @@ -338,7 +339,6 @@ declare function sf:facet-query() 
{ )) }; - (:~ : Adds type casting when type is specified facet:facet:group-by/@type : @param $value of xpath @@ -404,6 +404,21 @@ declare function sf:build-sort-string($titlestring as xs:string?) as xs:string* replace(normalize-space($titlestring),'^\s+|^[‘|ʻ|ʿ|ʾ]|^[tT]he\s+[^\p{L}]+|^[dD]e\s+|^[dD]e-|^[oO]n\s+[aA]\s+|^[oO]n\s+|^[aA]l-|^[aA]n\s|^[aA]\s+|^\d*\W|^[^\p{L}]','') }; +(:~ + : Syriaca.org strip non sort characters for sorting + :) +declare function sf:build-sort-string-arabic($titlestring as xs:string?) as xs:string* { + replace( + replace( + replace( + replace( + replace($titlestring,'^\s+',''), (:remove leading spaces. :) + '[ً-ٖ]',''), (:remove vowels and diacritics :) + '(^|\s)(ال|أل|ٱل)',''), (: remove all definite articles :) + 'آ|إ|أ|ٱ','ا'), (: normalize letter alif :) + '^(ابن|إبن|بن)','') (:remove all forms of (ابن) with leading space :) +}; + (: Custom search fields, some generic facets provided here, including for handling ranges, and arrays :) (:~ @@ -454,11 +469,47 @@ declare function sf:field-title($element as item()*, $facet-definition as item() let $en := $element/descendant-or-self::*[contains(@srophe:tags,'#headword')][@xml:lang='en'][1] let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#headword')][matches(@xml:lang,'^syr')][1]//text(),' ') return sf:build-sort-string(concat($en, if($syr != '') then concat(' - ', $syr) else ())) + else if($element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword')][@xml:lang='en']) then + let $en := $element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword')][@xml:lang='en'][1] + let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#syriaca-headword')][matches(@xml:lang,'^syr')][1]//text(),' ') + return sf:build-sort-string(concat($en, if($syr != '') then concat(' - ', $syr) else ())) else if($element/ancestor-or-self::tei:TEI/descendant::tei:biblStruct) then 
sf:build-sort-string($element/ancestor-or-self::tei:TEI/descendant::tei:biblStruct/descendant::tei:title) else sf:build-sort-string($element/ancestor-or-self::tei:TEI/descendant::tei:titleStmt/tei:title) }; +(:~ + : TEI Title field - Syriac, specific to Srophe applications + :) +declare function sf:field-titleSyriac($element as item()*, $facet-definition as item(), $name as xs:string){ + if($element/descendant-or-self::*[contains(@syriaca-tags,'#syriaca-headword')][matches(@xml:lang,'^syr')]) then + let $syr := string-join($element/descendant::*[contains(@syriaca-tags,'#syriaca-headword')][matches(@xml:lang,'^syr')][1]//text(),' ') + return $syr + else if($element/descendant-or-self::*[contains(@srophe:tags,'#headword')][matches(@xml:lang,'^syr')]) then + let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#headword')][matches(@xml:lang,'^syr')][1]//text(),' ') + return $syr + else if($element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword')][matches(@xml:lang,'^syr')]) then + let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#syriaca-headword')][matches(@xml:lang,'^syr')][1]//text(),' ') + return $syr + else () +}; + +(:~ + : TEI Title field - Arabic, specific to Srophe applications + :) +declare function sf:field-titleArabic($element as item()*, $facet-definition as item(), $name as xs:string){ + if($element/descendant-or-self::*[contains(@syriaca-tags,'#syriaca-headword')][@xml:lang = 'ar']) then + let $ar := string-join($element/descendant::*[contains(@syriaca-tags,'#syriaca-headword')][@xml:lang = 'ar']//text(),' ') + return sf:build-sort-string-arabic($ar) + else if($element/descendant-or-self::*[contains(@srophe:tags,'#headword')][@xml:lang = 'ar']) then + let $ar := string-join($element/descendant::*[contains(@srophe:tags,'#headword')][@xml:lang = 'ar']//text(),' ') + return sf:build-sort-string-arabic($ar) + else if($element/tei:listPerson/tei:person/tei:persName[@xml:lang = 'ar']) then + 
sf:build-sort-string-arabic(($element/tei:listPerson/tei:person/tei:persName[@xml:lang = 'ar'])[1]) (: first Arabic persName only; the helper parameter is typed xs:string? and rejects a multi-item sequence :) + else if($element/tei:listPlace/tei:place/tei:placeName[@xml:lang = 'ar']) then + sf:build-sort-string-arabic(($element/tei:listPlace/tei:place/tei:placeName[@xml:lang = 'ar'])[1]) (: first Arabic placeName only, for the same reason :) + else () +}; (:~ : TEI title facet, specific to Srophe applications :) diff --git a/search-config.xml b/search-config.xml index 3d2eed09a..4ea59b021 100644 --- a/search-config.xml +++ b/search-config.xml @@ -1,6 +1,10 @@ - - + + + + + \ No newline at end of file