Skip to content

Commit

Permalink
Update browse. Use Lucene fields to precompute browse title and authors.
Browse files Browse the repository at this point in the history
  • Loading branch information
wsalesky committed Oct 1, 2020
1 parent 1a15392 commit bb87a69
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 90 deletions.
2 changes: 1 addition & 1 deletion modules/lib/browse.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ declare variable $browse:perpage {request:get-parameter('perpage', 25) cast as x
: @param $facets facet xml file name, relative to collection directory
:)
declare function browse:get-all($node as node(), $model as map(*), $collection as xs:string*, $element as xs:string?, $facets as xs:string?){
let $collectionPath :=
let $collectionPath :=
if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root)
else if($collection != '') then concat('/',$collection)
else ()
Expand Down
158 changes: 83 additions & 75 deletions modules/lib/data.xqm
Original file line number Diff line number Diff line change
Expand Up @@ -128,85 +128,93 @@ declare function data:get-records($collection as xs:string*, $element as xs:stri
let $sort :=
if(request:get-parameter('sort', '') != '') then request:get-parameter('sort', '')
else if(request:get-parameter('sort-element', '') != '') then request:get-parameter('sort-element', '')
else ()
else ()
let $collection-path :=
if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root)
else if($collection != '') then concat('/',$collection)
else ()
if(config:collection-vars($collection)/@data-root != '') then concat('/',config:collection-vars($collection)/@data-root)
else if($collection != '') then concat('/',$collection)
else ()
let $get-series-idno :=
if(config:collection-vars($collection)/@collection-URI != '') then string(config:collection-vars($collection)/@collection-URI)
else ()
(: let $hits :=
concat("collection(",$config:data-root,$collection-path,")//tei:title[@level='m'][. = '", config:collection-vars($collection)/@title,"']/ancestor::tei:TEI/")
:)
let $hits := if($collection = 'bhse' or $collection = 'bible') then
data:sort-element(collection($config:data-root || $collection-path)//tei:title[@level='m'][. = config:collection-vars($collection)/@title]/ancestor::tei:TEI, $element, request:get-parameter('lang', ''))
else if($collection = 'nhsl') then
data:sort-element(collection($config:data-root || $collection-path)//tei:TEI, $element, request:get-parameter('lang', ''))
else if($get-series-idno != '') then
data:sort-element(collection($config:data-root || $collection-path)//tei:idno[. = $get-series-idno][ancestor::tei:seriesStmt]/ancestor::tei:TEI, $element, request:get-parameter('lang', ''))
else data:sort-element(collection($config:data-root || $collection-path)//tei:TEI, $element, request:get-parameter('lang', ''))
return

(: Bibl browse :)
if($collection = 'bibl' and not(request:get-parameter('view', ''))) then
for $hit in $hits[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')]
let $root := $hit/ancestor-or-self::tei:TEI
where $hit[matches(substring(global:build-sort-string(.,''),1,1),global:get-alpha-filter(),'i')]
order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('view', '') = 'A-Z') then
for $hit in $hits[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')]
let $root := $hit/ancestor-or-self::tei:TEI
let $sort := global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'')
where $hit[matches(substring(global:build-sort-string($root,''),1,1),global:get-alpha-filter(),'i')]
order by $sort collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('view', '') = 'ܐ-ܬ') then
for $hit in $hits[matches(.,'\p{IsSyriac}','i')]
let $root := $hit/ancestor-or-self::tei:TEI
order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('view', '') = 'ا-ي') then
for $hit in $hits[matches(.,'\p{IsArabic}','i')]
let $root := $hit/ancestor-or-self::tei:TEI
order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'ar') collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('view', '') = 'other') then
for $hit in $hits[not(matches(substring(global:build-sort-string(.,''),1,1),'\p{IsSyriac}|\p{IsArabic}|\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}|\p{IsLatinExtendedAdditional}','i'))]
let $root := $hit/ancestor-or-self::tei:TEI
order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('view', '') = 'all') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
order by global:build-sort-string(data:add-sort-options-bibl($root, request:get-parameter('sort-element', '')),'') collation 'http://www.w3.org/2013/collation/UCA'
return $root
else if(request:get-parameter('alpha-filter', '') = ('ALL','all')) then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $sort := global:build-sort-string($hit,'')
let $id := $root/descendant::tei:publicationStmt/tei:idno[1]
group by $facet-grp := $id
order by $sort[1] collation 'http://www.w3.org/2013/collation/UCA'
return $root[1]
else if(request:get-parameter('alpha-filter', '') != '') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $sort := global:build-sort-string($hit,'')
(:let $id := $root/descendant::tei:publicationStmt/tei:idno[1]
group by $facet-grp := $id:)
order by $sort collation 'http://www.w3.org/2013/collation/UCA'
where matches($sort,global:get-alpha-filter())
return $root
else
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $sort := global:build-sort-string($hit,'')
(:let $id := $root/descendant::tei:publicationStmt/tei:idno[1]
group by $facet-grp := $id:)
order by $sort collation 'http://www.w3.org/2013/collation/UCA'
return $root
let $eval-string := concat(data:build-collection-path($collection),'//tei:body[ft:query(., (),map { "fields": ("title","titleSyriac","titleArabic", "author") })]',data:element-filter($element))
let $hits := util:eval($eval-string)
return
if($collection = 'bibl' and not(request:get-parameter('view', ''))) then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where $s[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')]
where matches($s,global:get-alpha-filter())
return $root
else if(request:get-parameter('view', '') = 'A-Z') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where $s[matches(.,'\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}','i')]
where matches($s,global:get-alpha-filter())
return $root
else if(request:get-parameter('view', '') = 'ܐ-ܬ') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where $s[matches(.,'\p{IsSyriac}','i')]
where matches($s,global:get-alpha-filter())
return $root
else if(request:get-parameter('view', '') = 'ا-ي') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where $s[matches(.,'\p{IsArabic}','i')]
where matches($s,global:get-alpha-filter())
return $root
else if(request:get-parameter('view', '') = 'other') then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where $s[not(matches(substring(global:build-sort-string(.,''),1,1),'\p{IsSyriac}|\p{IsArabic}|\p{IsBasicLatin}|\p{IsLatin-1Supplement}|\p{IsLatinExtended-A}|\p{IsLatinExtended-B}|\p{IsLatinExtendedAdditional}','i'))]
where matches($s,global:get-alpha-filter())
return $root
else if(request:get-parameter('alpha-filter', '') != ('','All','ALL','all')) then
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
let $s :=
if(contains($sort, 'author')) then ft:field($hit, "author")[1]
else if(request:get-parameter('lang', '') = 'syr') then ft:field($hit, "titleSyriac")[1]
else if(request:get-parameter('lang', '') = 'ar') then ft:field($hit, "titleArabic")[1]
else ft:field($hit, "title")
order by $s collation 'http://www.w3.org/2013/collation/UCA'
where matches($s,global:get-alpha-filter())
return $root
else
for $hit in $hits
let $root := $hit/ancestor-or-self::tei:TEI
order by ft:field($hit, "title"), ft:field($hit, "author")[1] collation 'http://www.w3.org/2013/collation/UCA'
return $root
};

(:~
Expand Down
75 changes: 63 additions & 12 deletions modules/lib/facets.xql
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,19 @@ declare function sf:build-index(){
<facet dimension="{functx:words-to-camel-case($facet-grp)}" expression="sf:facet(descendant-or-self::tei:body, {concat("'",$path[1],"'")}, {concat("'",$facet-grp,"'")})"/>
else
<facet dimension="{functx:words-to-camel-case($facet-grp)}" expression="{replace($f[1]/facet:group-by/facet:sub-path/text(),"&#34;","'")}"/>
let $fields :=
for $f in collection($config:app-root)//*:search-config/*:field
let $path := document-uri(root($f))
group by $field-grp := $f/@name
where $field-grp != 'keyword' and $field-grp != 'fullText'
return
if($f[1]/@function != '') then
<field name="{functx:words-to-camel-case($field-grp)}" expression="sf:field(descendant-or-self::tei:body, {concat("'",$path[1],"'")}, {concat("'",$field-grp,"'")})"/>
else
<field name="{functx:words-to-camel-case($field-grp)}" expression="{string($f[1]/@expression)}"/>
let $fields :=
for $f in collection($config:app-root)//*:search-config/*:field
let $path := document-uri(root($f))
group by $field-grp := $f/@name
where $field-grp != 'keyword' and $field-grp != 'fullText'
return
if($f[1]/@function != '') then
<field name="{functx:words-to-camel-case($field-grp)}" expression="sf:field(descendant-or-self::tei:body, {concat("'",$path[1],"'")}, {concat("'",$field-grp,"'")})"/>
else
<field name="{functx:words-to-camel-case($field-grp)}" expression="{string($f[1]/@expression)}"/>

return
($facets(:,$fields :))
($facets,$fields)
}
</text>
<text qname="tei:fileDesc"/>
Expand Down Expand Up @@ -338,7 +339,6 @@ declare function sf:facet-query() {
))
};


(:~
: Adds type casting when type is specified facet:facet:group-by/@type
: @param $value of xpath
Expand Down Expand Up @@ -404,6 +404,21 @@ declare function sf:build-sort-string($titlestring as xs:string?) as xs:string*
replace(normalize-space($titlestring),'^\s+|^[‘|ʻ|ʿ|ʾ]|^[tT]he\s+[^\p{L}]+|^[dD]e\s+|^[dD]e-|^[oO]n\s+[aA]\s+|^[oO]n\s+|^[aA]l-|^[aA]n\s|^[aA]\s+|^\d*\W|^[^\p{L}]','')
};

(:~
 : Build a sort key from an Arabic title or name by stripping characters that
 : should not influence alphabetical ordering.
 :
 : Steps, applied in this order:
 :   1. remove leading whitespace;
 :   2. remove short vowels and other combining diacritics;
 :   3. remove the definite article (ال and its hamza/wasla spellings) from the
 :      start of every word, keeping the whitespace that separates the words;
 :   4. normalise the alif variants (آ إ أ ٱ) to a bare alif (ا);
 :   5. remove a leading "ibn"/"bin" when it is a separate word.
 :
 : @param $titlestring the raw Arabic string; may be the empty sequence
 : @return the normalised sort string (a zero-length string for empty input)
 :)
declare function sf:build-sort-string-arabic($titlestring as xs:string?) as xs:string* {
    (: 1. leading whitespace :)
    let $trimmed := replace($titlestring, '^\s+', '')
    (: 2. vowels and diacritics :)
    let $undecorated := replace($trimmed, '[ً-ٖ]', '')
    (: 3. definite articles. Group 1 is the start of the string or the whitespace
          before the word; it is written back so neighbouring words are not
          fused together (and so step 5 can still see a word boundary). :)
    let $without-article := replace($undecorated, '(^|\s)(ال|أل|ٱل)', '$1')
    (: 4. alif normalisation :)
    let $plain-alif := replace($without-article, 'آ|إ|أ|ٱ', 'ا')
    (: 5. leading ibn/bin. Trailing whitespace is required so that words which
          merely begin with these letters are left intact. :)
    return replace($plain-alif, '^(ابن|إبن|بن)\s+', '')
};

(: Custom search fields, some generic facets provided here, including for handling ranges, and arrays :)

(:~
Expand Down Expand Up @@ -454,11 +469,47 @@ declare function sf:field-title($element as item()*, $facet-definition as item()
let $en := $element/descendant-or-self::*[contains(@srophe:tags,'#headword')][@xml:lang='en'][1]
let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#headword')][matches(@xml:lang,'^syr')][1]//text(),' ')
return sf:build-sort-string(concat($en, if($syr != '') then concat(' - ', $syr) else ()))
else if($element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword')][@xml:lang='en']) then
let $en := $element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword')][@xml:lang='en'][1]
let $syr := string-join($element/descendant::*[contains(@srophe:tags,'#syriaca-headword')][matches(@xml:lang,'^syr')][1]//text(),' ')
return sf:build-sort-string(concat($en, if($syr != '') then concat(' - ', $syr) else ()))
else if($element/ancestor-or-self::tei:TEI/descendant::tei:biblStruct) then
sf:build-sort-string($element/ancestor-or-self::tei:TEI/descendant::tei:biblStruct/descendant::tei:title)
else sf:build-sort-string($element/ancestor-or-self::tei:TEI/descendant::tei:titleStmt/tei:title)
};

(:~
 : Field builder for the Syriac headword of a record (Srophe-specific).
 :
 : Three tagging conventions are tried in order, each restricted to elements
 : whose xml:lang starts with "syr":
 :   1. @syriaca-tags containing "#syriaca-headword"
 :   2. @srophe:tags containing "#headword"
 :   3. @srophe:tags containing "#syriaca-headword"
 : For the first convention that is present, the text nodes of the first
 : matching descendant are joined with single spaces. Presence is tested on
 : descendant-or-self, but the text is read from descendants only.
 :
 : @param $element the node(s) being indexed
 : @param $facet-definition field definition; not used in this function
 : @param $name field name; not used in this function
 : @return the joined headword text, or the empty sequence when no Syriac headword is tagged
 :)
declare function sf:field-titleSyriac($element as item()*, $facet-definition as item(), $name as xs:string){
    if(exists($element/descendant-or-self::*[contains(@syriaca-tags,'#syriaca-headword') and matches(@xml:lang,'^syr')])) then
        string-join(
            $element/descendant::*[contains(@syriaca-tags,'#syriaca-headword') and matches(@xml:lang,'^syr')][1]//text(),
            ' ')
    else if(exists($element/descendant-or-self::*[contains(@srophe:tags,'#headword') and matches(@xml:lang,'^syr')])) then
        string-join(
            $element/descendant::*[contains(@srophe:tags,'#headword') and matches(@xml:lang,'^syr')][1]//text(),
            ' ')
    else if(exists($element/descendant-or-self::*[contains(@srophe:tags,'#syriaca-headword') and matches(@xml:lang,'^syr')])) then
        string-join(
            $element/descendant::*[contains(@srophe:tags,'#syriaca-headword') and matches(@xml:lang,'^syr')][1]//text(),
            ' ')
    else ()
};

(:~
 : Field builder for the Arabic sort title of a record (Srophe-specific).
 :
 : Sources are tried in this order, each restricted to xml:lang = "ar":
 :   1. elements whose @syriaca-tags contains "#syriaca-headword"
 :   2. elements whose @srophe:tags contains "#headword"
 :   3. tei:listPerson/tei:person/tei:persName
 :   4. tei:listPlace/tei:place/tei:placeName
 : For sources 1 and 2 the text nodes of all matching descendants are joined
 : with single spaces. For sources 3 and 4 only the first matching name is used.
 : The chosen string is passed through sf:build-sort-string-arabic.
 :
 : @param $element the node(s) being indexed
 : @param $facet-definition field definition; not used in this function
 : @param $name field name; not used in this function
 : @return the Arabic sort string, or the empty sequence when no Arabic title is found
 :)
declare function sf:field-titleArabic($element as item()*, $facet-definition as item(), $name as xs:string){
    if($element/descendant-or-self::*[contains(@syriaca-tags,'#syriaca-headword')][@xml:lang = 'ar']) then
        let $ar := string-join($element/descendant::*[contains(@syriaca-tags,'#syriaca-headword')][@xml:lang = 'ar']//text(),' ')
        return sf:build-sort-string-arabic($ar)
    else if($element/descendant-or-self::*[contains(@srophe:tags,'#headword')][@xml:lang = 'ar']) then
        let $ar := string-join($element/descendant::*[contains(@srophe:tags,'#headword')][@xml:lang = 'ar']//text(),' ')
        return sf:build-sort-string-arabic($ar)
    else if($element/tei:listPerson/tei:person/tei:persName[@xml:lang = 'ar']) then
        (: sf:build-sort-string-arabic accepts at most one string, and a record may
           carry several Arabic names, so only the first one is passed on. :)
        sf:build-sort-string-arabic(($element/tei:listPerson/tei:person/tei:persName[@xml:lang = 'ar'])[1])
    else if($element/tei:listPlace/tei:place/tei:placeName[@xml:lang = 'ar']) then
        (: Same single-value restriction as for persName above. :)
        sf:build-sort-string-arabic(($element/tei:listPlace/tei:place/tei:placeName[@xml:lang = 'ar'])[1])
    else ()
};
(:~
: TEI title facet, specific to Srophe applications
:)
Expand Down
8 changes: 6 additions & 2 deletions search-config.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<search-config>
<field name="title" expression="descendant-or-self::tei:body" function="title"/>
<field name="title" expression="descendant-or-self::tei:body" function="title" type="sort"/>
<field name="titleSyriac" expression="descendant-or-self::tei:body" function="titleSyriac" type="sort"/>
<field name="titleArabic" expression="descendant-or-self::tei:body" function="titleArabic" type="sort"/>
<field name="author" expression="descendant-or-self::tei:body" function="author" type="sort"/>
<!--
<field name="type" expression="descendant-or-self::tei:body" function="type"/>
<field name="idno" expression="descendant::tei:idno" function="uri"/>
-->
</search-config>

0 comments on commit bb87a69

Please sign in to comment.