From 121478693d3b6eeea88231be17cbf9a394dbcbd8 Mon Sep 17 00:00:00 2001 From: Mark Triggs Date: Fri, 11 Oct 2024 16:22:06 +1100 Subject: [PATCH] VUFIND-1710 Add support for custom delimiters in topic browse Ostensibly this just required a new TopicNormalizer, but the specific use case added a small complication. What we want is to have our topic headings contain a UTF-8 marker (an en-space) to act as a delimiter between different terms. They'll be stripped out by the new TopicNormalizer for the purposes of searching and sorting, but VuFind will use the delimiter to display a user-selectable delimiter when showing topics. But, this extra delimiter would cause the lookup against the authority index to fail, since the authority index is always space-delimited. Further, we can't change the authority index to use the same delimiter as topics, because the authority index is a mixture of topics and names. So, in addition to the new TopicNormalizer, this commit extends the Normalizer interface to add a `headingForAuthQuery` method. By default this is a no-op (so existing code doesn't change), but the TopicNormalizer uses this to replace the UTF-8 delimiter with a regular space to allow the authority index lookup to succeed. 
--- .../java/org/vufind/solr/handler/Browse.java | 14 ++++++---- .../solr/handler/BrowseRequestHandler.java | 7 +++-- src/main/java/org/vufind/util/Normalizer.java | 5 +++- .../java/org/vufind/util/TopicNormalizer.java | 27 +++++++++++++++++++ 4 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 src/main/java/org/vufind/util/TopicNormalizer.java diff --git a/src/main/java/org/vufind/solr/handler/Browse.java b/src/main/java/org/vufind/solr/handler/Browse.java index cdd1d85..a142103 100644 --- a/src/main/java/org/vufind/solr/handler/Browse.java +++ b/src/main/java/org/vufind/solr/handler/Browse.java @@ -5,6 +5,8 @@ import java.util.List; import java.util.Map; +import org.vufind.util.Normalizer; + /** * Class that performs the alphabetical browse of an index and produces a * {@code BrowseList} object. @@ -26,14 +28,16 @@ public Browse(HeadingsDB headings, BibDB bibdb, AuthDB auth, this.maxBibListSize = maxBibListSize; } - private void populateItem(BrowseItem item, String fields) throws Exception + private void populateItem(BrowseItem item, String fields, Normalizer normalizer) throws Exception { Map>> bibinfo = bibDB.matchingExtras(item.getHeading(), fields, maxBibListSize); item.setExtras(bibinfo); item.setCount(bibDB.recordCount(item.getHeading())); - Map> authFields = authDB.getFields(item.getHeading()); + String headingForAuthLookup = normalizer.headingForAuthQuery(item.getHeading()); + + Map> authFields = authDB.getFields(headingForAuthLookup); List seeAlsoList = new ArrayList(); for (String value : authFields.get("seeAlso")) { @@ -63,7 +67,7 @@ public int getId(String from) throws Exception } - public BrowseList getList(int rowid, int offset, int rows, String extras) + public BrowseList getList(int rowid, int offset, int rows, String extras, Normalizer normalizer) throws Exception { BrowseList result = new BrowseList(); @@ -79,11 +83,11 @@ public BrowseList getList(int rowid, int offset, int rows, String extras) BrowseItem item = new 
BrowseItem(sort_key, heading); - populateItem(item, extras); + populateItem(item, extras, normalizer); result.add(item); } return result; } -} \ No newline at end of file +} diff --git a/src/main/java/org/vufind/solr/handler/BrowseRequestHandler.java b/src/main/java/org/vufind/solr/handler/BrowseRequestHandler.java index fcccefa..1a32d79 100644 --- a/src/main/java/org/vufind/solr/handler/BrowseRequestHandler.java +++ b/src/main/java/org/vufind/solr/handler/BrowseRequestHandler.java @@ -24,6 +24,7 @@ import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; import org.apache.solr.util.RefCounted; +import org.vufind.util.Normalizer; import org.vufind.util.NormalizerFactory; /* @@ -191,7 +192,9 @@ public void handleRequestBody(org.apache.solr.request.SolrQueryRequest req, Log.info("Browsing from: " + rowid); - BrowseList list = browse.getList(rowid, offset, rows, fields); + Normalizer normalizer = NormalizerFactory.getNormalizer(source.normalizer); + + BrowseList list = browse.getList(rowid, offset, rows, fields, normalizer); Map result = new HashMap<>(); @@ -200,7 +203,7 @@ public void handleRequestBody(org.apache.solr.request.SolrQueryRequest req, result.put("startRow", rowid); result.put("offset", offset); - new MatchTypeResponse(from, list, rowid, rows, offset, NormalizerFactory.getNormalizer(source.normalizer)).addTo(result); + new MatchTypeResponse(from, list, rowid, rows, offset, normalizer).addTo(result); rsp.add("Browse", result); } finally { diff --git a/src/main/java/org/vufind/util/Normalizer.java b/src/main/java/org/vufind/util/Normalizer.java index 91921ac..227f1e9 100644 --- a/src/main/java/org/vufind/util/Normalizer.java +++ b/src/main/java/org/vufind/util/Normalizer.java @@ -23,4 +23,7 @@ public interface Normalizer */ public byte[] normalize(String s); -} \ No newline at end of file + default public String headingForAuthQuery(String heading) { + return heading; + } +} diff --git 
a/src/main/java/org/vufind/util/TopicNormalizer.java b/src/main/java/org/vufind/util/TopicNormalizer.java new file mode 100644 index 0000000..1d17a51 --- /dev/null +++ b/src/main/java/org/vufind/util/TopicNormalizer.java @@ -0,0 +1,27 @@ +package org.vufind.util; + +public class TopicNormalizer implements Normalizer +{ + private Normalizer defaultNormalizer; + + public TopicNormalizer() { + defaultNormalizer = new ICUCollatorNormalizer(); + } + + // Separator defined by VuFind's marc.properties for topic_browse field. + final static String TOPIC_TERM_SEPARATOR = "\u2002"; + + @Override + public byte[] normalize(String s) + { + // Treat topic term separators as spaces when producing a sort key + return defaultNormalizer.normalize(s.replace(TOPIC_TERM_SEPARATOR, " ")); + } + + @Override + public String headingForAuthQuery(String heading) { + // Auth DB uses single spaces as delimiters while topics use en spaces + return heading.replace(TOPIC_TERM_SEPARATOR, " "); + } + +}