Skip to content

Commit

Permalink
VUFIND-1710 Add support for custom delimiters in topic browse
Browse files Browse the repository at this point in the history
Ostensibly this just required a new TopicNormalizer, but the specific
use case added a small complication.

What we want is to have our topic headings contain a UTF-8 marker (an
en-space) to act as a delimiter between different terms.  They'll be
stripped out by the new TopicNormalizer for the purposes of searching
and sorting, but VuFind will use the delimiter to display a
user-selectable delimiter when showing topics.

But, this extra delimiter would cause the lookup against the authority
index to fail, since the authority index is always space-delimited.
Further, we can't change the authority index to use the same delimiter
as topics, because the authority index is a mixture of topics and
names.

So, in addition to the new TopicNormalizer, this commit extends the
Normalizer interface to add a `headingForAuthQuery` method.  By
default this is a no-op (so existing code doesn't change), but the
TopicNormalizer uses this to replace the UTF-8 delimiter with a
regular space to allow the authority index lookup to succeed.
  • Loading branch information
marktriggs committed Oct 15, 2024
1 parent 61f367b commit 1214786
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 8 deletions.
14 changes: 9 additions & 5 deletions src/main/java/org/vufind/solr/handler/Browse.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.List;
import java.util.Map;

import org.vufind.util.Normalizer;

/**
* Class that performs the alphabetical browse of an index and produces a
* {@code BrowseList} object.
Expand All @@ -26,14 +28,16 @@ public Browse(HeadingsDB headings, BibDB bibdb, AuthDB auth,
this.maxBibListSize = maxBibListSize;
}

private void populateItem(BrowseItem item, String fields) throws Exception
private void populateItem(BrowseItem item, String fields, Normalizer normalizer) throws Exception
{
Map<String, List<Collection<String>>> bibinfo =
bibDB.matchingExtras(item.getHeading(), fields, maxBibListSize);
item.setExtras(bibinfo);
item.setCount(bibDB.recordCount(item.getHeading()));

Map<String, List<String>> authFields = authDB.getFields(item.getHeading());
String headingForAuthLookup = normalizer.headingForAuthQuery(item.getHeading());

Map<String, List<String>> authFields = authDB.getFields(headingForAuthLookup);

List<String> seeAlsoList = new ArrayList<String>();
for (String value : authFields.get("seeAlso")) {
Expand Down Expand Up @@ -63,7 +67,7 @@ public int getId(String from) throws Exception
}


public BrowseList getList(int rowid, int offset, int rows, String extras)
public BrowseList getList(int rowid, int offset, int rows, String extras, Normalizer normalizer)
throws Exception
{
BrowseList result = new BrowseList();
Expand All @@ -79,11 +83,11 @@ public BrowseList getList(int rowid, int offset, int rows, String extras)

BrowseItem item = new BrowseItem(sort_key, heading);

populateItem(item, extras);
populateItem(item, extras, normalizer);

result.add(item);
}

return result;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.util.RefCounted;
import org.vufind.util.Normalizer;
import org.vufind.util.NormalizerFactory;

/*
Expand Down Expand Up @@ -191,7 +192,9 @@ public void handleRequestBody(org.apache.solr.request.SolrQueryRequest req,

Log.info("Browsing from: " + rowid);

BrowseList list = browse.getList(rowid, offset, rows, fields);
Normalizer normalizer = NormalizerFactory.getNormalizer(source.normalizer);

BrowseList list = browse.getList(rowid, offset, rows, fields, normalizer);

Map<String,Object> result = new HashMap<>();

Expand All @@ -200,7 +203,7 @@ public void handleRequestBody(org.apache.solr.request.SolrQueryRequest req,
result.put("startRow", rowid);
result.put("offset", offset);

new MatchTypeResponse(from, list, rowid, rows, offset, NormalizerFactory.getNormalizer(source.normalizer)).addTo(result);
new MatchTypeResponse(from, list, rowid, rows, offset, normalizer).addTo(result);

rsp.add("Browse", result);
} finally {
Expand Down
5 changes: 4 additions & 1 deletion src/main/java/org/vufind/util/Normalizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,7 @@ public interface Normalizer
*/
public byte[] normalize(String s);

}
/**
 * Returns the form of a browse heading that should be used when looking
 * the heading up in the authority index.
 *
 * The default implementation returns the heading unchanged; normalizers
 * whose headings differ from the authority index's form can override it.
 *
 * @param heading the browse heading as stored in the headings index
 * @return the heading to use for the authority lookup
 */
default public String headingForAuthQuery(String heading) {
return heading;
}
}
27 changes: 27 additions & 0 deletions src/main/java/org/vufind/util/TopicNormalizer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.vufind.util;

/**
 * Normalizer for topic browse headings whose terms are delimited by
 * {@link #TOPIC_TERM_SEPARATOR} (U+2002 EN SPACE) instead of a plain space.
 *
 * The separator is replaced by a regular space both when producing sort
 * keys and when producing the heading used for authority lookups; the
 * heading itself is left untouched so the separator is still available
 * for display.
 */
public class TopicNormalizer implements Normalizer
{
    // Separator defined by VuFind's marc.properties for topic_browse field.
    // This is U+2002 EN SPACE (not the wider U+2003 EM SPACE).
    static final String TOPIC_TERM_SEPARATOR = "\u2002";

    // Produces the actual sort key once the separators have been replaced.
    private final Normalizer defaultNormalizer;

    /**
     * Creates a topic normalizer that delegates sort-key generation to an
     * {@code ICUCollatorNormalizer}.
     */
    public TopicNormalizer() {
        defaultNormalizer = new ICUCollatorNormalizer();
    }

    /**
     * Produces the sort key for a topic heading, treating topic term
     * separators as ordinary spaces.
     *
     * @param s the topic heading, possibly containing term separators
     * @return the sort key produced by the delegate normalizer
     */
    @Override
    public byte[] normalize(String s)
    {
        return defaultNormalizer.normalize(separatorsToSpaces(s));
    }

    /**
     * Produces the heading to use for the authority lookup.
     *
     * The authority DB uses single spaces as delimiters while topic
     * headings use en spaces, so the separators are swapped for spaces.
     *
     * @param heading the topic heading, possibly containing term separators
     * @return the heading with every term separator replaced by a space
     */
    @Override
    public String headingForAuthQuery(String heading) {
        return separatorsToSpaces(heading);
    }

    // Replaces every topic term separator in the text with a single space.
    private static String separatorsToSpaces(String text) {
        return text.replace(TOPIC_TERM_SEPARATOR, " ");
    }

}

0 comments on commit 1214786

Please sign in to comment.