Skip to content

Commit

Permalink
Pull #17 changes adopted to implement a whole-word check on the entire keyword, including whitespace.
Browse files Browse the repository at this point in the history
  • Loading branch information
robert-bor committed Sep 22, 2015
1 parent 76ae822 commit dc27d6e
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 98 deletions.
24 changes: 24 additions & 0 deletions src/main/java/org/ahocorasick/trie/Trie.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ public Collection<Emit> parseText(CharSequence text) {
removePartialMatches(text, collectedEmits);
}

if (trieConfig.isOnlyWholeWordsWhiteSpaceSeparated()) {
removePartialMatchesWhiteSpaceSeparated(text, collectedEmits);
}

if (!trieConfig.isAllowOverlaps()) {
IntervalTree intervalTree = new IntervalTree((List<Intervalable>)(List<?>)collectedEmits);
intervalTree.removeOverlaps((List<Intervalable>) (List<?>) collectedEmits);
Expand Down Expand Up @@ -161,6 +165,21 @@ private void removePartialMatches(CharSequence searchText, List<Emit> collectedE
}
}

/**
 * Drops every emit that is not delimited by whitespace (or by the start/end of
 * the text) on both sides.
 *
 * <p>An emit is kept only when it starts at index 0 or the character right
 * before its start is whitespace, and it ends on the last character of the
 * text or the character right after its end is whitespace. The end index is
 * treated as inclusive, hence the {@code + 1} when looking past it.
 *
 * @param searchText     the text the emits refer to
 * @param collectedEmits the emits to filter; modified in place
 */
private void removePartialMatchesWhiteSpaceSeparated(CharSequence searchText, List<Emit> collectedEmits) {
    int size = searchText.length();
    List<Emit> keptEmits = new ArrayList<>(collectedEmits.size());
    for (Emit emit : collectedEmits) {
        boolean startsOnBoundary = emit.getStart() == 0
                || Character.isWhitespace(searchText.charAt(emit.getStart() - 1));
        // Short-circuit so the trailing character is only inspected when the leading side passed.
        boolean wholeWord = startsOnBoundary
                && (emit.getEnd() + 1 == size
                        || Character.isWhitespace(searchText.charAt(emit.getEnd() + 1)));
        if (wholeWord) {
            keptEmits.add(emit);
        }
    }
    // Rebuild the list in one step instead of calling remove(Object) per rejected
    // emit, which costs a linear scan and shift each time on array-backed lists.
    // Leave the list untouched when nothing was rejected.
    if (keptEmits.size() != collectedEmits.size()) {
        collectedEmits.clear();
        collectedEmits.addAll(keptEmits);
    }
}

private State getState(State currentState, Character character) {
State newCurrentState = currentState.nextState(character);
while (newCurrentState == null) {
Expand Down Expand Up @@ -237,6 +256,11 @@ public TrieBuilder onlyWholeWords() {
return this;
}

/**
 * Switches on the "only whole words, whitespace separated" option in this
 * builder's trie configuration.
 *
 * @return this builder, so calls can be chained
 */
public TrieBuilder onlyWholeWordsWhiteSpaceSeparated() {
    trieConfig.setOnlyWholeWordsWhiteSpaceSeparated(true);
    return this;
}

public TrieBuilder addKeyword(String keyword) {
trie.addKeyword(keyword);
return this;
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/org/ahocorasick/trie/TrieConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ public class TrieConfig {

private boolean onlyWholeWords = false;

private boolean onlyWholeWordsWhiteSpaceSeparated = false;

private boolean caseInsensitive = false;

private boolean stopOnHit = false;
Expand All @@ -30,6 +32,12 @@ public void setOnlyWholeWords(boolean onlyWholeWords) {
this.onlyWholeWords = onlyWholeWords;
}

/**
 * Reports whether the "only whole words, whitespace separated" option is set.
 *
 * @return the current value of the flag
 */
public boolean isOnlyWholeWordsWhiteSpaceSeparated() {
    return this.onlyWholeWordsWhiteSpaceSeparated;
}

/**
 * Sets the "only whole words, whitespace separated" option.
 *
 * @param onlyWholeWordsWhiteSpaceSeparated the new value of the flag
 */
public void setOnlyWholeWordsWhiteSpaceSeparated(boolean onlyWholeWordsWhiteSpaceSeparated) {
this.onlyWholeWordsWhiteSpaceSeparated = onlyWholeWordsWhiteSpaceSeparated;
}

/**
 * Reports whether the case-insensitive option is set.
 *
 * @return the current value of the {@code caseInsensitive} flag
 */
public boolean isCaseInsensitive() {
return caseInsensitive;
}
Expand Down
Loading

0 comments on commit dc27d6e

Please sign in to comment.