Skip to content
This repository has been archived by the owner on Oct 26, 2024. It is now read-only.

Commit

Permalink
fix: Automatically filter capitalized first letter of each sentence. …
Browse files Browse the repository at this point in the history
…Use code points for better compatibility with some foreign languages.
  • Loading branch information
LisoUseInAIKyrios committed Mar 9, 2024
1 parent 04b9fca commit 80f12f7
Showing 1 changed file with 45 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
import androidx.annotation.Nullable;
import androidx.annotation.RequiresApi;

import java.util.ArrayList;
import java.util.List;
import java.util.LinkedHashSet;
import java.util.Set;

import app.revanced.integrations.shared.Logger;
import app.revanced.integrations.shared.Utils;
import app.revanced.integrations.shared.settings.StringSetting;
import app.revanced.integrations.youtube.ByteTrieSearch;
import app.revanced.integrations.youtube.settings.Settings;

Expand All @@ -26,12 +25,17 @@
* - Filtering a channel name can still show Shorts from that channel in the search results
* - Some layout component residue will remain, such as the video chapter previews for some search results.
* - Keywords are case sensitive, but some casing variation is manually added.
* (such as "Mr Beast" automatically filtering "mr beast" and "MR BEAST").
* (i.e. "mr beast" automatically filters "Mr Beast" and "MR BEAST").
*/
@SuppressWarnings("unused")
@RequiresApi(api = Build.VERSION_CODES.N)
final class HideKeywordContentFilter extends Filter {

/**
* Minimum keyword/phrase length to prevent excessively broad content filtering.
*/
private static final int MINIMUM_KEYWORD_LENGTH = 3;

/**
* Substrings that are always first in the path.
*/
Expand All @@ -54,62 +58,59 @@ final class HideKeywordContentFilter extends Filter {
private final ByteTrieSearch bufferSearch = new ByteTrieSearch();

/**
* Minimum keyword/phrase length to prevent excessively broad content filtering.
* Capitalize the first letter of each word.
*/
private static final int MINIMUM_KEYWORD_LENGTH = 3;
private static String capitalizeFirstLetters(String sentence) {
    // Words are separated by a plain space only; no other whitespace counts as a boundary.
    final int wordSeparator = ' ';
    StringBuilder result = new StringBuilder(sentence.length());
    boolean atWordStart = true;

    // Walk the text one code point at a time (not one char at a time),
    // so characters encoded as surrogate pairs are treated as a single letter.
    int offset = 0;
    while (offset < sentence.length()) {
        final int current = sentence.codePointAt(offset);
        offset += Character.charCount(current);

        if (current == wordSeparator) {
            // The next non-space code point begins a new word.
            atWordStart = true;
            result.appendCodePoint(current);
        } else if (atWordStart) {
            // Title case rather than upper case, since some languages have
            // distinct title case forms for the first letter of a word.
            result.appendCodePoint(Character.toTitleCase(current));
            atWordStart = false;
        } else {
            result.appendCodePoint(current);
        }
    }
    return result.toString();
}

private static List<String> splitAndVerifyStrings(StringSetting setting) {
String[] split = setting.get().split("\n");
List<String> keywords = new ArrayList<>(split.length);
private void parseKeywords() {
String[] split = Settings.HIDE_KEYWORD_CONTENT_PHRASES.get().split("\n");
if (split.length == 0) {
return;
}

// Linked set, so the logged keywords keep their insertion order and are easier to read.
Set<String> keywords = new LinkedHashSet<>(5 * split.length);

for (String phrase : split) {
// Remove any trailing white space the user may have accidentally included.
phrase = phrase.stripTrailing();
if (phrase.isBlank()) continue;

if (phrase.length() < MINIMUM_KEYWORD_LENGTH) {
// Do not reset the setting. Keep the invalid keywords so the user can fix the mistake.
Utils.showToastLong(str("revanced_hide_keyword_toast_invalid_length", MINIMUM_KEYWORD_LENGTH, phrase));
continue;
}
keywords.add(phrase);
}

return keywords;
}

private void parseKeywords() {
List<String> keywords = splitAndVerifyStrings(Settings.HIDE_KEYWORD_CONTENT_PHRASES);
if (keywords.isEmpty()) {
return;
// Add common casing that might appear.
// This could be simplified by adding case insensitive search to the prefix search,
// but that also brings a small performance hit.
// Instead add all common variations of the keywords.
String lowerCase = phrase.toLowerCase();
keywords.add(lowerCase);
keywords.add(capitalizeFirstLetters(lowerCase));
keywords.add(phrase.toUpperCase());
}

// Add common casing that might appear.
// If a desired keyword has mixed casing (such as "Matt Whatever")
// the user will need to add that exact string.
// This could be simplified by adding case insensitive search to the prefix search,
// but that also brings a small performance hit.
List<String> modifiedKeywords = new ArrayList<>();
for (String keyword : keywords) {
modifiedKeywords.add(keyword); // Original casing as added by the user

// Add lower case and upper case variants.
String lowerCase = keyword.toLowerCase();
if (!keyword.equals(lowerCase)) {
modifiedKeywords.add(lowerCase);
}
String upperCase = keyword.toUpperCase();
if (!keyword.equals(upperCase)) {
modifiedKeywords.add(upperCase);
}
// Include first letter capitalization variant.
String sentenceCapital = Character.toUpperCase(keyword.charAt(0))
+ keyword.substring(1);
if (!sentenceCapital.equals(keyword) && !sentenceCapital.equals(upperCase)) {
modifiedKeywords.add(sentenceCapital);
}
}
Logger.printDebug(() -> "Using keywords: " + modifiedKeywords);
bufferSearch.addPatterns(convertStringsToBytes(modifiedKeywords.toArray(new String[0])));
Logger.printDebug(() -> "Using keywords: " + keywords);
bufferSearch.addPatterns(convertStringsToBytes(keywords.toArray(new String[0])));
}

public HideKeywordContentFilter() {
Expand Down

0 comments on commit 80f12f7

Please sign in to comment.