diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index 48557884a8893..8d0785935f4ee 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -14,7 +14,7 @@
-
-
-
+
+
+
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a597b50775fde..e20460eb6745f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483))
- Make IndexStoreListener a pluggable interface ([#16583](https://github.com/opensearch-project/OpenSearch/pull/16583))
- Add vertical scaling and SoftReference for snapshot repository data cache ([#16489](https://github.com/opensearch-project/OpenSearch/pull/16489))
+- Add new configuration setting `synonym_analyzer` to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
### Dependencies
- Bump `com.google.apis:google-api-services-compute` from v1-rev20240407-2.0.0 to v1-rev20241021-2.0.0 ([#16502](https://github.com/opensearch-project/OpenSearch/pull/16502), [#16548](https://github.com/opensearch-project/OpenSearch/pull/16548))
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisPlugin.java
index e2b778a71ebf5..c868dcdeedd7e 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisPlugin.java
@@ -146,6 +146,7 @@
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.opensearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.opensearch.plugins.AnalysisPlugin;
@@ -247,7 +248,7 @@ public Map>> getAn
}
@Override
- public Map> getTokenFilters() {
+ public Map> getTokenFilters(AnalysisModule analysisModule) {
Map> filters = new TreeMap<>();
filters.put("apostrophe", ApostropheFilterFactory::new);
filters.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
@@ -325,14 +326,36 @@ public Map> getTokenFilters() {
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
- filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
- filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);
filters.put("uppercase", UpperCaseTokenFilterFactory::new);
filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
+ filters.put(
+ "synonym",
+ requiresAnalysisSettings(
+ (indexSettings, environment, name, settings) -> new SynonymTokenFilterFactory(
+ indexSettings,
+ environment,
+ name,
+ settings,
+ analysisModule.getAnalysisRegistry()
+ )
+ )
+ );
+ filters.put(
+ "synonym_graph",
+ requiresAnalysisSettings(
+ (indexSettings, environment, name, settings) -> new SynonymGraphTokenFilterFactory(
+ indexSettings,
+ environment,
+ name,
+ settings,
+ analysisModule.getAnalysisRegistry()
+ )
+ )
+ );
return filters;
}
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
index fed959108c411..c2e20e99473de 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymGraphTokenFilterFactory.java
@@ -40,6 +40,7 @@
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AnalysisMode;
+import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.CharFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
@@ -49,8 +50,14 @@
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
- SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
- super(indexSettings, env, name, settings);
+ SynonymGraphTokenFilterFactory(
+ IndexSettings indexSettings,
+ Environment env,
+ String name,
+ Settings settings,
+ AnalysisRegistry analysisRegistry
+ ) {
+ super(indexSettings, env, name, settings, analysisRegistry);
}
@Override
diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
index 01a65e87d7466..07585532635a3 100644
--- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
+++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/SynonymTokenFilterFactory.java
@@ -44,11 +44,13 @@
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.Analysis;
import org.opensearch.index.analysis.AnalysisMode;
+import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.CharFilterFactory;
import org.opensearch.index.analysis.CustomAnalyzer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
+import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
@@ -64,8 +66,16 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;
protected final AnalysisMode analysisMode;
-
- SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+ private final String synonymAnalyzerName;
+ private final AnalysisRegistry analysisRegistry;
+
+ SynonymTokenFilterFactory(
+ IndexSettings indexSettings,
+ Environment env,
+ String name,
+ Settings settings,
+ AnalysisRegistry analysisRegistry
+ ) {
super(indexSettings, name, settings);
this.settings = settings;
@@ -83,6 +93,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
boolean updateable = settings.getAsBoolean("updateable", false);
this.analysisMode = updateable ? AnalysisMode.SEARCH_TIME : AnalysisMode.ALL;
this.environment = env;
+ this.synonymAnalyzerName = settings.get("synonym_analyzer", null);
+ this.analysisRegistry = analysisRegistry;
}
@Override
@@ -137,6 +149,17 @@ Analyzer buildSynonymAnalyzer(
List tokenFilters,
Function allFilters
) {
+ if (synonymAnalyzerName != null) {
+ Analyzer customSynonymAnalyzer;
+ try {
+ customSynonymAnalyzer = analysisRegistry.getAnalyzer(synonymAnalyzerName);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ if (customSynonymAnalyzer != null) {
+ return customSynonymAnalyzer;
+ }
+ }
return new CustomAnalyzer(
tokenizer,
charFilters.toArray(new CharFilterFactory[0]),
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
index e6aeea96995a9..7c6ef4b5f341d 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/CommonAnalysisFactoryTests.java
@@ -39,12 +39,16 @@
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.apache.lucene.analysis.te.TeluguNormalizationFilterFactory;
import org.apache.lucene.analysis.te.TeluguStemFilterFactory;
+import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.indices.analysis.AnalysisFactoryTestCase;
+import org.opensearch.indices.analysis.AnalysisModule;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
+import org.mockito.Mock;
+
import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.toList;
@@ -53,6 +57,9 @@ public CommonAnalysisFactoryTests() {
super(new CommonAnalysisPlugin());
}
+ @Mock
+ private AnalysisModule analysisModule;
+
@Override
protected Map> getTokenizers() {
Map> tokenizers = new TreeMap<>(super.getTokenizers());
@@ -302,4 +309,19 @@ private void markedTestCase(String name, Map> map) {
unmarked
);
}
+
+ /**
+ * Tests the getTokenFilters(AnalysisModule) method to verify:
+ * 1. All token filters are properly loaded
+ * 2. Basic filters remain available
+ * 3. Synonym filters remain available when AnalysisModule is provided
+ */
+ public void testGetTokenFiltersWithAnalysisModule() {
+ CommonAnalysisPlugin plugin = (CommonAnalysisPlugin) getAnalysisPlugin();
+ Map> filters = plugin.getTokenFilters(analysisModule);
+ assertNotNull("Token filters should not be null", filters);
+ assertTrue("Should contain basic filters", filters.containsKey("lowercase"));
+ assertTrue("Should contain synonym filter", filters.containsKey("synonym"));
+ assertTrue("Should contain synonym_graph filter", filters.containsKey("synonym_graph"));
+ }
}
diff --git a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
index d6285c64b09f5..b307372954c2c 100644
--- a/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
+++ b/modules/analysis-common/src/test/java/org/opensearch/analysis/common/SynonymsAnalysisTests.java
@@ -42,11 +42,14 @@
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
+import org.opensearch.env.TestEnvironment;
import org.opensearch.index.IndexSettings;
+import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.PreConfiguredTokenFilter;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.test.IndexSettingsModule;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.VersionUtils;
@@ -64,6 +67,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.startsWith;
+import static org.apache.lucene.tests.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
public class SynonymsAnalysisTests extends OpenSearchTestCase {
private IndexAnalyzers indexAnalyzers;
@@ -259,14 +263,17 @@ public void testTokenFiltersBypassSynonymAnalysis() throws IOException {
.put("hyphenation_patterns_path", "foo")
.build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+ Environment environment = TestEnvironment.newEnvironment(settings);
+ AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
+ AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
String[] bypassingFactories = new String[] { "dictionary_decompounder" };
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
for (String factory : bypassingFactories) {
- TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
- TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
- SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
+ TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
+ TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
+ SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
Analyzer analyzer = stff.buildSynonymAnalyzer(tok, Collections.emptyList(), Collections.singletonList(tff), null);
try (TokenStream ts = analyzer.tokenStream("field", "text")) {
@@ -329,7 +336,10 @@ public void testDisallowedTokenFilters() throws IOException {
.putList("common_words", "a", "b")
.put("output_unigrams", "true")
.build();
+ Environment environment = TestEnvironment.newEnvironment(settings);
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings);
+ AnalysisModule analysisModule = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
+ AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
String[] disallowedFactories = new String[] {
@@ -343,9 +353,9 @@ public void testDisallowedTokenFilters() throws IOException {
"fingerprint" };
for (String factory : disallowedFactories) {
- TokenFilterFactory tff = plugin.getTokenFilters().get(factory).get(idxSettings, null, factory, settings);
- TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, null, "keyword", settings);
- SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, null, "synonym", settings);
+ TokenFilterFactory tff = plugin.getTokenFilters(analysisModule).get(factory).get(idxSettings, environment, factory, settings);
+ TokenizerFactory tok = new KeywordTokenizerFactory(idxSettings, environment, "keyword", settings);
+ SynonymTokenFilterFactory stff = new SynonymTokenFilterFactory(idxSettings, environment, "synonym", settings, analysisRegistry);
IllegalArgumentException e = expectThrows(
IllegalArgumentException.class,
@@ -372,4 +382,76 @@ private void match(String analyzerName, String source, String target) throws IOE
MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
}
+ /**
+ * Tests the integration of word delimiter and synonym graph filters with synonym_analyzer based on issue #16263.
+ * This test verifies the correct handling of:
+ * 1. Hyphenated words with word delimiter (e.g., "note-book" → ["notebook", "note", "book"])
+ * 2. Multi-word synonyms (e.g., "mobile phone" → ["smartphone"])
+ * 3. Single word synonyms (e.g., "laptop" → ["notebook"])
+ *
+ * @see <a href="https://github.com/opensearch-project/OpenSearch/issues/16263">Issue #16263</a>
+ */
+ public void testSynonymAnalyzerWithWordDelimiter() throws IOException {
+ Settings settings = Settings.builder()
+ .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+ .put("path.home", createTempDir().toString())
+ .put("index.analysis.filter.custom_word_delimiter.type", "word_delimiter_graph")
+ .put("index.analysis.filter.custom_word_delimiter.generate_word_parts", true)
+ .put("index.analysis.filter.custom_word_delimiter.catenate_all", true)
+ .put("index.analysis.filter.custom_word_delimiter.split_on_numerics", false)
+ .put("index.analysis.filter.custom_word_delimiter.split_on_case_change", false)
+ .put("index.analysis.filter.custom_pattern_replace_filter.type", "pattern_replace")
+ .put("index.analysis.filter.custom_pattern_replace_filter.pattern", "(-)")
+ .put("index.analysis.filter.custom_pattern_replace_filter.replacement", " ")
+ .put("index.analysis.filter.custom_pattern_replace_filter.all", true)
+ .put("index.analysis.filter.custom_synonym_graph_filter.type", "synonym_graph")
+ .putList(
+ "index.analysis.filter.custom_synonym_graph_filter.synonyms",
+ "laptop => notebook",
+ "smartphone, mobile phone, cell phone => smartphone",
+ "tv, television => television"
+ )
+ .put("index.analysis.filter.custom_synonym_graph_filter.synonym_analyzer", "standard")
+ .put("index.analysis.analyzer.text_en_index.type", "custom")
+ .put("index.analysis.analyzer.text_en_index.tokenizer", "whitespace")
+ .putList(
+ "index.analysis.analyzer.text_en_index.filter",
+ "lowercase",
+ "custom_word_delimiter",
+ "custom_synonym_graph_filter",
+ "custom_pattern_replace_filter",
+ "flatten_graph"
+ )
+ .build();
+ Environment environment = TestEnvironment.newEnvironment(settings);
+ IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
+ AnalysisModule module = new AnalysisModule(environment, Collections.singletonList(new CommonAnalysisPlugin()));
+ IndexAnalyzers analyzers = module.getAnalysisRegistry().build(indexSettings);
+ try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "note-book")) {
+ assertTokenStreamContents(
+ ts,
+ new String[] { "notebook", "note", "book" },
+ new int[] { 0, 0, 5 },
+ new int[] { 9, 4, 9 },
+ new String[] { "word", "word", "word" },
+ new int[] { 1, 0, 1 },
+ new int[] { 2, 1, 1 }
+ );
+ }
+ try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "mobile phone")) {
+ assertTokenStreamContents(
+ ts,
+ new String[] { "smartphone" },
+ new int[] { 0 },
+ new int[] { 12 },
+ new String[] { "SYNONYM" },
+ new int[] { 1 },
+ new int[] { 1 }
+ );
+ }
+ try (TokenStream ts = analyzers.get("text_en_index").tokenStream("", "laptop")) {
+ assertTokenStreamContents(ts, new String[] { "notebook" }, new int[] { 0 }, new int[] { 6 });
+ }
+ }
+
}
diff --git a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
index cc87c982a684d..64eb58d636b6a 100644
--- a/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
+++ b/server/src/main/java/org/opensearch/indices/analysis/AnalysisModule.java
@@ -181,7 +181,12 @@ public boolean requiresAnalysisSettings() {
)
);
- tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
+ for (AnalysisPlugin plugin : plugins) {
+ Map> filters = plugin.getTokenFilters(this);
+ for (Map.Entry> entry : filters.entrySet()) {
+ tokenFilters.register(entry.getKey(), entry.getValue());
+ }
+ }
return tokenFilters;
}
diff --git a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
index 53dcc916b244f..14ab2fe7d7b31 100644
--- a/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
+++ b/server/src/main/java/org/opensearch/plugins/AnalysisPlugin.java
@@ -47,6 +47,7 @@
import org.opensearch.index.analysis.PreConfiguredTokenizer;
import org.opensearch.index.analysis.TokenFilterFactory;
import org.opensearch.index.analysis.TokenizerFactory;
+import org.opensearch.indices.analysis.AnalysisModule;
import org.opensearch.indices.analysis.AnalysisModule.AnalysisProvider;
import java.io.IOException;
@@ -92,6 +93,14 @@ default Map> getTokenFilters() {
return emptyMap();
}
+ /**
+ * Override to add additional {@link TokenFilter}s that need access to the AnalysisModule.
+ * The default implementation for plugins that don't need AnalysisModule calls the existing getTokenFilters() method.
+ */
+ default Map> getTokenFilters(AnalysisModule analysisModule) {
+ return getTokenFilters();
+ }
+
/**
* Override to add additional {@link Tokenizer}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
* how to on get the configuration from the index.
diff --git a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
index 83cdab98a04f2..ce66f47b4b86a 100644
--- a/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
+++ b/server/src/test/java/org/opensearch/indices/analysis/AnalysisModuleTests.java
@@ -57,6 +57,8 @@
import org.opensearch.index.analysis.CustomAnalyzer;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.analysis.MyFilterTokenFilterFactory;
+import org.opensearch.index.analysis.NameOrDefinition;
+import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.analysis.PreConfiguredCharFilter;
import org.opensearch.index.analysis.PreConfiguredTokenFilter;
import org.opensearch.index.analysis.PreConfiguredTokenizer;
@@ -81,6 +83,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -550,4 +553,55 @@ public boolean incrementToken() throws IOException {
}
}
+ /**
+ * Tests registration and functionality of token filters that require access to the AnalysisModule.
+ * This test verifies the token filter registration using the extended getTokenFilters(AnalysisModule) method
+ */
+ public void testTokenFilterRegistrationWithModuleReference() throws IOException {
+ class TestPlugin implements AnalysisPlugin {
+ @Override
+ public Map> getTokenFilters(AnalysisModule module) {
+ return Map.of(
+ "test_filter",
+ (indexSettings, env, name, settings) -> AppendTokenFilter.factoryForSuffix("_" + module.hashCode())
+ );
+ }
+ }
+ Settings settings = Settings.builder()
+ .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
+ .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+ .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
+ .put("index.analysis.analyzer.my_analyzer.filter", "test_filter")
+ .build();
+ Environment environment = TestEnvironment.newEnvironment(settings);
+ AnalysisModule module = new AnalysisModule(environment, singletonList(new TestPlugin()));
+ AnalysisRegistry registry = module.getAnalysisRegistry();
+ IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", Settings.builder().put(settings).build());
+ Map tokenFilterFactories = registry.buildTokenFilterFactories(indexSettings);
+ assertTrue("Token filter 'test_filter' should be registered", tokenFilterFactories.containsKey("test_filter"));
+ IndexAnalyzers analyzers = registry.build(indexSettings);
+ String testText = "test";
+ TokenStream tokenStream = analyzers.get("my_analyzer").tokenStream("", testText);
+ CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+ tokenStream.reset();
+ assertTrue("Should have found a token", tokenStream.incrementToken());
+ assertEquals("Token should have expected suffix", "test_" + module.hashCode(), charTermAttribute.toString());
+ assertFalse("Should not have additional tokens", tokenStream.incrementToken());
+ tokenStream.close();
+ NamedAnalyzer customAnalyzer = registry.buildCustomAnalyzer(
+ indexSettings,
+ false,
+ new NameOrDefinition("standard"),
+ Collections.emptyList(),
+ Collections.singletonList(new NameOrDefinition("test_filter"))
+ );
+ tokenStream = customAnalyzer.tokenStream("", testText);
+ charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
+ tokenStream.reset();
+ assertTrue("Custom analyzer should produce a token", tokenStream.incrementToken());
+ assertEquals("Custom analyzer token should have expected suffix", "test_" + module.hashCode(), charTermAttribute.toString());
+ assertFalse("Custom analyzer should not produce additional tokens", tokenStream.incrementToken());
+ tokenStream.close();
+ }
+
}
diff --git a/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
index 23cf4d47a49d9..ca23f67215f3d 100644
--- a/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/indices/analysis/AnalysisFactoryTestCase.java
@@ -248,6 +248,17 @@ public AnalysisFactoryTestCase(AnalysisPlugin plugin) {
this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core");
}
+ /**
+ * Returns the AnalysisPlugin instance that was passed to this test case.
+ * This protected method allows subclasses to access the plugin for testing
+ * specific analysis components.
+ *
+ * @return The AnalysisPlugin instance used by this test case
+ */
+ protected AnalysisPlugin getAnalysisPlugin() {
+ return plugin;
+ }
+
protected Map> getCharFilters() {
return KNOWN_CHARFILTERS;
}