Skip to content

Commit

Permalink
Respect ignoreCase flag in CommonGramsFilterFactory (#781)
Browse files Browse the repository at this point in the history
* Respect ignoreCase flag in CommonGramsFilterFactory

* Add LUCENENET-specific backport comment

* Use GetType instead of typeof for resource loader

---------

Co-authored-by: Paul Irwin <[email protected]>
  • Loading branch information
chenhh021 and paulirwin authored Nov 11, 2024
1 parent e05e0ff commit de280b0
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// </summary>
public class CommonGramsFilterFactory : TokenFilterFactory, IResourceLoaderAware
{
// TODO: shared base class for Stop/Keep/CommonGrams?
// TODO: shared base class for Stop/Keep/CommonGrams?
private CharArraySet commonWords;
private readonly string commonWordFiles;
private readonly string format;
Expand Down Expand Up @@ -71,7 +71,8 @@ public virtual void Inform(IResourceLoader loader)
}
else
{
commonWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
// LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
commonWords = new CharArraySet(m_luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
}
}

Expand All @@ -85,4 +86,4 @@ public override TokenStream Create(TokenStream input)
return commonGrams;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ namespace Lucene.Net.Analysis.CommonGrams
/// Tests pretty much copied from StopFilterFactoryTest. We use the test files
/// used by the StopFilterFactoryTest. TODO: consider creating separate test files
/// so this won't break if the stop filter test files change.
///
///
/// </summary>
public class TestCommonGramsFilterFactory : BaseTokenStreamFactoryTestCase
{
Expand Down Expand Up @@ -79,6 +79,24 @@ public virtual void TestDefaults()
AssertTokenStreamContents(stream, new string[] { "testing", "testing_the", "the", "the_factory", "factory" });
}

// LUCENENET-specific: backported ignoreCase fix from Lucene 8.10.0 (lucene#188, LUCENE-10008)
/// <summary>
/// Creates the factory with only the <c>ignoreCase=true</c> argument, then checks
/// that its common-word set contains "the" in both lower and capitalized form and
/// that a capitalized "The" in the input is joined with its neighbors into bigrams.
/// </summary>
[Test]
public void TestIgnoreCase()
{
    // Build the factory through the shared test helper, passing ignoreCase=true.
    IResourceLoader resourceLoader = new ClasspathResourceLoader(GetType());
    CommonGramsFilterFactory commonGramsFactory = (CommonGramsFilterFactory)TokenFilterFactory(
        "CommonGrams", TEST_VERSION_CURRENT, resourceLoader, "ignoreCase", "true");

    // The word set must exist and match regardless of casing.
    CharArraySet commonWordSet = commonGramsFactory.CommonWords;
    assertTrue("words is null and it shouldn't be", commonWordSet != null);
    assertTrue(commonWordSet.contains("the"));
    assertTrue(commonWordSet.contains("The"));

    // Whitespace tokenization with lowercasing disabled keeps "The" capitalized.
    StringReader input = new StringReader("testing The factory");
    Tokenizer source = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
    TokenStream filtered = commonGramsFactory.Create(source);

    string[] expectedTokens =
    {
        "testing",
        "testing_The",
        "The",
        "The_factory",
        "factory"
    };
    AssertTokenStreamContents(filtered, expectedTokens);
}

/// <summary>
/// Test that bogus arguments result in exception </summary>
[Test]
Expand All @@ -95,4 +113,4 @@ public virtual void TestBogusArguments()
}
}
}
}
}

0 comments on commit de280b0

Please sign in to comment.