Skip to content

Commit

Permalink
Test review A-D, #259 (#1018)
Browse files Browse the repository at this point in the history
Add missing comments and attributes, match formatting, remove unnecessary code, etc.
  • Loading branch information
paulirwin authored Nov 18, 2024
1 parent b28fa34 commit c4ab72c
Show file tree
Hide file tree
Showing 48 changed files with 900 additions and 563 deletions.
13 changes: 4 additions & 9 deletions src/Lucene.Net.Tests/Analysis/TestCachingTokenFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,17 @@ public virtual void TestCaching()

private sealed class TokenStreamAnonymousClass : TokenStream
{
private TestCachingTokenFilter outerInstance;
private readonly TestCachingTokenFilter outerInstance;

public TokenStreamAnonymousClass(TestCachingTokenFilter outerInstance)
{
InitMembers(outerInstance);
}

public void InitMembers(TestCachingTokenFilter outerInstance)
{
this.outerInstance = outerInstance;
index = 0;
// LUCENENET specific - AddAttribute must be called from the constructor
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
}

private int index;
private int index /* = 0 */;
private ICharTermAttribute termAtt;
private IOffsetAttribute offsetAtt;

Expand Down Expand Up @@ -135,4 +130,4 @@ private void CheckTokens(TokenStream stream)
Assert.AreEqual(tokens.Length, count);
}
}
}
}
4 changes: 3 additions & 1 deletion src/Lucene.Net.Tests/Analysis/TestCharFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ protected internal CharFilter1(TextReader @in)

/// <summary>
/// Reads up to <paramref name="len"/> characters from <c>m_input</c> into
/// <paramref name="cbuf"/>, starting at index <paramref name="off"/>.
/// </summary>
/// <returns>
/// The count reported by <c>m_input</c>, or -1 when that count is 0.
/// </returns>
public override int Read(char[] cbuf, int off, int len)
{
    int charsRead = m_input.Read(cbuf, off, len);
    if (charsRead == 0)
    {
        // LUCENENET specific: a zero count means there are no more characters
        // to read; report it as -1 to match Java.
        return -1;
    }

    return charsRead;
}
Expand All @@ -82,6 +83,7 @@ protected internal CharFilter2(TextReader @in)

/// <summary>
/// Reads up to <paramref name="len"/> characters from <c>m_input</c> into
/// <paramref name="cbuf"/>, starting at index <paramref name="off"/>.
/// </summary>
/// <returns>
/// The count reported by <c>m_input</c>, or -1 when that count is 0.
/// </returns>
public override int Read(char[] cbuf, int off, int len)
{
    int charsRead = m_input.Read(cbuf, off, len);
    if (charsRead == 0)
    {
        // LUCENENET specific: a zero count means there are no more characters
        // to read; report it as -1 to match Java.
        return -1;
    }

    return charsRead;
}
Expand All @@ -92,4 +94,4 @@ protected override int Correct(int currentOff)
}
}
}
}
}
141 changes: 108 additions & 33 deletions src/Lucene.Net.Tests/Analysis/TestGraphTokenizers.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using J2N.Text;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Attributes;
using Lucene.Net.Diagnostics;
using NUnit.Framework;
using System;
Expand Down Expand Up @@ -52,18 +53,19 @@ public class TestGraphTokenizers : BaseTokenStreamTestCase

private class GraphTokenizer : Tokenizer
{
internal IList<Token> tokens;
internal int upto;
internal int inputLength;
private IList<Token> tokens;
private int upto;
private int inputLength;

internal readonly ICharTermAttribute termAtt;
internal readonly IOffsetAttribute offsetAtt;
internal readonly IPositionIncrementAttribute posIncrAtt;
internal readonly IPositionLengthAttribute posLengthAtt;
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
private readonly IPositionIncrementAttribute posIncrAtt;
private readonly IPositionLengthAttribute posLengthAtt;

public GraphTokenizer(TextReader input)
: base(input)
{
// LUCENENET specific - AddAttribute must be called in the constructor
termAtt = AddAttribute<ICharTermAttribute>();
offsetAtt = AddAttribute<IOffsetAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
Expand Down Expand Up @@ -124,7 +126,7 @@ internal virtual void FillTokens()
{
int count = m_input.Read(buffer, 0, buffer.Length);

//.NET TextReader.Read(buff, int, int) returns 0, not -1 on no chars
// LUCENENET specific - .NET TextReader.Read(buff, int, int) returns 0, not -1 on no chars
// but in some cases, such as MockCharFilter, it overrides Read and returns -1
// so we should handle both 0 and -1 values
if (count <= 0)
Expand Down Expand Up @@ -229,14 +231,15 @@ public virtual void TestMockGraphTokenFilterOnGraphInput()
// Just deletes (leaving hole) token 'a':
private sealed class RemoveATokens : TokenFilter
{
internal int pendingPosInc;
private int pendingPosInc;

internal readonly ICharTermAttribute termAtt;
internal readonly IPositionIncrementAttribute posIncAtt;
private readonly ICharTermAttribute termAtt;
private readonly IPositionIncrementAttribute posIncAtt;

public RemoveATokens(TokenStream @in)
: base(@in)
{
// LUCENENET specific - AddAttribute must be called in the constructor
termAtt = AddAttribute<ICharTermAttribute>();
posIncAtt = AddAttribute<IPositionIncrementAttribute>();
}
Expand Down Expand Up @@ -383,13 +386,13 @@ public virtual void TestDoubleMockGraphTokenFilterRandom()
}

[Test]
[LuceneNetSpecific]
public void TestMockTokenizerCtor()
{
var sr = new StringReader("Hello");
var mt = new MockTokenizer(sr);
_ = new MockTokenizer(sr);
}


[Test]
public virtual void TestMockGraphTokenFilterBeforeHolesRandom()
{
Expand Down Expand Up @@ -459,7 +462,11 @@ private static Token Token(string term, int posInc, int posLength, int startOffs
[Test]
public virtual void TestSingleToken()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicAutomata.MakeString("abc");
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
Expand All @@ -468,7 +475,12 @@ public virtual void TestSingleToken()
[Test]
public virtual void TestMultipleHoles()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("b", 3, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("b", 3, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join(S2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, S2a("b"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
Expand All @@ -477,7 +489,13 @@ public virtual void TestMultipleHoles()
[Test]
public virtual void TestSynOverMultipleHoles()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("x", 0, 3), Token("b", 3, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("x", 0, 3),
Token("b", 3, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = Join(S2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, S2a("b"));
Automaton a2 = Join(S2a("x"), SEP_A, S2a("b"));
Expand All @@ -499,7 +517,8 @@ private static void toDot(Automaton a) throws IOException {
private static readonly Automaton SEP_A = BasicAutomata.MakeChar(TokenStreamToAutomaton.POS_SEP);
private static readonly Automaton HOLE_A = BasicAutomata.MakeChar(TokenStreamToAutomaton.HOLE);

private Automaton Join(params string[] strings)
// LUCENENET specific - made static
private static Automaton Join(params string[] strings)
{
IList<Automaton> @as = new JCG.List<Automaton>();
foreach (string s in strings)
Expand All @@ -511,20 +530,27 @@ private Automaton Join(params string[] strings)
return BasicOperations.Concatenate(@as);
}

private Automaton Join(params Automaton[] @as)
// LUCENENET specific - made static
private static Automaton Join(params Automaton[] @as)
{
return BasicOperations.Concatenate(@as);
}

private Automaton S2a(string s)
// LUCENENET specific - made static
private static Automaton S2a(string s)
{
return BasicAutomata.MakeString(s);
}

[Test]
public virtual void TestTwoTokens()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("def", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("def", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join("abc", "def");

Expand All @@ -535,7 +561,12 @@ public virtual void TestTwoTokens()
[Test]
public virtual void TestHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("def", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("def", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);

Automaton expected = Join(S2a("abc"), SEP_A, HOLE_A, SEP_A, S2a("def"));
Expand All @@ -548,7 +579,12 @@ public virtual void TestHole()
public virtual void TestOverlappedTokensSausage()
{
// Two tokens on top of each other (sausage):
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("abc");
Automaton a2 = BasicAutomata.MakeString("xyz");
Expand All @@ -559,7 +595,13 @@ public virtual void TestOverlappedTokensSausage()
[Test]
public virtual void TestOverlappedTokensLattice()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 2), Token("def", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 2),
Token("def", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("xyz");
Automaton a2 = Join("abc", "def");
Expand All @@ -572,27 +614,51 @@ public virtual void TestOverlappedTokensLattice()
[Test]
public virtual void TestSynOverHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 2), Token("b", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("X", 0, 2),
Token("b", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicOperations.Union(Join(S2a("a"), SEP_A, HOLE_A), BasicAutomata.MakeString("X"));
Automaton expected = BasicOperations.Concatenate(a1, Join(SEP_A, S2a("b")));
Automaton a1 = BasicOperations.Union(
Join(S2a("a"), SEP_A, HOLE_A),
BasicAutomata.MakeString("X"));
Automaton expected = BasicOperations.Concatenate(a1,
Join(SEP_A, S2a("b")));
//toDot(actual);
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}

[Test]
public virtual void TestSynOverHole2()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("xyz", 1, 1), Token("abc", 0, 3), Token("def", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("xyz", 1, 1),
Token("abc", 0, 3),
Token("def", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicOperations.Union(Join(S2a("xyz"), SEP_A, HOLE_A, SEP_A, S2a("def")), BasicAutomata.MakeString("abc"));
Automaton expected = BasicOperations.Union(
Join(S2a("xyz"), SEP_A, HOLE_A, SEP_A, S2a("def")),
BasicAutomata.MakeString("abc"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}

[Test]
public virtual void TestOverlappedTokensLattice2()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1), Token("xyz", 0, 3), Token("def", 1, 1), Token("ghi", 1, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 1, 1),
Token("xyz", 0, 3),
Token("def", 1, 1),
Token("ghi", 1, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton a1 = BasicAutomata.MakeString("xyz");
Automaton a2 = Join("abc", "def", "ghi");
Expand All @@ -606,14 +672,18 @@ public virtual void TestToDot()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 1, 1, 0, 4) });
StringWriter w = new StringWriter();
(new TokenStreamToDot("abcd", ts, (TextWriter)(w))).ToDot();
new TokenStreamToDot("abcd", ts, (TextWriter)(w)).ToDot();
Assert.IsTrue(w.ToString().IndexOf("abc / abcd", StringComparison.Ordinal) != -1);
}

[Test]
public virtual void TestStartsWithHole()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("abc", 2, 1) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("abc", 2, 1)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = Join(HOLE_A, SEP_A, S2a("abc"));
//toDot(actual);
Expand All @@ -625,10 +695,15 @@ public virtual void TestStartsWithHole()
[Test]
public virtual void TestSynHangingOverEnd()
{
TokenStream ts = new CannedTokenStream(new Token[] { Token("a", 1, 1), Token("X", 0, 10) });
TokenStream ts = new CannedTokenStream(
new Token[]
{
Token("a", 1, 1),
Token("X", 0, 10)
});
Automaton actual = (new TokenStreamToAutomaton()).ToAutomaton(ts);
Automaton expected = BasicOperations.Union(BasicAutomata.MakeString("a"), BasicAutomata.MakeString("X"));
Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
}
}
}
}
22 changes: 20 additions & 2 deletions src/Lucene.Net.Tests/Analysis/TestLookaheadTokenFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,25 @@ public virtual void TestMissedFirstToken()
return new TokenStreamComponents(source, filter);
});

AssertAnalyzesTo(analyzer, "Only he who is running knows .", new string[] { "Only", "Only-huh?", "he", "he-huh?", "who", "who-huh?", "is", "is-huh?", "running", "running-huh?", "knows", "knows-huh?", ".", ".-huh?" });
AssertAnalyzesTo(analyzer,
"Only he who is running knows .",
new string[]
{
"Only",
"Only-huh?",
"he",
"he-huh?",
"who",
"who-huh?",
"is",
"is-huh?",
"running",
"running-huh?",
"knows",
"knows-huh?",
".",
".-huh?"
});
}
}
}
}
Loading

0 comments on commit c4ab72c

Please sign in to comment.