diff --git a/src/Lucene.Net.Codecs/SimpleText/SimpleTextUtil.cs b/src/Lucene.Net.Codecs/SimpleText/SimpleTextUtil.cs
index 695e817552..c3b8f41e6b 100644
--- a/src/Lucene.Net.Codecs/SimpleText/SimpleTextUtil.cs
+++ b/src/Lucene.Net.Codecs/SimpleText/SimpleTextUtil.cs
@@ -79,7 +79,7 @@ public static void ReadLine(DataInput input, BytesRef scratch)
{
break;
}
-
+
scratch.Bytes[upto++] = b;
}
}
@@ -106,8 +106,9 @@ public static void CheckFooter(ChecksumIndexInput input)
if (StringHelper.StartsWith(scratch, CHECKSUM) == false)
{
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
throw new CorruptIndexException("SimpleText failure: expected checksum line but got " +
- scratch.Utf8ToString() + " (resource=" + input + ")");
+ scratch.Utf8ToStringWithFallback() + " (resource=" + input + ")");
}
var actualChecksum =
(new BytesRef(scratch.Bytes, CHECKSUM.Length, scratch.Length - CHECKSUM.Length)).Utf8ToString();
@@ -124,4 +125,4 @@ public static void CheckFooter(ChecksumIndexInput input)
}
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs b/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
index 91d4c355ab..3ca21e13f5 100644
--- a/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
+++ b/src/Lucene.Net.Facet/SortedSet/DefaultSortedSetDocValuesReaderState.cs
@@ -41,7 +41,7 @@ public class DefaultSortedSetDocValuesReaderState : SortedSetDocValuesReaderStat
///
/// Creates this, pulling doc values from the specified
- /// field.
+ /// field.
///
public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
{
@@ -79,7 +79,8 @@ public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = F
string[] components = FacetsConfig.StringToPath(spare.Utf8ToString());
if (components.Length != 2)
{
- throw new ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + spare.Utf8ToString());
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ throw new ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + spare.Utf8ToStringWithFallback());
}
if (!components[0].Equals(lastDim, StringComparison.Ordinal))
{
@@ -101,7 +102,7 @@ public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = F
///
/// Return top-level doc values.
///
- public override SortedSetDocValues GetDocValues()
+ public override SortedSetDocValues GetDocValues()
{
return topReader.GetSortedSetDocValues(field);
}
@@ -132,4 +133,4 @@ public override OrdRange GetOrdRange(string dim)
///
public override int Count => valueCount;
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs b/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
index 8abb12520e..89599b49a6 100644
--- a/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
+++ b/src/Lucene.Net.Grouping/AbstractGroupFacetCollector.cs
@@ -275,7 +275,7 @@ public override int GetHashCode()
public override string ToString()
{
return "FacetEntry{" +
- "value=" + value.Utf8ToString() +
+ "value=" + value.Utf8ToStringWithFallback() + // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
", count=" + count +
'}';
}
diff --git a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
index 9d91aa64c1..9a5c9c1476 100644
--- a/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
+++ b/src/Lucene.Net.Join/TermsIncludingScoreQuery.cs
@@ -134,7 +134,7 @@ public WeightAnonymousClass(TermsIncludingScoreQuery outerInstance, Weight origi
private TermsEnum segmentTermsEnum;
-
+
public override Explanation Explain(AtomicReaderContext context, int doc)
{
SVInnerScorer scorer = (SVInnerScorer) GetBulkScorer(context, false, null);
@@ -161,7 +161,7 @@ public override void Normalize(float norm, float topLevelBoost)
{
originalWeight.Normalize(norm, topLevelBoost*outerInstance.Boost);
}
-
+
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
Terms terms = context.AtomicReader.GetTerms(outerInstance._field);
@@ -181,7 +181,7 @@ public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
return new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, context.AtomicReader.MaxDoc, cost);
}
-
+
public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
if (scoreDocsInOrder)
@@ -236,7 +236,7 @@ internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, /* Weight weight,
//_cost = cost; // LUCENENET: Never read
_doc = -1;
}
-
+
public override bool Score(ICollector collector, int max)
{
FakeScorer fakeScorer = new FakeScorer();
@@ -285,12 +285,12 @@ private int NextDocOutOfOrder()
}
}
}
-
+
protected virtual int DocsEnumNextDoc()
{
return docsEnum.NextDoc();
}
-
+
internal Explanation Explain(int target) // LUCENENET NOTE: changed accessibility from private to internal
{
int docId;
@@ -314,7 +314,7 @@ internal Explanation Explain(int target) // LUCENENET NOTE: changed accessibilit
} while (docId != DocIdSetIterator.NO_MORE_DOCS);
return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
- "Score based on join value " + _termsEnum.Term.Utf8ToString());
+ "Score based on join value " + _termsEnum.Term.Utf8ToStringWithFallback()); // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
}
}
@@ -326,13 +326,13 @@ internal class MVInnerScorer : SVInnerScorer
internal readonly FixedBitSet alreadyEmittedDocs;
internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, /* Weight weight, // LUCENENET: Never read */
- IBits acceptDocs, TermsEnum termsEnum, int maxDoc /*, long cost // LUCENENET: Never read */)
+ IBits acceptDocs, TermsEnum termsEnum, int maxDoc /*, long cost // LUCENENET: Never read */)
: base(outerInstance, /*weight, // LUCENENET: Never read */
acceptDocs, termsEnum /*, cost // LUCENENET: Never read */)
{
alreadyEmittedDocs = new FixedBitSet(maxDoc);
}
-
+
protected override int DocsEnumNextDoc()
{
while (true)
@@ -360,11 +360,11 @@ internal class SVInOrderScorer : Scorer
internal readonly long cost;
internal int currentDoc = -1;
-
+
[SuppressMessage("CodeQuality", "IDE0079:Remove unnecessary suppression", Justification = "This is a SonarCloud issue")]
[SuppressMessage("CodeQuality", "S1699:Constructors should only call non-overridable methods", Justification = "Internal class")]
internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, IBits acceptDocs,
- TermsEnum termsEnum, int maxDoc, long cost)
+ TermsEnum termsEnum, int maxDoc, long cost)
: base(weight)
{
this.m_outerInstance = outerInstance;
@@ -374,7 +374,7 @@ internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight,
matchingDocsIterator = matchingDocs.GetIterator();
this.cost = cost;
}
-
+
protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
TermsEnum termsEnum)
{
@@ -398,12 +398,12 @@ protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptD
}
}
}
-
+
public override float GetScore()
{
return scores[currentDoc];
}
-
+
public override int Freq => 1;
public override int DocID => currentDoc;
@@ -412,7 +412,7 @@ public override int NextDoc()
{
return currentDoc = matchingDocsIterator.NextDoc();
}
-
+
public override int Advance(int target)
{
return currentDoc = matchingDocsIterator.Advance(target);
@@ -432,7 +432,7 @@ internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight,
: base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
{
}
-
+
protected override void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
TermsEnum termsEnum)
{
@@ -465,4 +465,4 @@ protected override void FillDocsAndScores(FixedBitSet matchingDocs, IBits accept
}
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Misc/Misc/TermStats.cs b/src/Lucene.Net.Misc/Misc/TermStats.cs
index 7ac5fbc918..c2d1664870 100644
--- a/src/Lucene.Net.Misc/Misc/TermStats.cs
+++ b/src/Lucene.Net.Misc/Misc/TermStats.cs
@@ -45,7 +45,8 @@ internal string GetTermText()
public override string ToString()
{
- return ("TermStats: Term=" + TermText.Utf8ToString() + " DocFreq=" + DocFreq + " TotalTermFreq=" + TotalTermFreq);
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ return "TermStats: Term=" + TermText.Utf8ToStringWithFallback() + " DocFreq=" + DocFreq + " TotalTermFreq=" + TotalTermFreq;
}
}
}
diff --git a/src/Lucene.Net.Queries/TermsFilter.cs b/src/Lucene.Net.Queries/TermsFilter.cs
index 3aae8295f3..ced20eed59 100644
--- a/src/Lucene.Net.Queries/TermsFilter.cs
+++ b/src/Lucene.Net.Queries/TermsFilter.cs
@@ -319,7 +319,7 @@ public override string ToString()
}
first = false;
builder.Append(current.field).Append(':');
- builder.Append(spare.Utf8ToString());
+ builder.Append(spare.Utf8ToStringWithFallback()); // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
}
}
diff --git a/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletion.cs b/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletion.cs
index 06587b33ca..ae71302ae0 100644
--- a/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletion.cs
+++ b/src/Lucene.Net.Suggest/Suggest/Fst/FSTCompletion.cs
@@ -59,7 +59,8 @@ internal Completion(BytesRef key, int bucket)
public override string ToString()
{
- return Utf8.Utf8ToString() + "/" + Bucket.ToString("0.0", CultureInfo.InvariantCulture);
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ return Utf8.Utf8ToStringWithFallback() + "/" + Bucket.ToString("0.0", CultureInfo.InvariantCulture);
}
///
diff --git a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
index cdace9c1cc..e771023d59 100644
--- a/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
+++ b/src/Lucene.Net/Codecs/BlockTreeTermsWriter.cs
@@ -440,7 +440,8 @@ public PendingTerm(BytesRef term, BlockTermState state)
public override string ToString()
{
- return Term.Utf8ToString();
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ return Term.Utf8ToStringWithFallback();
}
}
@@ -468,7 +469,8 @@ public PendingBlock(BytesRef prefix, long fp, bool hasTerms, bool isFloor, int f
public override string ToString()
{
- return $"BLOCK: {Prefix.Utf8ToString()}";
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ return $"BLOCK: {Prefix.Utf8ToStringWithFallback()}";
}
#nullable enable
diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xFields.cs b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xFields.cs
index 26a5b54a7a..fc941fab63 100644
--- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xFields.cs
+++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xFields.cs
@@ -48,7 +48,7 @@ namespace Lucene.Net.Codecs.Lucene3x
///
/// Exposes flex API on a pre-flex index, as a codec.
///
- /// @lucene.experimental
+ /// @lucene.experimental
///
[Obsolete("(4.0)")]
internal class Lucene3xFields : FieldsProducer
@@ -344,7 +344,8 @@ private bool SeekToNonBMP(SegmentTermEnum te, BytesRef term, int pos)
if (DEBUG_SURROGATES)
{
- Console.WriteLine(" try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ Console.WriteLine(" try seek term=" + UnicodeUtil.ToHexString(term.Utf8ToStringWithFallback()));
}
// Seek "back":
@@ -488,7 +489,8 @@ private bool DoPop()
if (DEBUG_SURROGATES)
{
- Console.WriteLine(" seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString());
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ Console.WriteLine(" seek to term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToStringWithFallback()) + " " + scratchTerm.ToString());
}
// TODO: more efficient seek? can we simply swap
@@ -599,10 +601,11 @@ private void SurrogateDance()
if (DEBUG_SURROGATES)
{
+ // LUCENENET specific - the debug output below uses Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
Console.WriteLine(" dance");
- Console.WriteLine(" prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToString()));
+ Console.WriteLine(" prev=" + UnicodeUtil.ToHexString(prevTerm.Utf8ToStringWithFallback()));
Console.WriteLine(" " + prevTerm.ToString());
- Console.WriteLine(" term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()));
+ Console.WriteLine(" term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToStringWithFallback()));
Console.WriteLine(" " + scratchTerm.ToString());
}
@@ -679,7 +682,8 @@ private void DoPushes()
if (DEBUG_SURROGATES)
{
- Console.WriteLine(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToString()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length);
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ Console.WriteLine(" try seek 1 pos=" + upTo + " term=" + UnicodeUtil.ToHexString(scratchTerm.Utf8ToStringWithFallback()) + " " + scratchTerm.ToString() + " len=" + scratchTerm.Length);
}
// Seek "forward":
@@ -777,7 +781,7 @@ internal virtual void Reset(FieldInfo fieldInfo)
{
//System.out.println("pff.reset te=" + termEnum);
this.fieldInfo = fieldInfo;
-
+
internedFieldName = fieldInfo.Name.Intern();
Term term = new Term(internedFieldName);
@@ -832,7 +836,8 @@ public override SeekStatus SeekCeil(BytesRef term)
{
if (DEBUG_SURROGATES)
{
- Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
+ // LUCENENET specific - use Utf8ToStringWithFallback() to handle invalid UTF-8 bytes
+ Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToStringWithFallback()));
}
skipNext = false;
TermInfosReader tis = outerInstance.TermsDict;
@@ -1232,4 +1237,4 @@ public override void CheckIntegrity()
{
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net/Util/BytesRef.cs b/src/Lucene.Net/Util/BytesRef.cs
index a7c339f7c9..0124269216 100644
--- a/src/Lucene.Net/Util/BytesRef.cs
+++ b/src/Lucene.Net/Util/BytesRef.cs
@@ -248,9 +248,9 @@ public string Utf8ToString()
/// resulting .
///
///
- /// LUCENENET specific version that does not throw exceptions,
- /// primarily for use in ToString() and other methods that
- /// should not throw exceptions.
+ /// LUCENENET specific version that does not throw exceptions on invalid UTF-8,
+ /// primarily for use in ToString() and other cases that should not throw exceptions,
+ /// such as when building a message for another exception.
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public string Utf8ToStringWithFallback()
@@ -604,11 +604,11 @@ public override string ToString()
switch (format)
{
case BytesRefFormat.UTF8:
- try
+ if (bytesRef.TryUtf8ToString(out var utf8String))
{
- return bytesRef.Utf8ToString();
+ return utf8String;
}
- catch (Exception e) when (e.IsIndexOutOfBoundsException())
+ else
{
return bytesRef.ToString();
}