Skip to content

Commit

Permalink
Rename IOUtils.CHARSET_UTF_8 to ENCODING_UTF_8_NO_BOM
Browse files (browse the repository at this point in the history)
  • Loading branch information
paulirwin committed Dec 31, 2024
1 parent 722bc57 commit 0021555
Show file tree
Hide file tree
Showing 16 changed files with 41 additions and 41 deletions.
2 changes: 1 addition & 1 deletion src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public static void Main(string[] args)

FileInfo topicsFile = new FileInfo(args[0]);
FileInfo qrelsFile = new FileInfo(args[1]);
SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), IOUtils.CHARSET_UTF_8 /* huh, no nio.Charset ctor? */), "lucene");
SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM /* huh, no nio.Charset ctor? */), "lucene");
using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3]));
using IndexReader reader = DirectoryReader.Open(dir);
string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
Expand Down
4 changes: 2 additions & 2 deletions src/Lucene.Net.TestFramework/Util/TestUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ public static CheckIndex.Status CheckIndex(Directory dir, bool crossCheckTermVec
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.CrossCheckTermVectors = crossCheckTermVectors;
checker.InfoStream = new StreamWriter(bos, IOUtils.CHARSET_UTF_8);
checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
if (indexStatus is null || indexStatus.Clean == false)
{
Expand Down Expand Up @@ -203,7 +203,7 @@ public static void CheckReader(AtomicReader reader, bool crossCheckTermVectors)
{
// LUCENENET: dispose the StreamWriter and ByteArrayOutputStream when done
using ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
using StreamWriter infoStream = new StreamWriter(bos, IOUtils.CHARSET_UTF_8, leaveOpen: true, bufferSize: 1024);
using StreamWriter infoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: true, bufferSize: 1024);

reader.CheckIntegrity();
CheckIndex.Status.FieldNormStatus fieldNormStatus = Index.CheckIndex.TestFieldNorms(reader, infoStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public void TestDocMakerLeak()
// DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak.
FileInfo f = new FileInfo(Path.Combine(getWorkDir().FullName, "docMakerLeak.txt"));
TextWriter ps = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), IOUtils.CHARSET_UTF_8);
TextWriter ps = new StreamWriter(new FileStream(f.FullName, FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM);
ps.WriteLine("one title\t" + (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) + "\tsome content"); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
ps.Dispose();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ public virtual void TestBasic()
string result;
using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
{
using (StreamWriter w = new StreamWriter(bos, IOUtils.CHARSET_UTF_8, 2048, true) { AutoFlush = true })
using (StreamWriter w = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM, 2048, true) { AutoFlush = true })
{
PrintTaxonomyStats.PrintStats(taxoReader, w, true);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Lucene.Net.Tests/Index/TestCheckIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public virtual void TestDeletedDocs()

ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.InfoStream = new StreamWriter(bos, IOUtils.CHARSET_UTF_8);
checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM);
if (Verbose)
{
checker.InfoStream = Console.Out;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public virtual void TestInfoStreamGetsFieldName()
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.CHARSET_UTF_8);
StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
Expand Down Expand Up @@ -120,7 +120,7 @@ public virtual void TestNoExtraNoise()
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT, new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.CHARSET_UTF_8);
StreamWriter infoPrintStream = new StreamWriter(infoBytes, IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
Expand Down
4 changes: 2 additions & 2 deletions src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,7 @@ public virtual void TestDeletesCheckIndexOutput()
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
//MemoryStream bos = new MemoryStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.InfoStream = new StreamWriter(bos, IOUtils.CHARSET_UTF_8);
checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
Expand All @@ -1413,7 +1413,7 @@ public virtual void TestDeletesCheckIndexOutput()
w.Dispose();

bos = new ByteArrayOutputStream(1024);
checker.InfoStream = new StreamWriter(bos, IOUtils.CHARSET_UTF_8);
checker.InfoStream = new StreamWriter(bos, IOUtils.ENCODING_UTF_8_NO_BOM);
indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
Expand Down
6 changes: 3 additions & 3 deletions src/Lucene.Net.Tests/Index/TestPayloads.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public virtual void TestPayloadFieldBit()
// enabled in only some documents
d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
// only add payload data for field f2
analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
writer.AddDocument(d);
// flush
writer.Dispose();
Expand All @@ -106,8 +106,8 @@ public virtual void TestPayloadFieldBit()
d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
// add payload data for field f2 and f3
analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 3);
writer.AddDocument(d);

// force merge
Expand Down
22 changes: 11 additions & 11 deletions src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ public override bool IncrementToken()
if (m_input.IncrementToken())
{
#pragma warning disable 612, 618
payloadAttr.Payload = new BytesRef(("pos: " + pos).GetBytes(IOUtils.CHARSET_UTF_8));
payloadAttr.Payload = new BytesRef(("pos: " + pos).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
pos++;
return true;
Expand Down Expand Up @@ -533,7 +533,7 @@ public virtual void TestSpanPayloadCheck()
{
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
#pragma warning disable 612, 618
BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay = new BytesRef(("pos: " + 5).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
SpanQuery query = new SpanPayloadCheckQuery(term1, new JCG.List<byte[]>() { pay.Bytes });
CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995 });
Expand All @@ -549,8 +549,8 @@ public virtual void TestSpanPayloadCheck()
clauses[1] = term2;
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
Expand All @@ -563,9 +563,9 @@ public virtual void TestSpanPayloadCheck()
clauses[2] = new SpanTermQuery(new Term("field", "five"));
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.CHARSET_UTF_8));
pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
BytesRef pay3 = new BytesRef(("pos: " + 2).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
Expand Down Expand Up @@ -597,10 +597,10 @@ public virtual void TestComplexSpanChecks()

var payloads = new JCG.List<byte[]>();
#pragma warning disable 612, 618
BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8));
BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
payloads.Add(pay.Bytes);
payloads.Add(pay2.Bytes);
Expand Down
4 changes: 2 additions & 2 deletions src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public virtual void TestEmpty_AsStream()
public virtual void TestSingleLine()
{
#pragma warning disable 612, 618
CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}

Expand All @@ -95,7 +95,7 @@ public virtual void TestSingleLine()
public virtual void TestSingleLine_AsStream()
{
#pragma warning disable 612, 618
CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ private static void ReadField(DataInput @in, StoredFieldVisitor visitor, FieldIn
data = new byte[length];
@in.ReadBytes(data, 0, length);
#pragma warning disable 612, 618
visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data));
visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(data));
#pragma warning restore 612, 618
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
}
else
{
visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private void ReadField(StoredFieldVisitor visitor, FieldInfo info, int bits)
else
{
#pragma warning disable 612, 618
visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
#pragma warning restore 612, 618
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/Lucene.Net/Support/StandardCharsets.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ namespace Lucene.Net.Support;

internal static class StandardCharsets
{
/// <inheritdoc cref="IOUtils.CHARSET_UTF_8"/>
/// <inheritdoc cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>
/// <remarks>
/// This is a convenience reference to <see cref="IOUtils.CHARSET_UTF_8"/>.
/// This is a convenience reference to <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>.
/// </remarks>
public static readonly Encoding UTF_8 = IOUtils.CHARSET_UTF_8;
public static readonly Encoding UTF_8 = IOUtils.ENCODING_UTF_8_NO_BOM;
}
6 changes: 3 additions & 3 deletions src/Lucene.Net/Util/IOUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ public static class IOUtils // LUCENENET specific - made static
/// For reading data, either this or <see cref="Encoding.UTF8"/> can be used,
/// as both will correctly interpret data with or without a BOM.
/// </summary>
public static readonly Encoding CHARSET_UTF_8 = new UTF8Encoding(
public static readonly Encoding ENCODING_UTF_8_NO_BOM = new UTF8Encoding(
encoderShouldEmitUTF8Identifier: false,
throwOnInvalidBytes: true);

/// <summary>
/// UTF-8 charset string.
/// <para/>Where possible, use <see cref="CHARSET_UTF_8"/> instead,
/// <para/>Where possible, use <see cref="ENCODING_UTF_8_NO_BOM"/> instead,
/// as using the <see cref="string"/> constant may slow things down. </summary>
/// <seealso cref="CHARSET_UTF_8"/>
/// <seealso cref="ENCODING_UTF_8_NO_BOM"/>
public static readonly string UTF_8 = "UTF-8";

/// <summary>
Expand Down
12 changes: 6 additions & 6 deletions src/Lucene.Net/Util/OfflineSorter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -600,15 +600,15 @@ public class ByteSequencesWriter : IDisposable
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream)
: this(new BinaryWriter(stream, IOUtils.CHARSET_UTF_8, leaveOpen: false))
: this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}

/// <summary>
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream, bool leaveOpen)
: this(new BinaryWriter(stream, IOUtils.CHARSET_UTF_8, leaveOpen))
: this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}

Expand All @@ -632,7 +632,7 @@ public ByteSequencesWriter(FileInfo file)
/// <summary>
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided <see cref="BinaryWriter"/>.
/// <b>NOTE:</b> To match Lucene, pass the <paramref name="writer"/>'s constructor the
/// <see cref="IOUtils.CHARSET_UTF_8"/>, which is UTF-8 without a byte order mark.
/// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is <c>null</c>.</exception>
public ByteSequencesWriter(BinaryWriter writer)
Expand Down Expand Up @@ -722,15 +722,15 @@ public class ByteSequencesReader : IDisposable
/// Constructs a <see cref="ByteSequencesReader"/> from the provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream)
: this(new BinaryReader(stream, IOUtils.CHARSET_UTF_8, leaveOpen: false))
: this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}

/// <summary>
/// Constructs a <see cref="ByteSequencesReader"/> from the provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream, bool leaveOpen)
: this(new BinaryReader(stream, IOUtils.CHARSET_UTF_8, leaveOpen))
: this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}

Expand All @@ -756,7 +756,7 @@ public ByteSequencesReader(FileInfo file)
/// Constructs a <see cref="ByteSequencesReader"/> from the provided <see cref="BinaryReader"/>.
/// <para/>
/// <b>NOTE:</b> To match Lucene, pass the <paramref name="reader"/>'s constructor the
/// <see cref="IOUtils.CHARSET_UTF_8"/>, which is UTF-8 without a byte order mark.
/// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is <c>null</c>.</exception>
public ByteSequencesReader(BinaryReader reader)
Expand Down

0 comments on commit 0021555

Please sign in to comment.