Skip to content

Commit

Permalink
apacheGH-37861: [C#] Fix StringArray.GetString returning null instead…
Browse files Browse the repository at this point in the history
… of empty (apache#37862)

### Rationale for this change

Fixes apache#37861.

### What changes are included in this PR?

Add `BinaryArray.GetBytes(int, out bool)` overload that enables
`StringArray.GetString` to reliably determine if the value is null or
empty without needing an additional call to `IsNull`.

### Are these changes tested?

Added a test for `StringArray.GetString` (which didn't appear to have
any existing tests). Two of the cases fail without this change. I also
updated the tests that call `BinaryArray.GetBytes` to use the new
overload instead of a separate call to `IsNull` as extra validation.

### Are there any user-facing changes?

New public overload to an existing API.
* Closes: apache#37861
  • Loading branch information
spanglerco authored and loicalleyne committed Nov 13, 2023
1 parent 826c1d2 commit c881049
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 8 deletions.
20 changes: 16 additions & 4 deletions csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -331,20 +331,33 @@ public int GetValueLength(int index)
/// <remarks>
/// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte
/// collection values when seen in the context of this method's return type of <see cref="ReadOnlySpan{Byte}"/>.
/// Use the <see cref="Array.IsNull"/> method instead to reliably determine null values.
/// Use the <see cref="Array.IsNull"/> method or the <see cref="GetBytes(int, out bool)"/> overload instead
/// to reliably determine null values.
/// </remarks>
/// <param name="index">Index at which to get bytes.</param>
/// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
/// </exception>
public ReadOnlySpan<byte> GetBytes(int index)
public ReadOnlySpan<byte> GetBytes(int index) => GetBytes(index, out _);

/// <summary>
/// Get the collection of bytes, as a read-only span, at a given index in the array.
/// </summary>
/// <param name="index">Index at which to get bytes.</param>
/// <param name="isNull">Set to <see langword="true"/> if the value at the given index is null.</param>
/// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
/// </exception>
public ReadOnlySpan<byte> GetBytes(int index, out bool isNull)
{
if (index < 0 || index >= Length)
{
throw new ArgumentOutOfRangeException(nameof(index));
}

if (IsNull(index))
isNull = IsNull(index);

if (isNull)
{
// Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span
// is actually returned as an empty span.
Expand All @@ -353,6 +366,5 @@ public ReadOnlySpan<byte> GetBytes(int index)

return ValueBuffer.Span.Slice(ValueOffsets[index], GetValueLength(index));
}

}
}
6 changes: 3 additions & 3 deletions csharp/src/Apache.Arrow/Arrays/StringArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ public StringArray(int length,

public string GetString(int index, Encoding encoding = default)
{
encoding = encoding ?? DefaultEncoding;
encoding ??= DefaultEncoding;

ReadOnlySpan<byte> bytes = GetBytes(index);
ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull);

if (bytes == default)
if (isNull)
{
return null;
}
Expand Down
3 changes: 2 additions & 1 deletion csharp/test/Apache.Arrow.Tests/BinaryArrayBuilderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,8 @@ private static void AssertArrayContents(IEnumerable<byte[]> expectedContents, Bi
for (int i = 0; i < array.Length; i++)
{
var expectedArray = expectedContentsArr[i];
var actualArray = array.IsNull(i) ? null : array.GetBytes(i).ToArray();
var actualSpan = array.GetBytes(i, out bool isNull);
var actualArray = isNull ? null : actualSpan.ToArray();
Assert.Equal(expectedArray, actualArray);
}
}
Expand Down
54 changes: 54 additions & 0 deletions csharp/test/Apache.Arrow.Tests/StringArrayTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Xunit;

namespace Apache.Arrow.Tests
{
public class StringArrayTests
{
public class GetString
{
[Theory]
[InlineData(null, null)]
[InlineData(null, "")]
[InlineData(null, "value")]
[InlineData("", null)]
[InlineData("", "")]
[InlineData("", "value")]
[InlineData("value", null)]
[InlineData("value", "")]
[InlineData("value", "value")]
public void ReturnsAppendedValue(string firstValue, string secondValue)
{
// Arrange
// Create an array with two elements. The second element being null,
// empty, or non-empty may influence the underlying BinaryArray
// storage such that retrieving an empty first element could result
// in an empty span or a 0-length span backed by storage.
var array = new StringArray.Builder()
.Append(firstValue)
.Append(secondValue)
.Build();

// Act
var retrievedValue = array.GetString(0);

// Assert
Assert.Equal(firstValue, retrievedValue);
}
}
}
}

0 comments on commit c881049

Please sign in to comment.