Skip to content

Commit

Permalink
Added detection for base64 or base64Url (#38)
Browse files Browse the repository at this point in the history
* Added API

* Split Base64Tests

* Added tests

* Removed Invalid

DetectEncoding doesn't make any guarantee about the validity of the given encoded data.

* Implementation

* Added `fast` option for a single scan

fast = true: O(n), doesn't result in Unknown if base64 and base64Url are mixed
fast = false: O(2n), can result in Unknown if base64 and base64Url are mixed

* Demo added for DetectEncoding

* Renamed DetectEncoding.cs to DetectEncoding_T.cs

* Tests for the public entry
  • Loading branch information
gfoidl authored Dec 2, 2018
1 parent 8c4fd92 commit 4d16d8b
Show file tree
Hide file tree
Showing 7 changed files with 491 additions and 66 deletions.
29 changes: 28 additions & 1 deletion demo/gfoidl.Base64.Demo/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class Program
{
static void Main()
{
Action[] demos = { RunGuidEncoding, RunGuidDecoding, RunBufferChainEncode };
Action[] demos = { RunGuidEncoding, RunGuidDecoding, RunBufferChainEncode, RunDetectEncoding };

foreach (Action demo in demos)
{
Expand Down Expand Up @@ -95,5 +95,32 @@ private static void RunBufferChainEncode()
decoded = decoded.Slice(0, written + written1);
Debug.Assert(data.SequenceEqual(decoded));
}
//---------------------------------------------------------------------
private static void RunDetectEncoding()
{
    // Demo: the caller does not know whether the text is base64 or base64Url,
    // so the encoding is detected first and the matching decoder is chosen.
    string encodedString = "a-_9";
    ReadOnlySpan<char> encodedChars = encodedString.AsSpan();

    Span<byte> data = stackalloc byte[Base64.Default.GetMaxDecodedLength(encodedString.Length)];
    int written = 0;

    EncodingType detected = Base64.DetectEncoding(encodedString);

    if (detected == EncodingType.Unknown)
    {
        // The sample input is not expected to be reported as undetectable.
        throw new InvalidOperationException("should not be here");
    }
    else if (detected == EncodingType.Base64)
    {
        Base64.Default.Decode(encodedChars, data, out int _, out written);
    }
    else if (detected == EncodingType.Base64Url)
    {
        Base64.Url.Decode(encodedChars, data, out int _, out written);
    }

    // Trim the buffer to the bytes that were actually produced.
    data = data.Slice(0, written);

    // The 4 encoded chars are expected to decode to 3 bytes.
    Debug.Assert(data.Length == 3);
}
}
}
96 changes: 96 additions & 0 deletions source/gfoidl.Base64/Base64.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Buffers;
using System.Runtime.InteropServices;
using gfoidl.Base64.Internal;

namespace gfoidl.Base64
Expand Down Expand Up @@ -226,5 +227,100 @@ public abstract OperationStatus Decode(
/// <param name="encoded">The base64 encoded data in string-form.</param>
/// <returns>The base64 decoded data.</returns>
public abstract byte[] Decode(ReadOnlySpan<char> encoded);
//---------------------------------------------------------------------
/// <summary>
/// Detects whether the UTF-8 / byte data in <paramref name="encoded" />
/// uses the base64 or the base64Url alphabet.
/// </summary>
/// <param name="encoded">The base64 encoded data.</param>
/// <param name="fast">
/// When <c>false</c> (default) <paramref name="encoded" /> is scanned
/// for base64Url chars ('-', '_') as well as for base64 chars ('+', '/'),
/// i.e. up to two scans. If both kinds are present,
/// <see cref="EncodingType.Unknown" /> is returned.
/// <para>
/// When <c>true</c> <paramref name="encoded" /> is scanned only once,
/// and only for base64Url chars. So if there is a mix of base64 and base64Url,
/// the result will be <see cref="EncodingType.Base64Url" />, and decoding
/// may throw a <see cref="FormatException" />.
/// </para>
/// </param>
/// <returns>
/// <see cref="EncodingType.Base64Url" /> if a base64Url char is found,
/// <see cref="EncodingType.Base64" /> if none is found, or
/// <see cref="EncodingType.Unknown" /> if <paramref name="encoded" /> has
/// fewer than 4 elements or (with <paramref name="fast" /> = <c>false</c>)
/// mixes both alphabets.
/// </returns>
/// <remarks>
/// The detection is a linear scan only; the input is not validated for
/// conforming to the base64 standard. Thus there is no 'Invalid' encoding type.
/// </remarks>
public static EncodingType DetectEncoding(ReadOnlySpan<byte> encoded, bool fast = false)
{
    // The explicit type argument is required: without it the call would bind to this overload again.
    return DetectEncoding<byte>(encoded, fast);
}
//---------------------------------------------------------------------
/// <summary>
/// Detects whether the text in <paramref name="encoded" /> uses the
/// base64 or the base64Url alphabet.
/// </summary>
/// <param name="encoded">The base64 encoded data in char-form.</param>
/// <param name="fast">
/// When <c>false</c> (default) <paramref name="encoded" /> is scanned
/// for base64Url chars ('-', '_') as well as for base64 chars ('+', '/'),
/// i.e. up to two scans. If both kinds are present,
/// <see cref="EncodingType.Unknown" /> is returned.
/// <para>
/// When <c>true</c> <paramref name="encoded" /> is scanned only once,
/// and only for base64Url chars. So if there is a mix of base64 and base64Url,
/// the result will be <see cref="EncodingType.Base64Url" />, and decoding
/// may throw a <see cref="FormatException" />.
/// </para>
/// </param>
/// <returns>
/// <see cref="EncodingType.Base64Url" /> if a base64Url char is found,
/// <see cref="EncodingType.Base64" /> if none is found, or
/// <see cref="EncodingType.Unknown" /> if <paramref name="encoded" /> has
/// fewer than 4 elements or (with <paramref name="fast" /> = <c>false</c>)
/// mixes both alphabets.
/// </returns>
/// <remarks>
/// The detection is a linear scan only; the input is not validated for
/// conforming to the base64 standard. Thus there is no 'Invalid' encoding type.
/// </remarks>
public static EncodingType DetectEncoding(ReadOnlySpan<char> encoded, bool fast = false)
{
    // The explicit type argument is required: without it the call would bind to this overload again.
    return DetectEncoding<char>(encoded, fast);
}
//---------------------------------------------------------------------
// Also used for tests
/// <summary>
/// Shared implementation of the encoding detection for <see cref="byte" />
/// and <see cref="char" /> input.
/// </summary>
/// <typeparam name="T">Element type of the input; only byte and char are supported.</typeparam>
/// <param name="encoded">The encoded data to inspect.</param>
/// <param name="fast">
/// <c>true</c> to look for base64Url chars ('-', '_') only; <c>false</c> to
/// additionally look for base64 chars ('+', '/') and report a mix of both
/// as <see cref="EncodingType.Unknown" />.
/// </param>
/// <returns>
/// <see cref="EncodingType.Unknown" /> for input with fewer than 4 elements
/// or (non-fast mode) mixed alphabets; <see cref="EncodingType.Base64Url" />
/// if '-' or '_' is present; otherwise <see cref="EncodingType.Base64" />.
/// </returns>
/// <exception cref="NotSupportedException">
/// <typeparamref name="T" /> is neither byte nor char.
/// </exception>
internal static EncodingType DetectEncoding<T>(ReadOnlySpan<T> encoded, bool fast = false)
    where T : IEquatable<T>
{
    // Input with fewer than 4 elements is reported as undetectable.
    if (encoded.Length < 4) return EncodingType.Unknown;

    T plus, slash, minus, underscore;

    if (typeof(T) == typeof(byte))
    {
        plus       = (T)(object)(byte)'+';
        slash      = (T)(object)(byte)'/';
        minus      = (T)(object)(byte)'-';
        underscore = (T)(object)(byte)'_';
    }
    else if (typeof(T) == typeof(char))
    {
        plus       = (T)(object)'+';
        slash      = (T)(object)'/';
        minus      = (T)(object)'-';
        underscore = (T)(object)'_';
    }
    else
    {
        throw new NotSupportedException();  // just in case new types are introduced in the future
    }

    // Without any base64Url char the answer is Base64 in both modes,
    // so the scan for '+' / '/' cannot change the result and is skipped.
    if (encoded.LastIndexOfAny(minus, underscore) < 0)
    {
        return EncodingType.Base64;
    }

    if (fast)
    {
        return EncodingType.Base64Url;
    }

    // Thorough mode: a base64 char alongside the base64Url chars means the alphabets are mixed.
    return encoded.LastIndexOfAny(plus, slash) >= 0
        ? EncodingType.Unknown
        : EncodingType.Base64Url;
}
}
}
9 changes: 9 additions & 0 deletions source/gfoidl.Base64/EncodingType.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace gfoidl.Base64
{
    /// <summary>
    /// The kind of encoding reported by <c>Base64.DetectEncoding</c>.
    /// </summary>
    public enum EncodingType
    {
        /// <summary>
        /// No base64Url char ('-', '_') was found in the input.
        /// </summary>
        Base64 = 0,

        /// <summary>
        /// A base64Url char ('-', '_') was found in the input.
        /// </summary>
        Base64Url = 1,

        /// <summary>
        /// The encoding could not be determined, e.g. the input is too short
        /// or mixes base64 and base64Url chars.
        /// </summary>
        Unknown = 2
    }
}
75 changes: 75 additions & 0 deletions tests/gfoidl.Base64.Tests/Base64Tests/Default.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
using System;
using System.Buffers;
using System.Runtime.InteropServices;
using NUnit.Framework;

namespace gfoidl.Base64.Tests.Base64Tests
{
    /// <summary>
    /// Tests for <c>Base64.Default</c>, run once with byte and once with char
    /// as the element type of the encoded buffer.
    /// </summary>
    [TestFixture(typeof(byte))]
    [TestFixture(typeof(char))]
    public class Default<T> where T : unmanaged
    {
        [Test]
        public void Default___base64_is_used()
        {
            byte[] input = { 0xFF, 0xFE, 0x00 };

            // Convert.ToBase64String serves as the reference for the expected text.
            Assert.AreEqual(Convert.ToBase64String(input), Base64.Default.Encode(input));
        }
        //---------------------------------------------------------------------
        [Test]
        public void Default_with_buffers___base64_is_used()
        {
            byte[] input            = { 0x00 };
            const int encodedLength = 4;
            Span<T> encoded         = stackalloc T[encodedLength];

            OperationStatus encodeStatus = EncodeInto(input, encoded, out int encodeConsumed, out int encodeWritten);

            Assert.Multiple(() =>
            {
                Assert.AreEqual(OperationStatus.Done, encodeStatus);
                Assert.AreEqual(1, encodeConsumed);
                Assert.AreEqual(4, encodeWritten);
            });

            Span<byte> decoded = stackalloc byte[10];

            OperationStatus decodeStatus = DecodeFrom(encoded, decoded, out int decodeConsumed, out int decodeWritten);

            Assert.Multiple(() =>
            {
                Assert.AreEqual(OperationStatus.Done, decodeStatus);
                Assert.AreEqual(4, decodeConsumed);
                Assert.AreEqual(1, decodeWritten);
            });
        }
        //---------------------------------------------------------------------
        // Encodes with Base64.Default, choosing the byte or char overload according to T.
        private static OperationStatus EncodeInto(byte[] data, Span<T> encoded, out int consumed, out int written)
        {
            if (typeof(T) == typeof(byte))
            {
                return Base64.Default.Encode(data, MemoryMarshal.AsBytes(encoded), out consumed, out written);
            }

            if (typeof(T) == typeof(char))
            {
                return Base64.Default.Encode(data, MemoryMarshal.Cast<T, char>(encoded), out consumed, out written);
            }

            throw new NotSupportedException();  // just in case new types are introduced in the future
        }
        //---------------------------------------------------------------------
        // Decodes with Base64.Default, choosing the byte or char overload according to T.
        private static OperationStatus DecodeFrom(Span<T> encoded, Span<byte> decoded, out int consumed, out int written)
        {
            if (typeof(T) == typeof(byte))
            {
                return Base64.Default.Decode(MemoryMarshal.AsBytes(encoded), decoded, out consumed, out written);
            }

            if (typeof(T) == typeof(char))
            {
                return Base64.Default.Decode(MemoryMarshal.Cast<T, char>(encoded), decoded, out consumed, out written);
            }

            throw new NotSupportedException();  // just in case new types are introduced in the future
        }
    }
}
29 changes: 29 additions & 0 deletions tests/gfoidl.Base64.Tests/Base64Tests/DetectEncoding.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
using System;
using NUnit.Framework;

namespace gfoidl.Base64.Tests.Base64Tests
{
    /// <summary>
    /// Tests for the public, non-generic <c>Base64.DetectEncoding</c> entry points.
    /// </summary>
    [TestFixture]
    public class DetectEncoding
    {
        [Test]
        public void Base64_given_byte___OK()
        {
            // "a+b/" as bytes: contains base64 chars only.
            ReadOnlySpan<byte> input = new byte[] { (byte)'a', (byte)'+', (byte)'b', (byte)'/' };

            EncodingType detected = Base64.DetectEncoding(input);

            Assert.AreEqual(EncodingType.Base64, detected);
        }
        //---------------------------------------------------------------------
        [Test]
        public void Base64_given_char___OK()
        {
            // Contains base64 chars only.
            ReadOnlySpan<char> input = "a+b/".AsSpan();

            EncodingType detected = Base64.DetectEncoding(input);

            Assert.AreEqual(EncodingType.Base64, detected);
        }
    }
}
Loading

0 comments on commit 4d16d8b

Please sign in to comment.