Skip to content

Commit

Permalink
Feature/stringspan (#110)
Browse files Browse the repository at this point in the history
* Use system type.`TryParse` on `ReadOnlySpan<char>` types, retire UniversalTypeConverter library
  • Loading branch information
jas88 authored Jun 28, 2024
1 parent 41c1857 commit ee19862
Show file tree
Hide file tree
Showing 14 changed files with 168 additions and 127 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,19 @@ jobs:
uses: github/codeql-action/analyze@v3
with:
category: "/language:${{ matrix.language }}"
upload: false
output: sarif-results

- name: filter-sarif
uses: advanced-security/filter-sarif@v1
with:
patterns: |
+**/*
-**/*.g.cs
input: sarif-results/csharp.sarif
output: sarif-results/csharp.sarif

- name: Upload SARIF
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: sarif-results/csharp.sarif
4 changes: 1 addition & 3 deletions PACKAGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,5 @@

| Package | Source Code | License | Purpose | Additional Risk Assessment |
| ------- | ------------| ------- | ------- | -------------------------- |
| Microsoft.SourceLink.GitHub | [GitHub](https://github.com/dotnet/sourcelink) | [MIT](https://opensource.org/licenses/MIT) | Enable source linkage from nupkg | Official MS project |
| UniversalTypeConverter | [GitHub](https://github.com/t-bruning/UniversalTypeConverter) | [MS-PL](https://github.com/t-bruning/UniversalTypeConverter/blob/master/LICENSE.md) | Parsing of individual strings into specific target types | |


None - TypeGuesser is now a self-contained pure .NET library!
4 changes: 2 additions & 2 deletions Tests/GuesserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ public sealed class GuesserTests
[TestCase("5.000.000", typeof(string), "en-us", 9, 0, 0, "5.000.000")] //germans swap commas and dots so this is an illegal number
[TestCase("5,000,000", typeof(string), "de-de", 9, 0, 0, "5,000,000")] //germans swap commas and dots so this is an illegal number
[TestCase("5,000", typeof(int), "de-de", 5, 1,0,5)] //germans swap commas and dots

public void Test_OneString_IsType(string guessFor, Type expectedGuess, string culture,int expectedStringLength, int expectedBefore,int expectedAfter,object expectedParseValue)
public void Test_OneString_IsType(string guessFor, Type expectedGuess, string culture, int expectedStringLength,
int expectedBefore, int expectedAfter, object expectedParseValue)
{
var cultureInfo = new CultureInfo(culture);
var guesser = new Guesser {Culture = cultureInfo};
Expand Down
15 changes: 1 addition & 14 deletions Tests/PerformanceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
using System.Diagnostics;
using System.Globalization;
using NUnit.Framework;
using TB.ComponentModel;
using TypeGuesser;
using TypeGuesser.Deciders;

Expand All @@ -26,8 +25,7 @@ public void Performance_Decimals()

var decider = new DecimalTypeDecider(new CultureInfo("en-GB"));

// ReSharper disable once NullableWarningSuppressionIsUsed - this is just for benchmarking
var req = new DatabaseTypeRequest(null!);
var req = new DatabaseTypeRequest(typeof(bool)) { Unicode = true };

var sw = new Stopwatch();

Expand Down Expand Up @@ -56,17 +54,6 @@ public void Performance_Decimals()
Console.WriteLine($"Guesser.AdjustToCompensateForValue:{sw.ElapsedMilliseconds} ms");


sw.Restart();

foreach (var s in inputs)
{
s.To<decimal>(culture);
}

sw.Stop();

Console.WriteLine($"To<decimal>:{sw.ElapsedMilliseconds} ms");


sw.Restart();

Expand Down
68 changes: 56 additions & 12 deletions TypeGuesser/Deciders/BoolTypeDecider.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
using System.Globalization;
using System.Text.RegularExpressions;
using System;
using System.Globalization;

namespace TypeGuesser.Deciders;

Expand All @@ -10,26 +10,70 @@ namespace TypeGuesser.Deciders;
/// Creates a new instance with the given <paramref name="culture"/>
/// </remarks>
/// <param name="culture"></param>
public sealed class BoolTypeDecider(CultureInfo culture)
    : DecideTypesForStrings<bool>(culture, TypeCompatibilityGroup.Numerical, typeof(bool))
{
    // Single-character tokens recognised as true / false.
    private const string TrueChars = "1tTyYjJ";
    private const string FalseChars = "0fFnN";

    /// <inheritdoc/>
    protected override IDecideTypesForStrings CloneImpl(CultureInfo newCulture)
    {
        return new BoolTypeDecider(newCulture);
    }

    /// <summary>
    /// Converts <paramref name="candidateString"/> to a boxed <see cref="bool"/>.
    /// </summary>
    /// <param name="candidateString">Text to interpret; surrounding white space is ignored.</param>
    /// <returns>The boxed value, or null when the text is not one of the recognised tokens.</returns>
    protected override object? ParseImpl(ReadOnlySpan<char> candidateString)
    {
        if (TryInterpret(candidateString, out var value))
            return value;

        return null;
    }

    /// <summary>
    /// Returns true if <paramref name="candidateString"/> is one of the tokens <see cref="ParseImpl"/> can convert.
    /// </summary>
    /// <param name="candidateString">Text to test; surrounding white space is ignored.</param>
    /// <param name="size">Not used by this decider.</param>
    protected override bool IsAcceptableAsTypeImpl(ReadOnlySpan<char> candidateString, IDataTypeSize? size)
    {
        var trimmed = candidateString.Trim();

        // "Y" / "N" is boolean unless the settings say it can't
        if (!Settings.CharCanBeBoolean && trimmed.Length == 1 && char.IsAsciiLetter(trimmed[0]))
            return false;

        // Shares the token table with ParseImpl, so anything accepted here can also be parsed
        // (including values padded with white space).
        return TryInterpret(candidateString, out _);
    }

    /// <summary>
    /// Single source of truth for the tokens this decider understands.
    /// </summary>
    /// <param name="text">Raw candidate text.</param>
    /// <param name="value">The interpreted value; false when the method returns false.</param>
    /// <returns>true if <paramref name="text"/> is a recognised boolean token.</returns>
    private static bool TryInterpret(ReadOnlySpan<char> text, out bool value)
    {
        // The framework parser is tried first on the raw text
        if (bool.TryParse(text, out value))
            return true;

        var token = text.Trim();

        switch (token.Length)
        {
            case 1:
                value = TrueChars.Contains(token[0]);
                return value || FalseChars.Contains(token[0]);
            case 2:
                value = Is(token, "ja");
                // NOTE(review): "-1" maps to false, as in the existing table - confirm this is intended
                return value || Is(token, "no") || Is(token, "-1");
            case 3:
                value = Is(token, "yes") || Is(token, ".t.");
                return value || Is(token, ".f.");
            case 4:
                value = Is(token, "true");
                return value || Is(token, "nein");
            case 5:
                value = false;
                return Is(token, "false");
            default:
                value = false;
                return false;
        }

        static bool Is(ReadOnlySpan<char> candidate, string word)
        {
            return candidate.Equals(word, StringComparison.OrdinalIgnoreCase);
        }
    }
}
38 changes: 16 additions & 22 deletions TypeGuesser/Deciders/DateTimeTypeDecider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,15 @@ protected override IDecideTypesForStrings CloneImpl(CultureInfo overrideCulture)
}

/// <inheritdoc/>
protected override object ParseImpl(ReadOnlySpan<char> value)
{
    // if user has specified a specific format that we are to use, use it
    if (Settings.ExplicitDateFormats != null)
        return DateTime.ParseExact(value, Settings.ExplicitDateFormats, _culture, DateTimeStyles.None);

    // otherwise parse a value using any of the valid culture formats
    if (TryBruteParse(value, out var dt))
        return dt;

    // a span cannot be boxed as a format argument, so it is materialised for the message
    throw new FormatException(string.Format(SR.DateTimeTypeDecider_ParseImpl_Could_not_parse___0___to_a_valid_DateTime, value.ToString()));
}
Expand Down Expand Up @@ -199,13 +199,13 @@ public void GuessDateFormat(IEnumerable<string> samples)
}

/// <inheritdoc />
public override bool IsAcceptableAsType(ReadOnlySpan<char> candidateString, IDataTypeSize? size)
{
    // a match on an explicitly configured date format wins outright; otherwise defer to the base checks
    if (IsExplicitDate(candidateString))
        return true;

    return base.IsAcceptableAsType(candidateString, size);
}

/// <inheritdoc/>
protected override bool IsAcceptableAsTypeImpl(string candidateString, IDataTypeSize? sizeRecord)
protected override bool IsAcceptableAsTypeImpl(ReadOnlySpan<char> candidateString, IDataTypeSize? sizeRecord)
{
//if it's a float then it isn't a date is it! thanks C# for thinking 1.1 is the first of January
if (_decimalChecker.IsAcceptableAsType(candidateString, sizeRecord))
Expand All @@ -226,52 +226,46 @@ protected override bool IsAcceptableAsTypeImpl(string candidateString, IDataType
}
}

/// <summary>
/// Attempts to read <paramref name="s"/> as a date/time: first with the culture's own parser, then as a
/// single time or date token, and finally as a date token followed by a time.
/// </summary>
/// <param name="s">Candidate text. Spaces around it, and extra spaces between the date and the time, are ignored.</param>
/// <param name="dt">The parsed value when the method returns true.</param>
/// <returns>true if <paramref name="s"/> could be interpreted.</returns>
private bool TryBruteParse(ReadOnlySpan<char> s, out DateTime dt)
{
    //if it's legit according to the current culture
    if (DateTime.TryParse(s, Culture, DateTimeStyles.None, out dt))
        return true;

    // Tokens are separated by spaces. Drop leading/trailing ones so that the first space found
    // really is the boundary between the date and the time.
    s = s.Trim(' ');

    //if there are no tokens
    if (s.IsEmpty)
    {
        dt = DateTime.MinValue;
        return false;
    }

    var sPoint = s.IndexOf(' ');

    //if there is one token it is assumed either to be a time or a date
    if (sPoint == -1)
        return TryGetTime(s, out dt) || TryGetDate(s, out dt);

    //if there are 2+ tokens then first token should be a date then the rest should be a time
    //e.g. "28/2/1993 5:36:27 AM" gets evaluated as "28/2/1993" and then "5:36:27 AM"
    var datePart = s[..sPoint];

    // more than one space may separate the two parts
    var timePart = s[(sPoint + 1)..].TrimStart(' ');

    if (TryGetDate(datePart, out dt) && TryGetTime(timePart, out var time))
    {
        dt = new DateTime(dt.Year, dt.Month, dt.Day, time.Hour, time.Minute, time.Second, time.Millisecond);
        return true;
    }

    dt = DateTime.MinValue;
    return false;
}

/// <summary>
/// Attempts an exact parse of <paramref name="v"/> against the date formats currently in use.
/// </summary>
/// <param name="v">Text expected to hold only a date.</param>
/// <param name="date">The parsed value when the method returns true.</param>
private bool TryGetDate(ReadOnlySpan<char> v, out DateTime date)
{
    return DateTime.TryParseExact(v, _dateFormatToUse, Culture, DateTimeStyles.AllowInnerWhite, out date);
}

/// <summary>
/// Attempts an exact parse of <paramref name="v"/> against the supported time formats.
/// </summary>
/// <param name="v">Text expected to hold only a time.</param>
/// <param name="time">The parsed value when the method returns true.</param>
private bool TryGetTime(ReadOnlySpan<char> v, out DateTime time)
{
    return DateTime.TryParseExact(v, TimeFormats, Culture, DateTimeStyles.AllowInnerWhite, out time);
}
Expand Down
36 changes: 15 additions & 21 deletions TypeGuesser/Deciders/DecideTypesForStrings.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using TB.ComponentModel;

namespace TypeGuesser.Deciders;

/// <summary>
/// Guesses whether strings can be represented as <typeparamref name="T"/> and handles parsing approved strings according to the <see cref="DecideTypesForStrings{T}.Culture"/>
/// </summary>
/// <typeparam name="T"></typeparam>
public abstract class DecideTypesForStrings<T> :IDecideTypesForStrings
public abstract class DecideTypesForStrings<T>:IDecideTypesForStrings
{
private CultureInfo _culture;

Expand All @@ -35,53 +34,54 @@ public virtual CultureInfo Culture
/// <param name="culture"></param>
/// <param name="compatibilityGroup">How your Type interacts with other Guessers, e.g. can you fallback from one to another</param>
/// <param name="typesSupported">All the Types your guesser supports e.g. multiple sizes of int (int32, int16 etc). These should not overlap with other guessers in the app domain</param>
protected DecideTypesForStrings(CultureInfo culture, TypeCompatibilityGroup compatibilityGroup, params Type[] typesSupported)
{
    _culture = culture;

    Settings = GuessSettingsFactory.Create();

    CompatibilityGroup = compatibilityGroup;

    // a decider that supports no Types at all is a programming error in the derived class
    if (typesSupported.Length == 0)
        throw new ArgumentException(SR.DecideTypesForStrings_DecideTypesForStrings_DecideTypesForStrings_abstract_base_was_not_passed_any_typesSupported_by_implementing_derived_class);

    TypesSupported = [.. typesSupported];
}

/// <inheritdoc/>
public virtual bool IsAcceptableAsType(ReadOnlySpan<char> candidateString, IDataTypeSize? size)
{
    //we must preserve leading zeroes if it's not actually 0 -- if they have 010101 then we have to use string but if they have just 0 we can use decimal
    if (IDecideTypesForStrings.ZeroPrefixedNumber.IsMatch(candidateString))
        return false;

    return IsAcceptableAsTypeImpl(candidateString, size);
}

/// <summary>
/// Returns true if <see cref="Settings"/> contains an <see cref="GuessSettings.ExplicitDateFormats"/> and one of them matches the <paramref name="candidateString"/>
/// </summary>
/// <param name="candidateString">Text to test against the explicit formats</param>
/// <returns>false when no explicit formats are configured, the text is empty/white space, or no format matches</returns>
protected bool IsExplicitDate(ReadOnlySpan<char> candidateString)
{
    //only relevant if the user has an explicit date format in mind and the candidate is not blank
    //(IsWhiteSpace is also true for an empty span, so no separate emptiness test is needed)
    if (Settings.ExplicitDateFormats == null || candidateString.IsWhiteSpace())
        return false;

    return DateTime.TryParseExact(candidateString, Settings.ExplicitDateFormats, Culture, DateTimeStyles.None, out _);
}

/// <inheritdoc/>
public object? Parse(ReadOnlySpan<char> value)
{
    // blank input (IsWhiteSpace is also true for an empty span) has no value to parse
    if (value.IsWhiteSpace())
        return null;

    try
    {
        return ParseImpl(value);
    }
    catch (Exception ex)
    {
        // wrap whatever the concrete decider threw so callers see the offending text and decider;
        // a span cannot be boxed as a format argument, so it is materialised for the message
        throw new FormatException(string.Format(SR.DecideTypesForStrings_Parse_Could_not_parse_string_value___0___with_Decider_Type__1_, value.ToString(), GetType().Name), ex);
    }
}

Expand All @@ -105,19 +105,13 @@ public IDecideTypesForStrings Clone()
/// </summary>
/// <param name="value"></param>
/// <returns></returns>
protected abstract object? ParseImpl(ReadOnlySpan<char> value);

/// <summary>
/// Returns true if the given <paramref name="candidateString"/> is compatible with the T Type of this decider. This is the preferred method of overriding IsAcceptable.
/// </summary>
/// <param name="candidateString">Text to test</param>
/// <param name="size">Optional size record passed through from <see cref="IsAcceptableAsType"/></param>
/// <returns>true if the text can be treated as a T</returns>
protected abstract bool IsAcceptableAsTypeImpl(ReadOnlySpan<char> candidateString, IDataTypeSize? size);
}
Loading

0 comments on commit ee19862

Please sign in to comment.