Skip to content

Commit

Permalink
Merge pull request #89 from BDisp/remove-gethexaformat-fix-86
Browse files Browse the repository at this point in the history
Fixes #86. "gethexaformat" messes up column widths.
  • Loading branch information
tig authored Dec 3, 2022
2 parents 216ace2 + dfb5d65 commit 093b2f5
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 127 deletions.
1 change: 1 addition & 0 deletions NStack.sln
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Files", "Solution
.github\workflows\build.yml = .github\workflows\build.yml
.github\workflows\publish.yml = .github\workflows\publish.yml
README.md = README.md
testenvironments.json = testenvironments.json
EndProjectSection
EndProject
Global
Expand Down
81 changes: 20 additions & 61 deletions NStack/unicode/Rune.ColumnWidth.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
//
using NStack;

namespace System
{
public partial struct Rune
{
static uint[,] combining = new uint[,] {
namespace System {
public partial struct Rune {
static uint [,] combining = new uint [,] {
{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
Expand Down Expand Up @@ -48,16 +46,16 @@ public partial struct Rune
{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2e9a, 0x2e9a },
{ 0x2ef4, 0x2eff }, { 0x2fd6, 0x2fef }, { 0x2ffc, 0x2fff },
{ 0x31e4, 0x31ef }, { 0x321f, 0x321f }, { 0xA48D, 0xA48F },
{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x2E9A, 0x2E9A },
{ 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
{ 0x31E4, 0x31EF }, { 0x321F, 0x321F }, { 0xA48D, 0xA48F },
{ 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 },
{ 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE1A, 0xFE1F },
{ 0xFE20, 0xFE23 }, { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 },
{ 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
};

static uint[,] combiningWideChars = new uint[,] {
static uint [,] combiningWideChars = new uint [,] {
/* Hangul Jamo init. consonants - 0x1100, 0x11ff */
/* Miscellaneous Technical - 0x2300, 0x23ff */
/* Hangul Syllables - 0x11a8, 0x11c2 */
Expand All @@ -84,22 +82,21 @@ public partial struct Rune
{ 0x3131, 0x318e }, { 0x3190, 0x3247 }, { 0x3250, 0x4dbf },
{ 0x4e00, 0xa4c6 }, { 0xa960, 0xa97c }, { 0xac00 ,0xd7a3 },
{ 0xf900, 0xfaff }, { 0xfe10, 0xfe1f }, { 0xfe30 ,0xfe6b },
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }
{ 0xff01, 0xff60 }, { 0xffe0, 0xffe6 }, { 0x10000, 0x10ffff }
};

static int bisearch(uint rune, uint[,] table, int max)
static int bisearch (uint rune, uint [,] table, int max)
{
int min = 0;
int mid;

if (rune < table[0, 0] || rune > table[max, 1])
if (rune < table [0, 0] || rune > table [max, 1])
return 0;
while (max >= min)
{
while (max >= min) {
mid = (min + max) / 2;
if (rune > table[mid, 1])
if (rune > table [mid, 1])
min = mid + 1;
else if (rune < table[mid, 0])
else if (rune < table [mid, 0])
max = mid - 1;
else
return 1;
Expand Down Expand Up @@ -127,82 +124,44 @@ static int bisearch(uint rune, uint[,] table, int max)
// return false;
//}

static uint gethexaformat(uint rune, int length)
{
var hex = rune.ToString($"x{length}");
var hexstr = hex.Substring(hex.Length - length, length);
return (uint)int.Parse(hexstr, System.Globalization.NumberStyles.HexNumber);
}

/// <summary>
/// Check if the rune is a non-spacing character.
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns>True if is a non-spacing character, false otherwise.</returns>
public static bool IsNonSpacingChar(uint rune)
public static bool IsNonSpacingChar (uint rune)
{
return bisearch(rune, combining, combining.GetLength(0) - 1) != 0;
return bisearch (rune, combining, combining.GetLength (0) - 1) != 0;
}

/// <summary>
/// Check if the rune is a wide character.
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns>True if is a wide character, false otherwise.</returns>
public static bool IsWideChar(uint rune)
public static bool IsWideChar (uint rune)
{
return bisearch(gethexaformat(rune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0;
return bisearch (rune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0;
}

static char firstSurrogatePairChar = '\0';

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns: -1 if the rune is a control character (below 0x20, which includes the null character, or from 0x7f up to but not including 0xa0), 0 if it is a non-spacing (combining) character, 2 if it is a wide character, and 1 otherwise.</returns>
/// <param name="rune">The rune.</param>
public static int ColumnWidth(Rune rune)
public static int ColumnWidth (Rune rune)
{
if (firstSurrogatePairChar != '\0')
firstSurrogatePairChar = '\0';
uint irune = (uint)rune;
if (irune < 0x20 || (irune >= 0x7f && irune < 0xa0))
return -1;
if (irune < 0x7f)
return 1;
/* binary search in table of non-spacing characters */
if (bisearch(gethexaformat(irune, 4), combining, combining.GetLength(0) - 1) != 0)
if (bisearch (irune, combining, combining.GetLength (0) - 1) != 0)
return 0;
/* if we arrive here, ucs is not a combining or C0/C1 control character */
return 1 +
(bisearch(gethexaformat(irune, 4), combiningWideChars, combiningWideChars.GetLength(0) - 1) != 0 ? 1 : 0);
}

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies.</returns>
/// <param name="c">The char.</param>
public static int ColumnWidth(char c)
{
if (!((Rune)c).IsValid)
{
if (firstSurrogatePairChar == '\0')
{
firstSurrogatePairChar = c;
return 0;
}
else if (firstSurrogatePairChar != '\0')
{
var r = new Rune(firstSurrogatePairChar, c);
firstSurrogatePairChar = '\0';
return ColumnWidth(r);
}
}
if (firstSurrogatePairChar != '\0')
firstSurrogatePairChar = '\0';

return ColumnWidth((Rune)c);
(bisearch (irune, combiningWideChars, combiningWideChars.GetLength (0) - 1) != 0 ? 1 : 0);
}
}
}
87 changes: 50 additions & 37 deletions NStack/unicode/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace System {
/// <remarks>
///
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
[StructLayout (LayoutKind.Sequential)]
public partial struct Rune {
// Stores the rune
uint value;
Expand Down Expand Up @@ -54,9 +54,8 @@ public partial struct Rune {
/// </remarks>
public Rune (uint rune)
{
if (rune > maxRune)
{
throw new ArgumentOutOfRangeException("Value is beyond the supplementary range!");
if (rune > maxRune) {
throw new ArgumentOutOfRangeException ("Value is beyond the supplementary range!");
}
this.value = rune;
}
Expand All @@ -77,43 +76,48 @@ public Rune (char ch)
/// <param name="lowSurrogate">The low surrogate code point.</param>
public Rune (uint highSurrogate, uint lowSurrogate)
{
if (EncodeSurrogatePair(highSurrogate, lowSurrogate, out Rune rune))
{
if (EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune)) {
this.value = rune;
}
else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax)
{
throw new ArgumentOutOfRangeException($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
}
else
{
throw new ArgumentOutOfRangeException($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
} else if (highSurrogate < highSurrogateMin || lowSurrogate > lowSurrogateMax) {
throw new ArgumentOutOfRangeException ($"Must be between {highSurrogateMin:x} and {lowSurrogateMax:x} inclusive!");
} else {
throw new ArgumentOutOfRangeException ($"Resulted rune must be less or equal to {(uint)MaxRune:x}!");
}
}

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> can be encoded as UTF-8
/// </summary>
/// <value><c>true</c> if is valid; otherwise, <c>false</c>.</value>
public bool IsValid => ValidRune(value);
public bool IsValid => ValidRune (value);

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a surrogate code point.
/// </summary>
/// <returns><c>true</c> if it is a surrogate code point, <c>false</c> otherwise.</returns>
public bool IsSurrogate => IsSurrogateRune(value);
public bool IsSurrogate => IsSurrogateRune (value);

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a valid surrogate pair.
/// </summary>
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
public bool IsSurrogatePair => DecodeSurrogatePair(value, out _);
public bool IsSurrogatePair => DecodeSurrogatePair (value, out _);

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a high surrogate.
/// </summary>
/// <value><c>true</c> if the stored value lies between <c>highSurrogateMin</c> and <c>highSurrogateMax</c>, inclusive; otherwise, <c>false</c>.</value>
public bool IsHighSurrogate => value >= highSurrogateMin && value <= highSurrogateMax;

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> is a low surrogate.
/// </summary>
/// <value><c>true</c> if the stored value lies between <c>lowSurrogateMin</c> and <c>lowSurrogateMax</c>, inclusive; otherwise, <c>false</c>.</value>
public bool IsLowSurrogate => value >= lowSurrogateMin && value <= lowSurrogateMax;

/// <summary>
/// Check if the rune is a non-spacing character.
/// </summary>
/// <returns>True if is a non-spacing character, false otherwise.</returns>
public bool IsNonSpacing => IsNonSpacingChar(value);
public bool IsNonSpacing => IsNonSpacingChar (value);

// Code points in the surrogate range are not valid for UTF-8.
const uint highSurrogateMin = 0xd800;
Expand Down Expand Up @@ -539,8 +543,7 @@ public static int InvalidIndex (byte [] buffer)
public static bool ValidRune (Rune rune)
{
if ((0 <= (int)rune.value && rune.value < highSurrogateMin) ||
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value))
{
(lowSurrogateMax < rune.value && rune.value <= MaxRune.value)) {
return true;
}

Expand All @@ -552,7 +555,7 @@ public static bool ValidRune (Rune rune)
/// </summary>
/// <param name="rune">The rune.</param>
/// <returns><c>true</c> if it is a surrogate code point, <c>false</c> otherwise.</returns>
public static bool IsSurrogateRune(uint rune)
public static bool IsSurrogateRune (uint rune)
{
return rune >= highSurrogateMin && rune <= lowSurrogateMax;
}
Expand All @@ -564,12 +567,11 @@ public static bool IsSurrogateRune(uint rune)
/// <param name="lowSurrogate">The low surrogate code point.</param>
/// <param name="rune">The returning rune.</param>
/// <returns><c>true</c> if the returning rune is greater than 0, <c>false</c> otherwise.</returns>
public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, out Rune rune)
public static bool EncodeSurrogatePair (uint highsurrogate, uint lowSurrogate, out Rune rune)
{
rune = 0;
if (highsurrogate >= highSurrogateMin && highsurrogate <= highSurrogateMax &&
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax)
{
lowSurrogate >= lowSurrogateMin && lowSurrogate <= lowSurrogateMax) {
//return 0x10000 + ((highsurrogate - highSurrogateMin) * 0x0400) + (lowSurrogate - lowSurrogateMin);
return (rune = 0x10000 + ((highsurrogate - highSurrogateMin) << 10) + (lowSurrogate - lowSurrogateMin)) > 0;
}
Expand All @@ -582,14 +584,13 @@ public static bool EncodeSurrogatePair(uint highsurrogate, uint lowSurrogate, ou
/// <param name="rune">The rune</param>
/// <param name="chars">The chars if is valid. Empty otherwise.</param>
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
public static bool DecodeSurrogatePair(uint rune, out char [] chars)
public static bool DecodeSurrogatePair (uint rune, out char [] chars)
{
uint s = rune - 0x10000;
uint h = highSurrogateMin + (s >> 10);
uint l = lowSurrogateMin + (s & 0x3FF);

if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune)
{
if (EncodeSurrogatePair (h, l, out Rune dsp) && dsp == rune) {
chars = new char [] { (char)h, (char)l };
return true;
}
Expand All @@ -603,13 +604,11 @@ public static bool DecodeSurrogatePair(uint rune, out char [] chars)
/// <param name="str">The string.</param>
/// <param name="chars">The chars if is valid. Empty otherwise.</param>
/// <returns><c>true</c> if it is a valid surrogate pair, <c>false</c> otherwise.</returns>
public static bool DecodeSurrogatePair(string str, out char [] chars)
public static bool DecodeSurrogatePair (string str, out char [] chars)
{
if (str.Length == 2)
{
chars = str.ToCharArray();
if (EncodeSurrogatePair(chars[0], chars[1], out _))
{
if (str.Length == 2) {
chars = str.ToCharArray ();
if (EncodeSurrogatePair (chars [0], chars [1], out _)) {
return true;
}
}
Expand All @@ -622,9 +621,9 @@ public static bool DecodeSurrogatePair(string str, out char [] chars)
/// </summary>
/// <returns>The number of UTF8 bytes expected given the first prefix.</returns>
/// <param name="firstByte">Is the first byte of a UTF8 sequence.</param>
public static int ExpectedSizeFromFirstByte(byte firstByte)
public static int ExpectedSizeFromFirstByte (byte firstByte)
{
var x = first[firstByte];
var x = first [firstByte];

// Invalid runes, just return 1 for byte, and let higher level pass to print
if (x == xx)
Expand Down Expand Up @@ -806,7 +805,7 @@ public static Rune To (Case toCase, Rune rune)
{
uint rval = rune.value;
switch (toCase) {
case Case.Lower:
case Case.Lower:
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Lower, rval));
case Case.Title:
return new Rune (NStack.Unicode.To (NStack.Unicode.Case.Title, rval));
Expand Down Expand Up @@ -874,6 +873,20 @@ public static Rune To (Case toCase, Rune rune)
/// <param name="rune">Rune.</param>
public static implicit operator uint (Rune rune) => rune.value;

/// <summary>
/// Implicit operator conversion from a C# integer into a rune.
/// </summary>
/// <remarks>
/// The integer is cast to <c>uint</c> and handed to the <c>Rune (uint)</c> constructor, which throws
/// <see cref="ArgumentOutOfRangeException"/> when the value is beyond the maximum rune.
/// NOTE(review): a negative integer presumably wraps to a large unsigned value and is rejected the
/// same way; confirm the project is not compiled with overflow checking enabled.
/// </remarks>
/// <returns>Rune representing the C# integer</returns>
/// <param name="value">32-bit Integer.</param>
public static implicit operator Rune (int value) => new Rune ((uint)value);

/// <summary>
/// Implicit operator conversion from a byte into a rune.
/// </summary>
/// <returns>Rune representing the byte.</returns>
/// <param name="byt">Byte.</param>
public static implicit operator Rune (byte byt) => new Rune (byt);

/// <summary>
/// Implicit operator conversion from a C# char into a rune.
/// </summary>
Expand Down Expand Up @@ -905,7 +918,7 @@ public override string ToString ()
{
var buff = new byte [4];
var size = EncodeRune (this, buff, 0);
return System.Text.Encoding.UTF8.GetString(buff, 0, size);
return System.Text.Encoding.UTF8.GetString (buff, 0, size);
}

/// <summary>
Expand Down
6 changes: 3 additions & 3 deletions NStackTests/NStackTests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
<Version>0.20.0</Version>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.3.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.4.0" />
<PackageReference Include="NUnit" Version="3.13.3" />
<PackageReference Include="NUnit3TestAdapter" Version="4.2.1" />
<PackageReference Include="coverlet.collector" Version="3.1.2">
<PackageReference Include="NUnit3TestAdapter" Version="4.3.1" />
<PackageReference Include="coverlet.collector" Version="3.2.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand Down
Loading

0 comments on commit 093b2f5

Please sign in to comment.