src/System.Private.CoreLib/shared/System/Globalization/IdnMapping.cs

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// This file contains the IDN functions and implementation.
//
// This allows encoding of non-ASCII domain names in a "punycode" form,
// for example:
//
//     \u5B89\u5BA4\u5948\u7F8E\u6075-with-SUPER-MONKEYS
//
// is encoded as:
//
//     xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
//
// Additional options are provided to allow unassigned IDN characters and
// to validate according to the Std3ASCII Rules (like DNS names).
//
// There are also rules regarding bidirectionality of text and the length
// of segments.
//
// For additional rules see also:
//  RFC 3490 - Internationalizing Domain Names in Applications (IDNA)
//  RFC 3491 - Nameprep: A Stringprep Profile for Internationalized Domain Names (IDN)
//  RFC 3492 - Punycode: A Bootstring encoding of Unicode for Internationalized Domain Names in Applications (IDNA)

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;

namespace System.Globalization
{
    // IdnMapping class used to map names to Punycode
    public sealed partial class IdnMapping
    {
        private bool _allowUnassigned;
        private bool _useStd3AsciiRules;

        public IdnMapping()
        {
        }

        public bool AllowUnassigned
        {
            get => _allowUnassigned;
            set => _allowUnassigned = value;
        }

        public bool UseStd3AsciiRules
        {
            get => _useStd3AsciiRules;
            set => _useStd3AsciiRules = value;
        }

        // Gets ASCII (Punycode) version of the string
        public string GetAscii(string unicode) =>
            GetAscii(unicode, 0);

        public string GetAscii(string unicode, int index)
        {
            if (unicode == null)
                throw new ArgumentNullException(nameof(unicode));
            return GetAscii(unicode, index, unicode.Length - index);
        }

        public string GetAscii(string unicode, int index, int count)
        {
            if (unicode == null)
                throw new ArgumentNullException(nameof(unicode));
            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0) ? nameof(index) : nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            if (index > unicode.Length)
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
            if (index > unicode.Length - count)
                throw new ArgumentOutOfRangeException(nameof(unicode), SR.ArgumentOutOfRange_IndexCountBuffer);

            if (count == 0)
            {
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
            }
            if (unicode[index + count - 1] == 0)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, index + count - 1), nameof(unicode));
            }

            if (GlobalizationMode.Invariant)
            {
                return GetAsciiInvariant(unicode, index, count);
            }

            unsafe
            {
                fixed (char* pUnicode = unicode)
                {
                    return GetAsciiCore(unicode, pUnicode + index, count);
                }
            }
        }

        // Gets Unicode version of the string.  Normalized and limited to IDNA characters.
        public string GetUnicode(string ascii) =>
            GetUnicode(ascii, 0);

        public string GetUnicode(string ascii, int index)
        {
            if (ascii == null)
                throw new ArgumentNullException(nameof(ascii));
            return GetUnicode(ascii, index, ascii.Length - index);
        }

        public string GetUnicode(string ascii, int index, int count)
        {
            if (ascii == null)
                throw new ArgumentNullException(nameof(ascii));
            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0) ? nameof(index) : nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
            if (index > ascii.Length)
                throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_Index);
            if (index > ascii.Length - count)
                throw new ArgumentOutOfRangeException(nameof(ascii), SR.ArgumentOutOfRange_IndexCountBuffer);

            // This is a case (i.e. explicitly null-terminated input) where behavior in .NET and Win32 intentionally differ.
            // The .NET APIs should (and did in v4.0 and earlier) throw an ArgumentException on input that includes a terminating null.
            // The Win32 APIs fail on an embedded null, but not on a terminating null.
            if (count > 0 && ascii[index + count - 1] == (char)0)
                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

            if (GlobalizationMode.Invariant)
            {
                return GetUnicodeInvariant(ascii, index, count);
            }

            unsafe
            {
                fixed (char* pAscii = ascii)
                {
                    return GetUnicodeCore(ascii, pAscii + index, count);
                }
            }
        }

        public override bool Equals(object? obj) =>
            obj is IdnMapping that &&
            _allowUnassigned == that._allowUnassigned &&
            _useStd3AsciiRules == that._useStd3AsciiRules;

        public override int GetHashCode() =>
            (_allowUnassigned ? 100 : 200) + (_useStd3AsciiRules ? 1000 : 2000);

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe string GetStringForOutput(string originalString, char* input, int inputLength, char* output, int outputLength) =>
            originalString.Length == inputLength && new ReadOnlySpan<char>(input, inputLength).SequenceEqual(new ReadOnlySpan<char>(output, outputLength)) ?
                originalString :
                new string(output, 0, outputLength);

        //
        // Invariant implementation
        //

        private const char c_delimiter = '-';
        private const string c_strAcePrefix = "xn--";
        private const int c_labelLimit = 63;          // Not including dots
        private const int c_defaultNameLimit = 255;   // Including dots
        private const int c_initialN = 0x80;
        private const int c_maxint = 0x7ffffff;
        private const int c_initialBias = 72;
        private const int c_punycodeBase = 36;
        private const int c_tmin = 1;
        private const int c_tmax = 26;
        private const int c_skew = 38;
        private const int c_damp = 700;


        // Legal "dot" separators (i.e: . in www.microsoft.com)
        private static readonly char[] s_dotSeparators = { '.', '\u3002', '\uFF0E', '\uFF61' };

        private string GetAsciiInvariant(string unicode, int index, int count)
        {
            if (index > 0 || count < unicode.Length)
            {
                unicode = unicode.Substring(index, count);
            }

            // Check for ASCII only string, which will be unchanged
            if (ValidateStd3AndAscii(unicode, UseStd3AsciiRules, true))
            {
                return unicode;
            }

            // Cannot be null terminated (normalization won't help us with this one, and
            // may have returned false before checking the whole string above)
            Debug.Assert(count >= 1, "[IdnMapping.GetAscii] Expected 0 length strings to fail before now.");
            if (unicode[^1] <= 0x1f)
            {
                throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, unicode.Length - 1), nameof(unicode));
            }

            // May need to check Std3 rules again for non-ascii
            if (UseStd3AsciiRules)
            {
                ValidateStd3AndAscii(unicode, true, false);
            }

            // Go ahead and encode it
            return PunycodeEncode(unicode);
        }

        // See if we're only ASCII
        private static bool ValidateStd3AndAscii(string unicode, bool bUseStd3, bool bCheckAscii)
        {
            // If its empty, then its too small
            if (unicode.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

            int iLastDot = -1;

            // Loop the whole string
            for (int i = 0; i < unicode.Length; i++)
            {
                // Aren't allowing control chars (or 7f, but idn tables catch that, they don't catch \0 at end though)
                if (unicode[i] <= 0x1f)
                {
                    throw new ArgumentException(SR.Format(SR.Argument_InvalidCharSequence, i), nameof(unicode));
                }

                // If its Unicode or a control character, return false (non-ascii)
                if (bCheckAscii && unicode[i] >= 0x7f)
                    return false;

                // Check for dots
                if (IsDot(unicode[i]))
                {
                    // Can't have 2 dots in a row
                    if (i == iLastDot + 1)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

                    // If its too far between dots then fail
                    if (i - iLastDot > c_labelLimit + 1)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

                    // If validating Std3, then char before dot can't be - char
                    if (bUseStd3 && i > 0)
                        ValidateStd3(unicode[i - 1], true);

                    // Remember where the last dot is
                    iLastDot = i;
                    continue;
                }

                // If necessary, make sure its a valid std3 character
                if (bUseStd3)
                {
                    ValidateStd3(unicode[i], (i == iLastDot + 1));
                }
            }

            // If we never had a dot, then we need to be shorter than the label limit
            if (iLastDot == -1 && unicode.Length > c_labelLimit)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

            // Need to validate entire string length, 1 shorter if last char wasn't a dot
            if (unicode.Length > c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                        c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1)), nameof(unicode));

            // If last char wasn't a dot we need to check for trailing -
            if (bUseStd3 && !IsDot(unicode[^1]))
                ValidateStd3(unicode[^1], true);

            return true;
        }

        /* PunycodeEncode() converts Unicode to Punycode.  The input     */
        /* is represented as an array of Unicode code points (not code    */
        /* units; surrogate pairs are not allowed), and the output        */
        /* will be represented as an array of ASCII code points.  The     */
        /* output string is *not* null-terminated; it will contain        */
        /* zeros if and only if the input contains zeros.  (Of course     */
        /* the caller can leave room for a terminator and add one if      */
        /* needed.)  The input_length is the number of code points in     */
        /* the input.  The output_length is an in/out argument: the       */
        /* caller passes in the maximum number of code points that it     */

        /* can receive, and on successful return it will contain the      */
        /* number of code points actually output.  The case_flags array   */
        /* holds input_length boolean values, where nonzero suggests that */
        /* the corresponding Unicode character be forced to uppercase     */
        /* after being decoded (if possible), and zero suggests that      */
        /* it be forced to lowercase (if possible).  ASCII code points    */
        /* are encoded literally, except that ASCII letters are forced    */
        /* to uppercase or lowercase according to the corresponding       */
        /* uppercase flags.  If case_flags is a null pointer then ASCII   */
        /* letters are left as they are, and other code points are        */
        /* treated as if their uppercase flags were zero.  The return     */
        /* value can be any of the punycode_status values defined above   */
        /* except punycode_bad_input; if not punycode_success, then       */
        /* output_size and output might contain garbage.                  */
        private static string PunycodeEncode(string unicode)
        {
            // 0 length strings aren't allowed
            if (unicode.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

            StringBuilder output = new StringBuilder(unicode.Length);
            int iNextDot = 0;
            int iAfterLastDot = 0;
            int iOutputAfterLastDot = 0;

            // Find the next dot
            while (iNextDot < unicode.Length)
            {
                // Find end of this segment
                iNextDot = unicode.IndexOfAny(s_dotSeparators, iAfterLastDot);
                Debug.Assert(iNextDot <= unicode.Length, "[IdnMapping.punycode_encode]IndexOfAny is broken");
                if (iNextDot < 0)
                    iNextDot = unicode.Length;

                // Only allowed to have empty . section at end (www.microsoft.com.)
                if (iNextDot == iAfterLastDot)
                {
                    // Only allowed to have empty sections as trailing .
                    if (iNextDot != unicode.Length)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));
                    // Last dot, stop
                    break;
                }

                // We'll need an Ace prefix
                output.Append(c_strAcePrefix);

                // Everything resets every segment.
                bool bRightToLeft = false;

                // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
                BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iAfterLastDot);
                if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
                {
                    // It has to be right to left.
                    bRightToLeft = true;

                    // Check last char
                    int iTest = iNextDot - 1;
                    if (char.IsLowSurrogate(unicode, iTest))
                    {
                        iTest--;
                    }

                    eBidi = CharUnicodeInfo.GetBidiCategory(unicode, iTest);
                    if (eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
                    {
                        // Oops, last wasn't RTL, last should be RTL if first is RTL
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }
                }

                // Handle the basic code points
                int basicCount;
                int numProcessed = 0;           // Num code points that have been processed so far (this segment)
                for (basicCount = iAfterLastDot; basicCount < iNextDot; basicCount++)
                {
                    // Can't be lonely surrogate because it would've thrown in normalization
                    Debug.Assert(char.IsLowSurrogate(unicode, basicCount) == false, "[IdnMapping.punycode_encode]Unexpected low surrogate");

                    // Double check our bidi rules
                    BidiCategory testBidi = CharUnicodeInfo.GetBidiCategory(unicode, basicCount);

                    // If we're RTL, we can't have LTR chars
                    if (bRightToLeft && testBidi == BidiCategory.LeftToRight)
                    {
                        // Oops, throw error
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }

                    // If we're not RTL we can't have RTL chars
                    if (!bRightToLeft && (testBidi == BidiCategory.RightToLeft || testBidi == BidiCategory.RightToLeftArabic))
                    {
                        // Oops, throw error
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(unicode));
                    }

                    // If its basic then add it
                    if (Basic(unicode[basicCount]))
                    {
                        output.Append(EncodeBasic(unicode[basicCount]));
                        numProcessed++;
                    }
                    // If its a surrogate, skip the next since our bidi category tester doesn't handle it.
                    else if (char.IsSurrogatePair(unicode, basicCount))
                        basicCount++;
                }

                int numBasicCodePoints = numProcessed;     // number of basic code points

                // Stop if we ONLY had basic code points
                if (numBasicCodePoints == iNextDot - iAfterLastDot)
                {
                    // Get rid of xn-- and this segments done
                    output.Remove(iOutputAfterLastDot, c_strAcePrefix.Length);
                }
                else
                {
                    // If it has some non-basic code points the input cannot start with xn--
                    if (unicode.Length - iAfterLastDot >= c_strAcePrefix.Length &&
                        unicode.Substring(iAfterLastDot, c_strAcePrefix.Length).Equals(
                            c_strAcePrefix, StringComparison.OrdinalIgnoreCase))
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(unicode));

                    // Need to do ACE encoding
                    int numSurrogatePairs = 0;            // number of surrogate pairs so far

                    // Add a delimiter (-) if we had any basic code points (between basic and encoded pieces)
                    if (numBasicCodePoints > 0)
                    {
                        output.Append(c_delimiter);
                    }

                    // Initialize the state
                    int n = c_initialN;
                    int delta = 0;
                    int bias = c_initialBias;

                    // Main loop
                    while (numProcessed < (iNextDot - iAfterLastDot))
                    {
                        /* All non-basic code points < n have been     */
                        /* handled already.  Find the next larger one: */
                        int j;
                        int m;
                        int test = 0;
                        for (m = c_maxint, j = iAfterLastDot;
                             j < iNextDot;
                             j += IsSupplementary(test) ? 2 : 1)
                        {
                            test = char.ConvertToUtf32(unicode, j);
                            if (test >= n && test < m) m = test;
                        }

                        /* Increase delta enough to advance the decoder's    */
                        /* <n,i> state to <m,0>, but guard against overflow: */
                        delta += (int)((m - n) * ((numProcessed - numSurrogatePairs) + 1));
                        Debug.Assert(delta > 0, "[IdnMapping.cs]1 punycode_encode - delta overflowed int");
                        n = m;

                        for (j = iAfterLastDot; j < iNextDot; j += IsSupplementary(test) ? 2 : 1)
                        {
                            // Make sure we're aware of surrogates
                            test = char.ConvertToUtf32(unicode, j);

                            // Adjust for character position (only the chars in our string already, some
                            // haven't been processed.

                            if (test < n)
                            {
                                delta++;
                                Debug.Assert(delta > 0, "[IdnMapping.cs]2 punycode_encode - delta overflowed int");
                            }

                            if (test == n)
                            {
                                // Represent delta as a generalized variable-length integer:
                                int q, k;
                                for (q = delta, k = c_punycodeBase; ; k += c_punycodeBase)
                                {
                                    int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                                    if (q < t) break;
                                    Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_encode]Expected c_punycodeBase (36) to be != t");
                                    output.Append(EncodeDigit(t + (q - t) % (c_punycodeBase - t)));
                                    q = (q - t) / (c_punycodeBase - t);
                                }

                                output.Append(EncodeDigit(q));
                                bias = Adapt(delta, (numProcessed - numSurrogatePairs) + 1, numProcessed == numBasicCodePoints);
                                delta = 0;
                                numProcessed++;

                                if (IsSupplementary(m))
                                {
                                    numProcessed++;
                                    numSurrogatePairs++;
                                }
                            }
                        }
                        ++delta;
                        ++n;
                        Debug.Assert(delta > 0, "[IdnMapping.cs]3 punycode_encode - delta overflowed int");
                    }
                }

                // Make sure its not too big
                if (output.Length - iOutputAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(unicode));

                // Done with this segment, add dot if necessary
                if (iNextDot != unicode.Length)
                    output.Append('.');

                iAfterLastDot = iNextDot + 1;
                iOutputAfterLastDot = output.Length;
            }

            // Throw if we're too long
            if (output.Length > c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                                c_defaultNameLimit - (IsDot(unicode[^1]) ? 0 : 1)), nameof(unicode));
            // Return our output string
            return output.ToString();
        }

        // Is it a dot?
        // are we U+002E (., full stop), U+3002 (ideographic full stop), U+FF0E (fullwidth full stop), or
        // U+FF61 (halfwidth ideographic full stop).
        // Note: IDNA Normalization gets rid of dots now, but testing for last dot is before normalization
        private static bool IsDot(char c) =>
            c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61';

        private static bool IsSupplementary(int cTest) =>
            cTest >= 0x10000;

        private static bool Basic(uint cp) =>
            // Is it in ASCII range?
            cp < 0x80;

        // Validate Std3 rules for a character
        private static void ValidateStd3(char c, bool bNextToDot)
        {
            // Check for illegal characters
            if ((c <= ',' || c == '/' || (c >= ':' && c <= '@') ||      // Lots of characters not allowed
                (c >= '[' && c <= '`') || (c >= '{' && c <= (char)0x7F)) ||
                (c == '-' && bNextToDot))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadStd3, c), nameof(c));
        }

        private string GetUnicodeInvariant(string ascii, int index, int count)
        {
            if (index > 0 || count < ascii.Length)
            {
                // We're only using part of the string
                ascii = ascii.Substring(index, count);
            }
            // Convert Punycode to Unicode
            string strUnicode = PunycodeDecode(ascii);

            // Output name MUST obey IDNA rules & round trip (casing differences are allowed)
            if (!ascii.Equals(GetAscii(strUnicode), StringComparison.OrdinalIgnoreCase))
                throw new ArgumentException(SR.Argument_IdnIllegalName, nameof(ascii));

            return strUnicode;
        }

        /* PunycodeDecode() converts Punycode to Unicode.  The input is  */
        /* represented as an array of ASCII code points, and the output   */
        /* will be represented as an array of Unicode code points.  The   */
        /* input_length is the number of code points in the input.  The   */
        /* output_length is an in/out argument: the caller passes in      */
        /* the maximum number of code points that it can receive, and     */
        /* on successful return it will contain the actual number of      */
        /* code points output.  The case_flags array needs room for at    */
        /* least output_length values, or it can be a null pointer if the */
        /* case information is not needed.  A nonzero flag suggests that  */
        /* the corresponding Unicode character be forced to uppercase     */
        /* by the caller (if possible), while zero suggests that it be    */
        /* forced to lowercase (if possible).  ASCII code points are      */
        /* output already in the proper case, but their flags will be set */
        /* appropriately so that applying the flags would be harmless.    */
        /* The return value can be any of the punycode_status values      */
        /* defined above; if not punycode_success, then output_length,    */
        /* output, and case_flags might contain garbage.  On success, the */
        /* decoder will never need to write an output_length greater than */
        /* input_length, because of how the encoding is defined.          */

        private static string PunycodeDecode(string ascii)
        {
            // 0 length strings aren't allowed
            if (ascii.Length == 0)
                throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));

            // Throw if we're too long
            if (ascii.Length > c_defaultNameLimit - (IsDot(ascii[^1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize,
                                            c_defaultNameLimit - (IsDot(ascii[^1]) ? 0 : 1)), nameof(ascii));

            // output stringbuilder
            StringBuilder output = new StringBuilder(ascii.Length);

            // Dot searching
            int iNextDot = 0;
            int iAfterLastDot = 0;
            int iOutputAfterLastDot = 0;

            while (iNextDot < ascii.Length)
            {
                // Find end of this segment
                iNextDot = ascii.IndexOf('.', iAfterLastDot);
                if (iNextDot < 0 || iNextDot > ascii.Length)
                    iNextDot = ascii.Length;

                // Only allowed to have empty . section at end (www.microsoft.com.)
                if (iNextDot == iAfterLastDot)
                {
                    // Only allowed to have empty sections as trailing .
                    if (iNextDot != ascii.Length)
                        throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));

                    // Last dot, stop
                    break;
                }

                // In either case it can't be bigger than segment size
                if (iNextDot - iAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));

                // See if this section's ASCII or ACE
                if (ascii.Length < c_strAcePrefix.Length + iAfterLastDot ||
                    string.Compare(ascii, iAfterLastDot, c_strAcePrefix, 0, c_strAcePrefix.Length, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    // Its ASCII, copy it
                    output.Append(ascii, iAfterLastDot, iNextDot - iAfterLastDot);
                }
                else
                {
                    // Not ASCII, bump up iAfterLastDot to be after ACE Prefix
                    iAfterLastDot += c_strAcePrefix.Length;

                    // Get number of basic code points (where delimiter is)
                    // numBasicCodePoints < 0 if there're no basic code points
                    int iTemp = ascii.LastIndexOf(c_delimiter, iNextDot - 1);

                    // Trailing - not allowed
                    if (iTemp == iNextDot - 1)
                        throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

                    int numBasicCodePoints;
                    if (iTemp <= iAfterLastDot)
                        numBasicCodePoints = 0;
                    else
                    {
                        numBasicCodePoints = iTemp - iAfterLastDot;

                        // Copy all the basic code points, making sure they're all in the allowed range,
                        // and losing the casing for all of them.
                        for (int copyAscii = iAfterLastDot; copyAscii < iAfterLastDot + numBasicCodePoints; copyAscii++)
                        {
                            // Make sure we don't allow unicode in the ascii part
                            if (ascii[copyAscii] > 0x7f)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

                            // When appending make sure they get lower cased
                            output.Append((char)(ascii[copyAscii] >= 'A' && ascii[copyAscii] <= 'Z' ? ascii[copyAscii] - 'A' + 'a' : ascii[copyAscii]));
                        }
                    }

                    // Get ready for main loop.  Start at beginning if we didn't have any
                    // basic code points, otherwise start after the -.
                    // asciiIndex will be next character to read from ascii
                    int asciiIndex = iAfterLastDot + (numBasicCodePoints > 0 ? numBasicCodePoints + 1 : 0);

                    // initialize our state
                    int n = c_initialN;
                    int bias = c_initialBias;
                    int i = 0;

                    int w, k;

                    // no Supplementary characters yet
                    int numSurrogatePairs = 0;

                    // Main loop, read rest of ascii
                    while (asciiIndex < iNextDot)
                    {
                        /* Decode a generalized variable-length integer into delta,  */
                        /* which gets added to i.  The overflow checking is easier   */
                        /* if we increase i as we go, then subtract off its starting */
                        /* value at the end to obtain delta.                         */
                        int oldi = i;

                        for (w = 1, k = c_punycodeBase; ; k += c_punycodeBase)
                        {
                            // Check to make sure we aren't overrunning our ascii string
                            if (asciiIndex >= iNextDot)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

                            // decode the digit from the next char
                            int digit = DecodeDigit(ascii[asciiIndex++]);

                            Debug.Assert(w > 0, "[IdnMapping.punycode_decode]Expected w > 0");
                            if (digit > (c_maxint - i) / w)
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

                            i += (int)(digit * w);
                            int t = k <= bias ? c_tmin : k >= bias + c_tmax ? c_tmax : k - bias;
                            if (digit < t)
                                break;
                            Debug.Assert(c_punycodeBase != t, "[IdnMapping.punycode_decode]Expected t != c_punycodeBase (36)");
                            if (w > c_maxint / (c_punycodeBase - t))
                                throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                            w *= (c_punycodeBase - t);
                        }

                        bias = Adapt(i - oldi, (output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1, oldi == 0);

                        /* i was supposed to wrap around from output.Length to 0,   */
                        /* incrementing n each time, so we'll fix that now: */
                        Debug.Assert((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1 > 0,
                            "[IdnMapping.punycode_decode]Expected to have added > 0 characters this segment");
                        if (i / ((output.Length - iOutputAfterLastDot - numSurrogatePairs) + 1) > c_maxint - n)
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                        n += (int)(i / (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1));
                        i %= (output.Length - iOutputAfterLastDot - numSurrogatePairs + 1);

                        // Make sure n is legal
                        if ((n < 0 || n > 0x10ffff) || (n >= 0xD800 && n <= 0xDFFF))
                            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));

                        // insert n at position i of the output:  Really tricky if we have surrogates
                        int iUseInsertLocation;
                        string strTemp = char.ConvertFromUtf32(n);

                        // If we have supplimentary characters
                        if (numSurrogatePairs > 0)
                        {
                            // Hard way, we have supplimentary characters
                            int iCount;
                            for (iCount = i, iUseInsertLocation = iOutputAfterLastDot; iCount > 0; iCount--, iUseInsertLocation++)
                            {
                                // If its a surrogate, we have to go one more
                                if (iUseInsertLocation >= output.Length)
                                    throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(ascii));
                                if (char.IsSurrogate(output[iUseInsertLocation]))
                                    iUseInsertLocation++;
                            }
                        }
                        else
                        {
                            // No Supplementary chars yet, just add i
                            iUseInsertLocation = iOutputAfterLastDot + i;
                        }

                        // Insert it
                        output.Insert(iUseInsertLocation, strTemp);

                        // If it was a surrogate increment our counter
                        if (IsSupplementary(n))
                            numSurrogatePairs++;

                        // Index gets updated
                        i++;
                    }

                    // Do BIDI testing
                    bool bRightToLeft = false;

                    // Check for RTL.  If right-to-left, then 1st & last chars must be RTL
                    BidiCategory eBidi = CharUnicodeInfo.GetBidiCategory(output, iOutputAfterLastDot);
                    if (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)
                    {
                        // It has to be right to left.
                        bRightToLeft = true;
                    }

                    // Check the rest of them to make sure RTL/LTR is consistent
                    for (int iTest = iOutputAfterLastDot; iTest < output.Length; iTest++)
                    {
                        // This might happen if we run into a pair
                        if (char.IsLowSurrogate(output[iTest]))
                            continue;

                        // Check to see if its LTR
                        eBidi = CharUnicodeInfo.GetBidiCategory(output, iTest);
                        if ((bRightToLeft && eBidi == BidiCategory.LeftToRight) ||
                            (!bRightToLeft && (eBidi == BidiCategory.RightToLeft || eBidi == BidiCategory.RightToLeftArabic)))
                            throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                    }

                    // Its also a requirement that the last one be RTL if 1st is RTL
                    if (bRightToLeft && eBidi != BidiCategory.RightToLeft && eBidi != BidiCategory.RightToLeftArabic)
                    {
                        // Oops, last wasn't RTL, last should be RTL if first is RTL
                        throw new ArgumentException(SR.Argument_IdnBadBidi, nameof(ascii));
                    }
                }

                // See if this label was too long
                if (iNextDot - iAfterLastDot > c_labelLimit)
                    throw new ArgumentException(SR.Argument_IdnBadLabelSize, nameof(ascii));

                // Done with this segment, add dot if necessary
                if (iNextDot != ascii.Length)
                    output.Append('.');

                iAfterLastDot = iNextDot + 1;
                iOutputAfterLastDot = output.Length;
            }

            // Throw if we're too long
            if (output.Length > c_defaultNameLimit - (IsDot(output[output.Length - 1]) ? 0 : 1))
                throw new ArgumentException(SR.Format(SR.Argument_IdnBadNameSize, c_defaultNameLimit - (IsDot(output[output.Length - 1]) ? 0 : 1)), nameof(ascii));

            // Return our output string
            return output.ToString();
        }

        // DecodeDigit(cp) returns the numeric value of a basic code */
        // point (for use in representing integers) in the range 0 to */
        // c_punycodeBase-1, or <0 if cp is does not represent a value. */

        private static int DecodeDigit(char cp)
        {
            if (cp >= '0' && cp <= '9')
                return cp - '0' + 26;

            // Two flavors for case differences
            if (cp >= 'a' && cp <= 'z')
                return cp - 'a';

            if (cp >= 'A' && cp <= 'Z')
                return cp - 'A';

            // Expected 0-9, A-Z or a-z, everything else is illegal
            throw new ArgumentException(SR.Argument_IdnBadPunycode, nameof(cp));
        }

        private static int Adapt(int delta, int numpoints, bool firsttime)
        {
            uint k;

            delta = firsttime ? delta / c_damp : delta / 2;
            Debug.Assert(numpoints != 0, "[IdnMapping.adapt]Expected non-zero numpoints.");
            delta += delta / numpoints;

            for (k = 0; delta > ((c_punycodeBase - c_tmin) * c_tmax) / 2; k += c_punycodeBase)
            {
                delta /= c_punycodeBase - c_tmin;
            }

            Debug.Assert(delta + c_skew != 0, "[IdnMapping.adapt]Expected non-zero delta+skew.");
            return (int)(k + (c_punycodeBase - c_tmin + 1) * delta / (delta + c_skew));
        }

        /* EncodeBasic(bcp,flag) forces a basic code point to lowercase */
        /* if flag is false, uppercase if flag is true, and returns    */
        /* the resulting code point.  The code point is unchanged if it  */
        /* is caseless.  The behavior is undefined if bcp is not a basic */
        /* code point.                                                   */

        private static char EncodeBasic(char bcp)
        {
            if (HasUpperCaseFlag(bcp))
                bcp += (char)('a' - 'A');

            return bcp;
        }

        // Return whether a punycode code point is flagged as being upper case.
        private static bool HasUpperCaseFlag(char punychar) =>
            punychar >= 'A' && punychar <= 'Z';

        /* EncodeDigit(d,flag) returns the basic code point whose value      */
        /* (when used for representing integers) is d, which needs to be in   */
        /* the range 0 to punycodeBase-1.  The lowercase form is used unless flag is  */
        /* true, in which case the uppercase form is used. */

        private static char EncodeDigit(int d)
        {
            Debug.Assert(d >= 0 && d < c_punycodeBase, "[IdnMapping.encode_digit]Expected 0 <= d < punycodeBase");
            // 26-35 map to ASCII 0-9
            if (d > 25) return (char)(d - 26 + '0');

            // 0-25 map to a-z or A-Z
            return (char)(d + 'a');
        }
    }
}