From ab9fe967f14cd2838ea7e4a43031665c6781b9ae Mon Sep 17 00:00:00 2001
From: Alexandre Julliard <julliard@winehq.org>
Date: Mon, 2 Mar 2020 16:28:48 +0100
Subject: [PATCH] ntdll: Reimplement IdnToUnicode() using the normalization
 table and the ntdll helpers.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
---
 dlls/kernel32/locale.c       | 173 +----------------------------------
 dlls/kernel32/tests/locale.c |  19 ++--
 dlls/ntdll/locale.c          | 129 ++++++++++++++++++++++++++
 dlls/ntdll/ntdll.spec        |   1 +
 include/winternl.h           |   1 +
 5 files changed, 148 insertions(+), 175 deletions(-)

diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c
index 4100517ed16..44242273ef0 100644
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@@ -544,28 +544,6 @@ INT WINAPI GetGeoInfoA(GEOID geoid, GEOTYPE geotype, LPSTR data, int data_len, L
 }
 
 
-enum {
-    BASE = 36,
-    TMIN = 1,
-    TMAX = 26,
-    SKEW = 38,
-    DAMP = 700,
-    INIT_BIAS = 72,
-    INIT_N = 128
-};
-
-static inline INT adapt(INT delta, INT numpoints, BOOL firsttime)
-{
-    INT k;
-
-    delta /= (firsttime ? DAMP : 2);
-    delta += delta/numpoints;
-
-    for(k=0; delta>((BASE-TMIN)*TMAX)/2; k+=BASE)
-        delta /= BASE-TMIN;
-    return k+((BASE-TMIN+1)*delta)/(delta+SKEW);
-}
-
 /******************************************************************************
  *           IdnToAscii (KERNEL32.@)
  */
@@ -589,154 +567,11 @@ INT WINAPI IdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHA
 /******************************************************************************
  *           IdnToUnicode (KERNEL32.@)
  */
-INT WINAPI IdnToUnicode(DWORD dwFlags, LPCWSTR lpASCIICharStr, INT cchASCIIChar,
-                        LPWSTR lpUnicodeCharStr, INT cchUnicodeChar)
+INT WINAPI IdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT dstlen )
 {
-    INT i, label_start, label_end, out_label, out = 0;
-    WCHAR ch;
-
-    TRACE("%x %p %d %p %d\n", dwFlags, lpASCIICharStr, cchASCIIChar,
-        lpUnicodeCharStr, cchUnicodeChar);
-
-    for(label_start=0; label_start<cchASCIIChar;) {
-        INT n = INIT_N, pos = 0, old_pos, w, k, bias = INIT_BIAS, delim=0, digit, t;
-
-        out_label = out;
-        for(i=label_start; i<cchASCIIChar; i++) {
-            ch = lpASCIICharStr[i];
-
-            if(ch>0x7f || (i!=cchASCIIChar-1 && !ch)) {
-                SetLastError(ERROR_INVALID_NAME);
-                return 0;
-            }
-
-            if(!ch || ch=='.')
-                break;
-            if(ch == '-')
-                delim = i;
-
-            if((dwFlags&IDN_USE_STD3_ASCII_RULES) == 0)
-                continue;
-            if((ch>='a' && ch<='z') || (ch>='A' && ch<='Z')
-                    || (ch>='0' && ch<='9') || ch=='-')
-                continue;
-
-            SetLastError(ERROR_INVALID_NAME);
-            return 0;
-        }
-        label_end = i;
-        /* last label may be empty */
-        if(label_start==label_end && ch) {
-            SetLastError(ERROR_INVALID_NAME);
-            return 0;
-        }
-
-        if((dwFlags&IDN_USE_STD3_ASCII_RULES) && (lpASCIICharStr[label_start]=='-' ||
-                    lpASCIICharStr[label_end-1]=='-')) {
-            SetLastError(ERROR_INVALID_NAME);
-            return 0;
-        }
-        if(label_end-label_start > 63) {
-            SetLastError(ERROR_INVALID_NAME);
-            return 0;
-        }
-
-        if(label_end-label_start<4 ||
-                tolowerW(lpASCIICharStr[label_start])!='x' ||
-                tolowerW(lpASCIICharStr[label_start+1])!='n' ||
-                lpASCIICharStr[label_start+2]!='-' || lpASCIICharStr[label_start+3]!='-') {
-            if(label_end < cchASCIIChar)
-                label_end++;
-
-            if(!lpUnicodeCharStr) {
-                out += label_end-label_start;
-            }else if(out+label_end-label_start <= cchUnicodeChar) {
-                memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start,
-                        (label_end-label_start)*sizeof(WCHAR));
-                out += label_end-label_start;
-            }else {
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                return 0;
-            }
-
-            label_start = label_end;
-            continue;
-        }
-
-        if(delim == label_start+3)
-            delim++;
-        if(!lpUnicodeCharStr) {
-            out += delim-label_start-4;
-        }else if(out+delim-label_start-4 <= cchUnicodeChar) {
-            memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start+4,
-                    (delim-label_start-4)*sizeof(WCHAR));
-            out += delim-label_start-4;
-        }else {
-            SetLastError(ERROR_INSUFFICIENT_BUFFER);
-            return 0;
-        }
-        if(out != out_label)
-            delim++;
-
-        for(i=delim; i<label_end;) {
-            old_pos = pos;
-            w = 1;
-            for(k=BASE; ; k+=BASE) {
-                ch = i<label_end ? tolowerW(lpASCIICharStr[i++]) : 0;
-                if((ch<'a' || ch>'z') && (ch<'0' || ch>'9')) {
-                    SetLastError(ERROR_INVALID_NAME);
-                    return 0;
-                }
-                digit = ch<='9' ? ch-'0'+'z'-'a'+1 : ch-'a';
-                pos += digit*w;
-                t = k<=bias ? TMIN : k>=bias+TMAX ? TMAX : k-bias;
-                if(digit < t)
-                    break;
-                w *= BASE-t;
-            }
-            bias = adapt(pos-old_pos, out-out_label+1, old_pos==0);
-            n += pos/(out-out_label+1);
-            pos %= out-out_label+1;
-
-            if((dwFlags&IDN_ALLOW_UNASSIGNED)==0 &&
-                    get_table_entry(nameprep_char_type, n)==1/*UNASSIGNED*/) {
-                SetLastError(ERROR_INVALID_NAME);
-                return 0;
-            }
-            if(!lpUnicodeCharStr) {
-                out++;
-            }else if(out+1 <= cchASCIIChar) {
-                memmove(lpUnicodeCharStr+out_label+pos+1,
-                        lpUnicodeCharStr+out_label+pos,
-                        (out-out_label-pos)*sizeof(WCHAR));
-                lpUnicodeCharStr[out_label+pos] = n;
-                out++;
-            }else {
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                return 0;
-            }
-            pos++;
-        }
-
-        if(out-out_label > 63) {
-            SetLastError(ERROR_INVALID_NAME);
-            return 0;
-        }
-
-        if(label_end < cchASCIIChar) {
-            if(!lpUnicodeCharStr) {
-                out++;
-            }else if(out+1 <= cchUnicodeChar) {
-                lpUnicodeCharStr[out++] = lpASCIICharStr[label_end];
-            }else {
-                SetLastError(ERROR_INSUFFICIENT_BUFFER);
-                return 0;
-            }
-        }
-        label_start = label_end+1;
-    }
-
-    return out;
+    NTSTATUS status = RtlIdnToUnicode( flags, src, srclen, dst, &dstlen );
+    if (!set_ntstatus( status )) return 0;
+    return dstlen;
 }
 
 
diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c
index 261bcff54e4..a1215cb08ef 100644
--- a/dlls/kernel32/tests/locale.c
+++ b/dlls/kernel32/tests/locale.c
@@ -4600,10 +4600,13 @@ static void test_IdnToUnicode(void)
         /* 5 */
         { 64, L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0 },
         { 8, L"xn--7va", IDN_ALLOW_UNASSIGNED, 2, 2, L"\x380" },
+        { 8, L"xn--7va", 0, 0, 0, L"\x380" },
         { -1, L"xn----bm3an932a1l5d.xn--xvj", 0, 8, 0, L"\xd803\xde78\x46b5-\xa861.\x2e87" },
         { -1, L"xn--z123456789012345678901234567890123456789012345678901234-9te", 0,
           57, 57, L"\xe4z123456789012345678901234567890123456789012345678901234" },
+        /* 10 */
         { -1, L"foo.bar", 0, 8, 8, L"foo.bar" },
+        { -1, L"d.xn----dha", 0, 5, 5, L"d.\x00fc-" },
     };
 
     WCHAR buf[1024];
@@ -4612,13 +4615,11 @@ static void test_IdnToUnicode(void)
     for (i=0; i<ARRAY_SIZE(test_data); i++)
     {
         ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, NULL, 0);
-        todo_wine_if (i > 6)
         ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret);
 
         SetLastError(0xdeadbeef);
         ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, buf, ARRAY_SIZE(buf));
         err = GetLastError();
-        todo_wine_if (i > 6)
         ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret);
-        ok(err == ret ? 0xdeadbeef : ERROR_INVALID_NAME, "%d: err = %d\n", i, err);
+        ok(err == (ret ? 0xdeadbeef : ERROR_INVALID_NAME), "%d: err = %d\n", i, err);
         ok(!wcsncmp(test_data[i].out, buf, ret), "%d: buf = %s\n", i, wine_dbgstr_wn(buf, ret));
@@ -4632,8 +4633,8 @@ static BOOL is_idn_error( const WCHAR *str )
     for (p = wcstok( err, L" []" ); p; p = wcstok( NULL, L" []" ) )
     {
         if (*p == 'B' || !wcscmp( p, L"V8" )) continue;  /* BiDi */
-        if (!wcscmp( p, L"V2" ) || !wcscmp( p, L"V3" )) continue;  /* CheckHyphens */
-        if (!wcscmp( p, L"V7" )) continue;  /* CheckJoiners */
+        if (!wcscmp( p, L"V2" )) continue;  /* CheckHyphens */
+        if (!wcscmp( p, L"V5" )) continue;  /* Combining marks */
         return TRUE;
     }
     return FALSE;
@@ -4705,7 +4706,7 @@ static void test_Idn(void)
             error = columns[2];
             SetLastError( 0xdeadbeef );
             memset( dst, 0xcc, sizeof(dst) );
-            ret = pIdnToUnicode( 0, columns[0], -1, dst, ARRAY_SIZE(dst) );
+            ret = pIdnToUnicode( IDN_USE_STD3_ASCII_RULES, columns[0], -1, dst, ARRAY_SIZE(dst) );
             for (i = 0; columns[0][i]; i++) if (columns[0][i] > 0x7f) break;
             if (columns[0][i])
             {
@@ -4713,10 +4714,16 @@ static void test_Idn(void)
             }
             else if (!is_idn_error( error ))
             {
-                ok( ret, "line %u: toUnicode failed for %s\n", line, debugstr_w(columns[0]) );
+                ok( ret, "line %u: toUnicode failed for %s expected %s\n", line,
+                    debugstr_w(columns[0]), debugstr_w(expect) );
                 if (ret) ok( !wcscmp( dst, expect ), "line %u: got %s expected %s\n",
                              line, debugstr_w(dst), debugstr_w(expect) );
             }
+            else
+            {
+                ok( !ret, "line %u: toUnicode didn't fail for %s got %s expected error %s\n",
+                    line, debugstr_w(columns[0]), debugstr_w(dst), debugstr_w(error) );
+            }
         }
         fclose( f );
     }
diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c
index 7c859b75d2a..8833cfa712c 100644
--- a/dlls/ntdll/locale.c
+++ b/dlls/ntdll/locale.c
@@ -2238,3 +2238,164 @@ NTSTATUS WINAPI RtlIdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srcl
     *dstlen = buflen;
     return status;
 }
+
+
+/******************************************************************************
+ *      RtlIdnToUnicode   (NTDLL.@)
+ *
+ * Convert the ASCII form of a domain name to Unicode.  The name is processed one
+ * '.'-separated label at a time: labels starting with "xn--" (any case) are
+ * Punycode-decoded, all other labels are copied unchanged.
+ *
+ * PARAMS
+ *  flags  [I]   IDN_* flags; IDN_USE_STD3_ASCII_RULES restricts labels to letters,
+ *                digits and '-', the flags are also passed to check_invalid_chars().
+ *  src    [I]   Source string, ASCII only.
+ *  srclen [I]   Length of src in WCHARs, or -1 if it is null-terminated (the
+ *                terminator is then converted too).
+ *  dst    [O]   Destination buffer, not written to when *dstlen is 0.
+ *  dstlen [I/O] In: size of dst in WCHARs, or 0 to only compute the length.
+ *                Out: number of WCHARs produced, set on success only.
+ *
+ * RETURNS
+ *  STATUS_SUCCESS, STATUS_INVALID_PARAMETER, STATUS_BUFFER_TOO_SMALL,
+ *  STATUS_INVALID_IDN_NORMALIZATION for a malformed name, or the status of
+ *  load_norm_table() if it fails.
+ */
+NTSTATUS WINAPI RtlIdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT *dstlen )
+{
+    static const int max_int = 0x7fffffff;  /* overflow limit for the Punycode arithmetic */
+    const struct norm_table *info;
+    int i, buflen, start, end, out_label, out = 0;
+    NTSTATUS status;
+    UINT buffer[64];
+    WCHAR ch;
+
+    if (!src || srclen < -1) return STATUS_INVALID_PARAMETER;
+    if (srclen == -1) srclen = strlenW( src ) + 1;
+
+    TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen );
+
+    if ((status = load_norm_table( 13, &info ))) return status;
+
+    for (start = 0; start < srclen; )
+    {
+        int n = 0x80, bias = 72, pos = 0, old_pos, w, k, t, delim = 0, digit, delta;
+
+        out_label = out;
+        /* find the end of the label and remember its last '-' */
+        for (i = start; i < srclen; i++)
+        {
+            ch = src[i];
+            if (ch > 0x7f || (i != srclen - 1 && !ch)) return STATUS_INVALID_IDN_NORMALIZATION;
+            if (!ch || ch == '.') break;
+            if (ch == '-') delim = i;
+
+            if (!(flags & IDN_USE_STD3_ASCII_RULES)) continue;
+            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
+                (ch >= '0' && ch <= '9') || ch == '-')
+                continue;
+            return STATUS_INVALID_IDN_NORMALIZATION;
+        }
+        end = i;
+
+        /* last label may be empty */
+        if (start == end && ch) return STATUS_INVALID_IDN_NORMALIZATION;
+
+        /* labels without the "xn--" prefix are copied as is, separator included */
+        if (end - start < 4 ||
+            (src[start] != 'x' && src[start] != 'X') ||
+            (src[start + 1] != 'n' && src[start + 1] != 'N') ||
+            src[start + 2] != '-' || src[start + 3] != '-')
+        {
+            if (end - start > 63) return STATUS_INVALID_IDN_NORMALIZATION;
+
+            if ((flags & IDN_USE_STD3_ASCII_RULES) && (src[start] == '-' || src[end - 1] == '-'))
+                return STATUS_INVALID_IDN_NORMALIZATION;
+
+            if (end < srclen) end++;
+            if (*dstlen)
+            {
+                if (out + end - start <= *dstlen)
+                    memcpy( dst + out, src + start, (end - start) * sizeof(WCHAR));
+                else return STATUS_BUFFER_TOO_SMALL;
+            }
+            out += end - start;
+            start = end;
+            continue;
+        }
+
+        /* the characters between the prefix and the last '-' are taken literally */
+        if (delim == start + 3) delim++;
+        buflen = 0;
+        for (i = start + 4; i < delim && buflen < ARRAY_SIZE(buffer); i++) buffer[buflen++] = src[i];
+        if (buflen) i++;
+        /* each run of digits (a-z, 0-9) that follows encodes one code point to insert */
+        while (i < end)
+        {
+            old_pos = pos;
+            w = 1;
+            for (k = BASE; ; k += BASE)
+            {
+                if (i >= end) return STATUS_INVALID_IDN_NORMALIZATION;
+                ch = src[i++];
+                if (ch >= 'a' && ch <= 'z') digit = ch - 'a';
+                else if (ch >= 'A' && ch <= 'Z') digit = ch - 'A';
+                else if (ch >= '0' && ch <= '9') digit = ch - '0' + 26;
+                else return STATUS_INVALID_IDN_NORMALIZATION;
+                /* fail on overflow instead of wrapping to a negative position */
+                if (digit > (max_int - pos) / w) return STATUS_INVALID_IDN_NORMALIZATION;
+                pos += digit * w;
+                t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias;
+                if (digit < t) break;
+                if (w > max_int / (BASE - t)) return STATUS_INVALID_IDN_NORMALIZATION;
+                w *= BASE - t;
+            }
+
+            /* bias adaptation */
+            delta = (pos - old_pos) / (!old_pos ? DAMP : 2);
+            delta += delta / (buflen + 1);
+            for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE) delta /= BASE - TMIN;
+            bias = k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
+            /* the decoded value must stay a valid Unicode code point */
+            if (pos / (buflen + 1) > 0x10ffff - n) return STATUS_INVALID_IDN_NORMALIZATION;
+            n += pos / (buflen + 1);
+            pos %= buflen + 1;
+
+            if (buflen >= ARRAY_SIZE(buffer) - 1) return STATUS_INVALID_IDN_NORMALIZATION;
+            memmove( buffer + pos + 1, buffer + pos, (buflen - pos) * sizeof(*buffer) );
+            buffer[pos++] = n;
+            buflen++;
+        }
+
+        if (check_invalid_chars( info, flags, buffer, buflen )) return STATUS_INVALID_IDN_NORMALIZATION;
+
+        /* emit the label; code points from 0x10000 up count as two WCHARs */
+        for (i = 0; i < buflen; i++)
+        {
+            int len = 1 + (buffer[i] >= 0x10000);
+            if (*dstlen)
+            {
+                if (out + len <= *dstlen) put_utf16( dst + out, buffer[i] );
+                else return STATUS_BUFFER_TOO_SMALL;
+            }
+            out += len;
+        }
+
+        if (out - out_label > 63) return STATUS_INVALID_IDN_NORMALIZATION;
+
+        /* copy the '.' or terminating null that ended the label */
+        if (end < srclen)
+        {
+            if (*dstlen)
+            {
+                if (out + 1 <= *dstlen) dst[out] = src[end];
+                else return STATUS_BUFFER_TOO_SMALL;
+            }
+            out++;
+        }
+        start = end + 1;
+    }
+    *dstlen = out;
+    return STATUS_SUCCESS;
+}
diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec
index 7964075ede6..f1f495837d4 100644
--- a/dlls/ntdll/ntdll.spec
+++ b/dlls/ntdll/ntdll.spec
@@ -724,6 +724,7 @@
 @ stdcall RtlIdentifierAuthoritySid(ptr)
 @ stdcall RtlIdnToAscii(long wstr long ptr ptr)
 @ stdcall RtlIdnToNameprepUnicode(long wstr long ptr ptr)
+@ stdcall RtlIdnToUnicode(long wstr long ptr ptr)
 @ stdcall RtlImageDirectoryEntryToData(long long long ptr)
 @ stdcall RtlImageNtHeader(long)
 @ stdcall RtlImageRvaToSection(ptr long long)
diff --git a/include/winternl.h b/include/winternl.h
index 4586378572c..f55d275320a 100644
--- a/include/winternl.h
+++ b/include/winternl.h
@@ -2799,6 +2799,7 @@ NTSYSAPI NTSTATUS  WINAPI RtlGUIDFromString(PUNICODE_STRING,GUID*);
 NTSYSAPI PSID_IDENTIFIER_AUTHORITY WINAPI RtlIdentifierAuthoritySid(PSID);
 NTSYSAPI NTSTATUS  WINAPI RtlIdnToAscii(DWORD,const WCHAR*,INT,WCHAR*,INT*);
 NTSYSAPI NTSTATUS  WINAPI RtlIdnToNameprepUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*);
+NTSYSAPI NTSTATUS  WINAPI RtlIdnToUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*);
 NTSYSAPI PVOID     WINAPI RtlImageDirectoryEntryToData(HMODULE,BOOL,WORD,ULONG *);
 NTSYSAPI PIMAGE_NT_HEADERS WINAPI RtlImageNtHeader(HMODULE);
 NTSYSAPI PIMAGE_SECTION_HEADER WINAPI RtlImageRvaToSection(const IMAGE_NT_HEADERS *,HMODULE,DWORD);
-- 
2.24.1