From ab9fe967f14cd2838ea7e4a43031665c6781b9ae Mon Sep 17 00:00:00 2001 From: Alexandre Julliard <julliard@winehq.org> Date: Mon, 2 Mar 2020 16:28:48 +0100 Subject: [PATCH] ntdll: Reimplement IdnToUnicode() using the normalization table and the ntdll helpers. Signed-off-by: Alexandre Julliard <julliard@winehq.org> --- dlls/kernel32/locale.c | 173 +---------------------------------- dlls/kernel32/tests/locale.c | 19 ++-- dlls/ntdll/locale.c | 137 ++++++++++++++++++++++++++ dlls/ntdll/ntdll.spec | 1 + include/winternl.h | 1 + 5 files changed, 156 insertions(+), 175 deletions(-) diff --git a/dlls/kernel32/locale.c b/dlls/kernel32/locale.c index 4100517ed16..44242273ef0 100644 --- a/dlls/kernel32/locale.c +++ b/dlls/kernel32/locale.c @@ -544,28 +544,6 @@ INT WINAPI GetGeoInfoA(GEOID geoid, GEOTYPE geotype, LPSTR data, int data_len, L } -enum { - BASE = 36, - TMIN = 1, - TMAX = 26, - SKEW = 38, - DAMP = 700, - INIT_BIAS = 72, - INIT_N = 128 -}; - -static inline INT adapt(INT delta, INT numpoints, BOOL firsttime) -{ - INT k; - - delta /= (firsttime ? 
DAMP : 2); - delta += delta/numpoints; - - for(k=0; delta>((BASE-TMIN)*TMAX)/2; k+=BASE) - delta /= BASE-TMIN; - return k+((BASE-TMIN+1)*delta)/(delta+SKEW); -} - /****************************************************************************** * IdnToAscii (KERNEL32.@) */ @@ -589,154 +567,11 @@ INT WINAPI IdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHA /****************************************************************************** * IdnToUnicode (KERNEL32.@) */ -INT WINAPI IdnToUnicode(DWORD dwFlags, LPCWSTR lpASCIICharStr, INT cchASCIIChar, - LPWSTR lpUnicodeCharStr, INT cchUnicodeChar) +INT WINAPI IdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT dstlen ) { - INT i, label_start, label_end, out_label, out = 0; - WCHAR ch; - - TRACE("%x %p %d %p %d\n", dwFlags, lpASCIICharStr, cchASCIIChar, - lpUnicodeCharStr, cchUnicodeChar); - - for(label_start=0; label_start<cchASCIIChar;) { - INT n = INIT_N, pos = 0, old_pos, w, k, bias = INIT_BIAS, delim=0, digit, t; - - out_label = out; - for(i=label_start; i<cchASCIIChar; i++) { - ch = lpASCIICharStr[i]; - - if(ch>0x7f || (i!=cchASCIIChar-1 && !ch)) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - - if(!ch || ch=='.') - break; - if(ch == '-') - delim = i; - - if((dwFlags&IDN_USE_STD3_ASCII_RULES) == 0) - continue; - if((ch>='a' && ch<='z') || (ch>='A' && ch<='Z') - || (ch>='0' && ch<='9') || ch=='-') - continue; - - SetLastError(ERROR_INVALID_NAME); - return 0; - } - label_end = i; - /* last label may be empty */ - if(label_start==label_end && ch) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - - if((dwFlags&IDN_USE_STD3_ASCII_RULES) && (lpASCIICharStr[label_start]=='-' || - lpASCIICharStr[label_end-1]=='-')) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - if(label_end-label_start > 63) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - - if(label_end-label_start<4 || - tolowerW(lpASCIICharStr[label_start])!='x' || - 
tolowerW(lpASCIICharStr[label_start+1])!='n' || - lpASCIICharStr[label_start+2]!='-' || lpASCIICharStr[label_start+3]!='-') { - if(label_end < cchASCIIChar) - label_end++; - - if(!lpUnicodeCharStr) { - out += label_end-label_start; - }else if(out+label_end-label_start <= cchUnicodeChar) { - memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start, - (label_end-label_start)*sizeof(WCHAR)); - out += label_end-label_start; - }else { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - return 0; - } - - label_start = label_end; - continue; - } - - if(delim == label_start+3) - delim++; - if(!lpUnicodeCharStr) { - out += delim-label_start-4; - }else if(out+delim-label_start-4 <= cchUnicodeChar) { - memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start+4, - (delim-label_start-4)*sizeof(WCHAR)); - out += delim-label_start-4; - }else { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - return 0; - } - if(out != out_label) - delim++; - - for(i=delim; i<label_end;) { - old_pos = pos; - w = 1; - for(k=BASE; ; k+=BASE) { - ch = i<label_end ? tolowerW(lpASCIICharStr[i++]) : 0; - if((ch<'a' || ch>'z') && (ch<'0' || ch>'9')) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - digit = ch<='9' ? ch-'0'+'z'-'a'+1 : ch-'a'; - pos += digit*w; - t = k<=bias ? TMIN : k>=bias+TMAX ? 
TMAX : k-bias; - if(digit < t) - break; - w *= BASE-t; - } - bias = adapt(pos-old_pos, out-out_label+1, old_pos==0); - n += pos/(out-out_label+1); - pos %= out-out_label+1; - - if((dwFlags&IDN_ALLOW_UNASSIGNED)==0 && - get_table_entry(nameprep_char_type, n)==1/*UNASSIGNED*/) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - if(!lpUnicodeCharStr) { - out++; - }else if(out+1 <= cchASCIIChar) { - memmove(lpUnicodeCharStr+out_label+pos+1, - lpUnicodeCharStr+out_label+pos, - (out-out_label-pos)*sizeof(WCHAR)); - lpUnicodeCharStr[out_label+pos] = n; - out++; - }else { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - return 0; - } - pos++; - } - - if(out-out_label > 63) { - SetLastError(ERROR_INVALID_NAME); - return 0; - } - - if(label_end < cchASCIIChar) { - if(!lpUnicodeCharStr) { - out++; - }else if(out+1 <= cchUnicodeChar) { - lpUnicodeCharStr[out++] = lpASCIICharStr[label_end]; - }else { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - return 0; - } - } - label_start = label_end+1; - } - - return out; + NTSTATUS status = RtlIdnToUnicode( flags, src, srclen, dst, &dstlen ); /* updates dstlen on success */ + if (!set_ntstatus( status )) return 0; + return dstlen; } diff --git a/dlls/kernel32/tests/locale.c b/dlls/kernel32/tests/locale.c index 261bcff54e4..a1215cb08ef 100644 --- a/dlls/kernel32/tests/locale.c +++ b/dlls/kernel32/tests/locale.c @@ -4600,10 +4600,13 @@ static void test_IdnToUnicode(void) /* 5 */ { 64, L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0 }, { 8, L"xn--7va", IDN_ALLOW_UNASSIGNED, 2, 2, L"\x380" }, + { 8, L"xn--7va", 0, 0, 0, L"\x380" }, { -1, L"xn----bm3an932a1l5d.xn--xvj", 0, 8, 0, L"\xd803\xde78\x46b5-\xa861.\x2e87" }, { -1, L"xn--z123456789012345678901234567890123456789012345678901234-9te", 0, 57, 57, L"\xe4z123456789012345678901234567890123456789012345678901234" }, + /* 10 */ { -1, L"foo.bar", 0, 8, 8, L"foo.bar" }, + { -1, L"d.xn----dha", 0, 5, 5, L"d.\x00fc-" }, }; WCHAR buf[1024]; @@ -4612,13 +4615,11 @@ static void test_IdnToUnicode(void) for 
(i=0; i<ARRAY_SIZE(test_data); i++) { ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, NULL, 0); - todo_wine_if (i > 6) ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret); SetLastError(0xdeadbeef); ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, buf, ARRAY_SIZE(buf)); err = GetLastError(); - todo_wine_if (i > 6) ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret); ok(err == ret ? 0xdeadbeef : ERROR_INVALID_NAME, "%d: err = %d\n", i, err); ok(!wcsncmp(test_data[i].out, buf, ret), "%d: buf = %s\n", i, wine_dbgstr_wn(buf, ret)); @@ -4632,8 +4633,8 @@ static BOOL is_idn_error( const WCHAR *str ) for (p = wcstok( err, L" []" ); p; p = wcstok( NULL, L" []" ) ) { if (*p == 'B' || !wcscmp( p, L"V8" )) continue; /* BiDi */ - if (!wcscmp( p, L"V2" ) || !wcscmp( p, L"V3" )) continue; /* CheckHyphens */ - if (!wcscmp( p, L"V7" )) continue; /* CheckJoiners */ + if (!wcscmp( p, L"V2" )) continue; /* CheckHyphens */ + if (!wcscmp( p, L"V5" )) continue; /* Combining marks */ return TRUE; } return FALSE; @@ -4705,7 +4706,7 @@ static void test_Idn(void) error = columns[2]; SetLastError( 0xdeadbeef ); memset( dst, 0xcc, sizeof(dst) ); - ret = pIdnToUnicode( 0, columns[0], -1, dst, ARRAY_SIZE(dst) ); + ret = pIdnToUnicode( IDN_USE_STD3_ASCII_RULES, columns[0], -1, dst, ARRAY_SIZE(dst) ); for (i = 0; columns[0][i]; i++) if (columns[0][i] > 0x7f) break; if (columns[0][i]) { @@ -4713,10 +4714,16 @@ static void test_Idn(void) } else if (!is_idn_error( error )) { - ok( ret, "line %u: toUnicode failed for %s\n", line, debugstr_w(columns[0]) ); + ok( ret, "line %u: toUnicode failed for %s expected %s\n", line, + debugstr_w(columns[0]), debugstr_w(expect) ); if (ret) ok( !wcscmp( dst, expect ), "line %u: got %s expected %s\n", line, debugstr_w(dst), debugstr_w(expect) ); } + else + { + ok( !ret, "line %u: toUnicode didn't fail for %s got %s 
expected error %s\n", + line, debugstr_w(columns[0]), debugstr_w(dst), debugstr_w(error) ); + } } fclose( f ); } diff --git a/dlls/ntdll/locale.c b/dlls/ntdll/locale.c index 7c859b75d2a..8833cfa712c 100644 --- a/dlls/ntdll/locale.c +++ b/dlls/ntdll/locale.c @@ -2238,3 +2238,140 @@ NTSTATUS WINAPI RtlIdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srcl *dstlen = buflen; return status; } + + +/****************************************************************************** + * RtlIdnToUnicode (NTDLL.@) + * + * Convert a domain name from its ASCII (Punycode) form back to Unicode. + * Labels starting with "xn--" are decoded, other labels are copied as is. + * When *dstlen is 0 on input nothing is written to dst and only the + * required length is computed; on success *dstlen receives that length. + */ +NTSTATUS WINAPI RtlIdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT *dstlen ) +{ + const struct norm_table *info; + int i, buflen, start, end, out_label, out = 0; + NTSTATUS status; + UINT buffer[64]; /* code points of the label being decoded */ + WCHAR ch; + + if (!src || srclen < -1) return STATUS_INVALID_PARAMETER; + if (srclen == -1) srclen = strlenW( src ) + 1; /* count the terminating null too */ + + TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen ); + + if ((status = load_norm_table( 13, &info ))) return status; + + for (start = 0; start < srclen; ) + { + int n = 0x80, bias = 72, pos = 0, old_pos, w, k, t, delim = 0, digit, delta; + + out_label = out; + for (i = start; i < srclen; i++) + { + ch = src[i]; + if (ch > 0x7f || (i != srclen - 1 && !ch)) return STATUS_INVALID_IDN_NORMALIZATION; + if (!ch || ch == '.') break; + if (ch == '-') delim = i; /* ends up at the last hyphen of the label */ + + if (!(flags & IDN_USE_STD3_ASCII_RULES)) continue; + if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') || ch == '-') + continue; + return STATUS_INVALID_IDN_NORMALIZATION; + } + end = i; + + /* last label may be empty */ + if (start == end && ch) return STATUS_INVALID_IDN_NORMALIZATION; + + if (end - start < 4 || + (src[start] != 'x' && src[start] != 'X') || + (src[start + 1] != 'n' && src[start + 1] != 'N') || + src[start + 2] != '-' || src[start + 3] != '-') + { + if (end - start > 63) return STATUS_INVALID_IDN_NORMALIZATION; + + if ((flags & IDN_USE_STD3_ASCII_RULES) && (src[start] == '-' || src[end 
- 1] == '-')) + return STATUS_INVALID_IDN_NORMALIZATION; + + if (end < srclen) end++; /* take the separator along with the label */ + if (*dstlen) + { + if (out + end - start <= *dstlen) + memcpy( dst + out, src + start, (end - start) * sizeof(WCHAR)); + else return STATUS_BUFFER_TOO_SMALL; + } + out += end - start; + start = end; + continue; + } + + if (delim == start + 3) delim++; /* no hyphen after the prefix, so no literal part */ + buflen = 0; + for (i = start + 4; i < delim && buflen < ARRAY_SIZE(buffer); i++) buffer[buflen++] = src[i]; + if (buflen) i++; /* step over the delimiter */ + while (i < end) + { + old_pos = pos; + w = 1; + for (k = BASE; ; k += BASE) + { + if (i >= end) return STATUS_INVALID_IDN_NORMALIZATION; + ch = src[i++]; + if (ch >= 'a' && ch <= 'z') digit = ch - 'a'; + else if (ch >= 'A' && ch <= 'Z') digit = ch - 'A'; + else if (ch >= '0' && ch <= '9') digit = ch - '0' + 26; + else return STATUS_INVALID_IDN_NORMALIZATION; + if (digit > (0x7fffffff - pos) / w) return STATUS_INVALID_IDN_NORMALIZATION; /* pos would overflow */ + pos += digit * w; + t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias; + if (digit < t) break; + if (w > 0x7fffffff / (BASE - t)) return STATUS_INVALID_IDN_NORMALIZATION; /* w would overflow */ + w *= BASE - t; + } + + delta = (pos - old_pos) / (!old_pos ? DAMP : 2); /* adapt the bias for the next code point */ + delta += delta / (buflen + 1); + for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE) delta /= BASE - TMIN; + bias = k + ((BASE - TMIN + 1) * delta) / (delta + SKEW); + if (pos / (buflen + 1) > 0x10ffff - n) return STATUS_INVALID_IDN_NORMALIZATION; /* not a Unicode code point */ + n += pos / (buflen + 1); + pos %= buflen + 1; + + if (buflen >= ARRAY_SIZE(buffer) - 1) return STATUS_INVALID_IDN_NORMALIZATION; + memmove( buffer + pos + 1, buffer + pos, (buflen - pos) * sizeof(*buffer) ); + buffer[pos++] = n; + buflen++; + } + + if (check_invalid_chars( info, flags, buffer, buflen )) return STATUS_INVALID_IDN_NORMALIZATION; + + for (i = 0; i < buflen; i++) + { + int len = 1 + (buffer[i] >= 0x10000); + if (*dstlen) + { + if (out + len <= *dstlen) put_utf16( dst + out, buffer[i] ); + else return STATUS_BUFFER_TOO_SMALL; + } + out += len; + } + + if (out - out_label > 63) return STATUS_INVALID_IDN_NORMALIZATION; + + if (end < srclen) + { + if (*dstlen) + { + if (out + 1 <= *dstlen) dst[out] = src[end]; + else return STATUS_BUFFER_TOO_SMALL; + } + out++; + } + start = end + 1; + } 
+ *dstlen = out; + return STATUS_SUCCESS; +} diff --git a/dlls/ntdll/ntdll.spec b/dlls/ntdll/ntdll.spec index 7964075ede6..f1f495837d4 100644 --- a/dlls/ntdll/ntdll.spec +++ b/dlls/ntdll/ntdll.spec @@ -724,6 +724,7 @@ @ stdcall RtlIdentifierAuthoritySid(ptr) @ stdcall RtlIdnToAscii(long wstr long ptr ptr) @ stdcall RtlIdnToNameprepUnicode(long wstr long ptr ptr) +@ stdcall RtlIdnToUnicode(long wstr long ptr ptr) @ stdcall RtlImageDirectoryEntryToData(long long long ptr) @ stdcall RtlImageNtHeader(long) @ stdcall RtlImageRvaToSection(ptr long long) diff --git a/include/winternl.h b/include/winternl.h index 4586378572c..f55d275320a 100644 --- a/include/winternl.h +++ b/include/winternl.h @@ -2799,6 +2799,7 @@ NTSYSAPI NTSTATUS WINAPI RtlGUIDFromString(PUNICODE_STRING,GUID*); NTSYSAPI PSID_IDENTIFIER_AUTHORITY WINAPI RtlIdentifierAuthoritySid(PSID); NTSYSAPI NTSTATUS WINAPI RtlIdnToAscii(DWORD,const WCHAR*,INT,WCHAR*,INT*); NTSYSAPI NTSTATUS WINAPI RtlIdnToNameprepUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*); +NTSYSAPI NTSTATUS WINAPI RtlIdnToUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*); NTSYSAPI PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE,BOOL,WORD,ULONG *); NTSYSAPI PIMAGE_NT_HEADERS WINAPI RtlImageNtHeader(HMODULE); NTSYSAPI PIMAGE_SECTION_HEADER WINAPI RtlImageRvaToSection(const IMAGE_NT_HEADERS *,HMODULE,DWORD); -- 2.24.1