Commit 2aa4673f authored by Alexandre Julliard's avatar Alexandre Julliard

ntdll: Support UTF-8 codepage in string conversion functions.

parent deab47be
......@@ -387,16 +387,22 @@ NTSTATUS WINAPI RtlGetLocaleFileMappingAddress( void **ptr, LCID *lcid, LARGE_IN
*/
WCHAR WINAPI RtlAnsiCharToUnicodeChar( char **ansi )
{
unsigned char ch = *(*ansi)++;
if (nls_info.AnsiTableInfo.CodePage == CP_UTF8)
{
unsigned int res;
if (ch < 0x80) return ch;
if ((res = decode_utf8_char( ch, (const char **)ansi, *ansi + 3 )) > 0x10ffff) res = 0xfffd;
return res;
}
if (nls_info.AnsiTableInfo.DBCSOffsets)
{
USHORT off = nls_info.AnsiTableInfo.DBCSOffsets[(unsigned char)**ansi];
if (off)
{
(*ansi)++;
return nls_info.AnsiTableInfo.DBCSOffsets[off + (unsigned char)*(*ansi)++];
}
USHORT off = nls_info.AnsiTableInfo.DBCSOffsets[ch];
if (off) return nls_info.AnsiTableInfo.DBCSOffsets[off + (unsigned char)*(*ansi)++];
}
return nls_info.AnsiTableInfo.MultiByteTable[(unsigned char)*(*ansi)++];
return nls_info.AnsiTableInfo.MultiByteTable[ch];
}
......@@ -518,13 +524,14 @@ NTSTATUS WINAPI RtlUnicodeToCustomCPN( CPTABLEINFO *info, char *dst, DWORD dstle
NTSTATUS WINAPI RtlMultiByteToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
const char *src, DWORD srclen )
{
if (nls_info.AnsiTableInfo.WideCharTable)
return RtlCustomCPToUnicodeN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
unsigned int ret;
if (nls_info.AnsiTableInfo.CodePage != CP_UTF8)
ret = cp_mbstowcs( &nls_info.AnsiTableInfo, dst, dstlen / sizeof(WCHAR), src, srclen );
else
utf8_mbstowcs( dst, dstlen / sizeof(WCHAR), &ret, src, srclen );
/* locale not setup yet */
dstlen = min( srclen, dstlen / sizeof(WCHAR) );
if (reslen) *reslen = dstlen * sizeof(WCHAR);
while (dstlen--) *dst++ = *src++ & 0x7f;
if (reslen) *reslen = ret * sizeof(WCHAR);
return STATUS_SUCCESS;
}
......@@ -534,7 +541,14 @@ NTSTATUS WINAPI RtlMultiByteToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
*/
NTSTATUS WINAPI RtlMultiByteToUnicodeSize( DWORD *size, const char *str, DWORD len )
{
*size = cp_mbstowcs_size( &nls_info.AnsiTableInfo, str, len ) * sizeof(WCHAR);
unsigned int ret;
if (nls_info.AnsiTableInfo.CodePage != CP_UTF8)
ret = cp_mbstowcs_size( &nls_info.AnsiTableInfo, str, len );
else
utf8_mbstowcs_size( str, len, &ret );
*size = ret * sizeof(WCHAR);
return STATUS_SUCCESS;
}
......@@ -545,7 +559,15 @@ NTSTATUS WINAPI RtlMultiByteToUnicodeSize( DWORD *size, const char *str, DWORD l
NTSTATUS WINAPI RtlOemToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
const char *src, DWORD srclen )
{
return RtlCustomCPToUnicodeN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
unsigned int ret;
if (nls_info.OemTableInfo.CodePage != CP_UTF8)
ret = cp_mbstowcs( &nls_info.OemTableInfo, dst, dstlen / sizeof(WCHAR), src, srclen );
else
utf8_mbstowcs( dst, dstlen / sizeof(WCHAR), &ret, src, srclen );
if (reslen) *reslen = ret * sizeof(WCHAR);
return STATUS_SUCCESS;
}
......@@ -555,7 +577,14 @@ NTSTATUS WINAPI RtlOemToUnicodeN( WCHAR *dst, DWORD dstlen, DWORD *reslen,
*/
DWORD WINAPI RtlOemStringToUnicodeSize( const STRING *str )
{
return (cp_mbstowcs_size( &nls_info.OemTableInfo, str->Buffer, str->Length ) + 1) * sizeof(WCHAR);
unsigned int ret;
if (nls_info.OemTableInfo.CodePage != CP_UTF8)
ret = cp_mbstowcs_size( &nls_info.OemTableInfo, str->Buffer, str->Length );
else
utf8_mbstowcs_size( str->Buffer, str->Length, &ret );
return (ret + 1) * sizeof(WCHAR);
}
......@@ -565,7 +594,14 @@ DWORD WINAPI RtlOemStringToUnicodeSize( const STRING *str )
*/
DWORD WINAPI RtlUnicodeStringToOemSize( const UNICODE_STRING *str )
{
return cp_wcstombs_size( &nls_info.OemTableInfo, str->Buffer, str->Length / sizeof(WCHAR) ) + 1;
unsigned int ret;
if (nls_info.OemTableInfo.CodePage != CP_UTF8)
ret = cp_wcstombs_size( &nls_info.OemTableInfo, str->Buffer, str->Length / sizeof(WCHAR) );
else
utf8_wcstombs_size( str->Buffer, str->Length / sizeof(WCHAR), &ret );
return ret + 1;
}
......@@ -575,18 +611,14 @@ DWORD WINAPI RtlUnicodeStringToOemSize( const UNICODE_STRING *str )
NTSTATUS WINAPI RtlUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *reslen,
const WCHAR *src, DWORD srclen )
{
if (nls_info.AnsiTableInfo.WideCharTable)
return RtlUnicodeToCustomCPN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
unsigned int ret;
/* locale not setup yet */
dstlen = min( srclen / sizeof(WCHAR), dstlen );
if (reslen) *reslen = dstlen;
while (dstlen--)
{
WCHAR ch = *src++;
if (ch > 0x7f) ch = '?';
*dst++ = ch;
}
if (nls_info.AnsiTableInfo.CodePage != CP_UTF8)
ret = cp_wcstombs( &nls_info.AnsiTableInfo, dst, dstlen, src, srclen / sizeof(WCHAR) );
else
utf8_wcstombs( dst, dstlen, &ret, src, srclen / sizeof(WCHAR) );
if (reslen) *reslen = ret;
return STATUS_SUCCESS;
}
......@@ -596,7 +628,14 @@ NTSTATUS WINAPI RtlUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *reslen,
*/
NTSTATUS WINAPI RtlUnicodeToMultiByteSize( DWORD *size, const WCHAR *str, DWORD len )
{
*size = cp_wcstombs_size( &nls_info.AnsiTableInfo, str, len / sizeof(WCHAR) );
unsigned int ret;
if (nls_info.AnsiTableInfo.CodePage != CP_UTF8)
ret = cp_wcstombs_size( &nls_info.AnsiTableInfo, str, len / sizeof(WCHAR) );
else
utf8_wcstombs_size( str, len / sizeof(WCHAR), &ret );
*size = ret;
return STATUS_SUCCESS;
}
......@@ -607,7 +646,15 @@ NTSTATUS WINAPI RtlUnicodeToMultiByteSize( DWORD *size, const WCHAR *str, DWORD
NTSTATUS WINAPI RtlUnicodeToOemN( char *dst, DWORD dstlen, DWORD *reslen,
const WCHAR *src, DWORD srclen )
{
return RtlUnicodeToCustomCPN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
unsigned int ret;
if (nls_info.OemTableInfo.CodePage != CP_UTF8)
ret = cp_wcstombs( &nls_info.OemTableInfo, dst, dstlen, src, srclen / sizeof(WCHAR) );
else
utf8_wcstombs( dst, dstlen, &ret, src, srclen / sizeof(WCHAR) );
if (reslen) *reslen = ret;
return STATUS_SUCCESS;
}
......@@ -712,12 +759,77 @@ NTSTATUS WINAPI RtlUpcaseUnicodeToCustomCPN( CPTABLEINFO *info, char *dst, DWORD
}
static NTSTATUS upcase_unicode_to_utf8( char *dst, DWORD dstlen, DWORD *reslen,
const WCHAR *src, DWORD srclen )
{
char *end;
unsigned int val;
NTSTATUS status = STATUS_SUCCESS;
srclen /= sizeof(WCHAR);
for (end = dst + dstlen; srclen; srclen--, src++)
{
WCHAR ch = casemap( nls_info.UpperCaseTable, *src );
if (ch < 0x80) /* 0x00-0x7f: 1 byte */
{
if (dst > end - 1) break;
*dst++ = ch;
continue;
}
if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
{
if (dst > end - 2) break;
dst[1] = 0x80 | (ch & 0x3f);
ch >>= 6;
dst[0] = 0xc0 | ch;
dst += 2;
continue;
}
if (!get_utf16( src, srclen, &val ))
{
val = 0xfffd;
status = STATUS_SOME_NOT_MAPPED;
}
if (val < 0x10000) /* 0x800-0xffff: 3 bytes */
{
if (dst > end - 3) break;
dst[2] = 0x80 | (val & 0x3f);
val >>= 6;
dst[1] = 0x80 | (val & 0x3f);
val >>= 6;
dst[0] = 0xe0 | val;
dst += 3;
}
else /* 0x10000-0x10ffff: 4 bytes */
{
if (dst > end - 4) break;
dst[3] = 0x80 | (val & 0x3f);
val >>= 6;
dst[2] = 0x80 | (val & 0x3f);
val >>= 6;
dst[1] = 0x80 | (val & 0x3f);
val >>= 6;
dst[0] = 0xf0 | val;
dst += 4;
src++;
srclen--;
}
}
if (srclen) status = STATUS_BUFFER_TOO_SMALL;
if (reslen) *reslen = dstlen - (end - dst);
return status;
}
/**************************************************************************
* RtlUpcaseUnicodeToMultiByteN (NTDLL.@)
*/
NTSTATUS WINAPI RtlUpcaseUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *reslen,
const WCHAR *src, DWORD srclen )
{
if (nls_info.AnsiTableInfo.CodePage == CP_UTF8)
return upcase_unicode_to_utf8( dst, dstlen, reslen, src, srclen );
return RtlUpcaseUnicodeToCustomCPN( &nls_info.AnsiTableInfo, dst, dstlen, reslen, src, srclen );
}
......@@ -728,20 +840,9 @@ NTSTATUS WINAPI RtlUpcaseUnicodeToMultiByteN( char *dst, DWORD dstlen, DWORD *re
NTSTATUS WINAPI RtlUpcaseUnicodeToOemN( char *dst, DWORD dstlen, DWORD *reslen,
const WCHAR *src, DWORD srclen )
{
if (nls_info.OemTableInfo.WideCharTable)
return RtlUpcaseUnicodeToCustomCPN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
/* locale not setup yet */
dstlen = min( srclen / sizeof(WCHAR), dstlen );
if (reslen) *reslen = dstlen;
while (dstlen--)
{
WCHAR ch = *src++;
if (ch > 0x7f) ch = '?';
else ch = casemap_ascii( ch );
*dst++ = ch;
}
return STATUS_SUCCESS;
if (nls_info.OemTableInfo.CodePage == CP_UTF8)
return upcase_unicode_to_utf8( dst, dstlen, reslen, src, srclen );
return RtlUpcaseUnicodeToCustomCPN( &nls_info.OemTableInfo, dst, dstlen, reslen, src, srclen );
}
......
......@@ -506,15 +506,13 @@ LPSTR __cdecl _strlwr( LPSTR str )
*/
int __cdecl toupper( int c )
{
char str[2], *p = str;
char str[4], *p = str;
WCHAR wc;
DWORD len;
str[0] = c;
str[1] = c >> 8;
memcpy( str, &c, sizeof(c) );
wc = RtlAnsiCharToUnicodeChar( &p );
wc = RtlUpcaseUnicodeChar( wc );
RtlUnicodeToMultiByteN( str, sizeof(str), &len, &wc, sizeof(wc) );
if (RtlUpcaseUnicodeToMultiByteN( str, 2, &len, &wc, sizeof(wc) )) return c;
if (len == 2) return ((unsigned char)str[0] << 8) + (unsigned char)str[1];
return (unsigned char)str[0];
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment