Commit fb270ddc authored by Alexandre Julliard

Added a bunch of code page tables for multibyte<->wide char
conversions (with the help of Dmitry Timoshkov).
parent 0072c881
......@@ -50,6 +50,7 @@ LIBSUBDIRS = \
relay32 \
resources \
scheduler \
unicode \
win32 \
windows
......@@ -114,6 +115,7 @@ LIBOBJS = \
relay32/relay32.o \
resources/resources.o \
scheduler/scheduler.o \
unicode/unicode.o \
win32/win32.o \
windows/windows.o
......
......@@ -6301,6 +6301,7 @@ tools/Makefile
tools/cvdump/Makefile
tools/wrc/Makefile
tsx11/Makefile
unicode/Makefile
win32/Makefile
windows/Makefile
windows/ttydrv/Makefile
......@@ -6534,6 +6535,7 @@ tools/Makefile
tools/cvdump/Makefile
tools/wrc/Makefile
tsx11/Makefile
unicode/Makefile
win32/Makefile
windows/Makefile
windows/ttydrv/Makefile
......
......@@ -1095,6 +1095,7 @@ tools/Makefile
tools/cvdump/Makefile
tools/wrc/Makefile
tsx11/Makefile
unicode/Makefile
win32/Makefile
windows/Makefile
windows/ttydrv/Makefile
......
/*
* Wine internal Unicode definitions
*
* Copyright 2000 Alexandre Julliard
*/
#ifndef __WINE_UNICODE_H
#define __WINE_UNICODE_H
/* code page info common to SBCS and DBCS */
/* This struct is the first member of both sbcs_table and dbcs_table, and is
 * also a member of union cptable, so it can be read through the union
 * before the kind of table is known. */
struct cp_info
{
unsigned int codepage; /* codepage id */
unsigned int char_size; /* char size (1 or 2 bytes) */
char def_char[2]; /* default char value; the converters treat a non-zero second byte as a double-byte default */
unsigned short def_unicode_char; /* default Unicode char value */
const char *name; /* code page name */
};
/* conversion tables for a single-byte code page */
struct sbcs_table
{
struct cp_info info;
const unsigned short *cp2uni; /* code page -> Unicode map, indexed by the byte value */
const unsigned char *uni2cp_low; /* Unicode -> code page map, indexed by uni2cp_high[ch >> 8] + (ch & 0xff) */
const unsigned short *uni2cp_high; /* offsets into uni2cp_low, indexed by the high byte of the Unicode char */
};
/* conversion tables for a double-byte code page */
struct dbcs_table
{
struct cp_info info;
const unsigned short *cp2uni; /* code page -> Unicode map, indexed by (offset << 8) + byte */
const unsigned char *cp2uni_leadbytes; /* per-byte offset into cp2uni; non-zero marks a lead byte */
const unsigned short *uni2cp_low; /* Unicode -> code page map; a non-zero high byte means a two-byte result */
const unsigned short *uni2cp_high; /* offsets into uni2cp_low, indexed by the high byte of the Unicode char */
unsigned char lead_bytes[12]; /* lead bytes ranges */
};
/* Generic handle on a code page table. Check info.char_size to decide
 * which view applies: 1 selects sbcs, anything else is treated as dbcs
 * by the conversion routines. */
union cptable
{
struct cp_info info;
struct sbcs_table sbcs;
struct dbcs_table dbcs;
};
/* look up the table for a codepage id; returns NULL if it is not built in */
extern const union cptable *cp_get_table( unsigned int codepage );
/* return the table at position index, or NULL once index is past the last table */
extern const union cptable *cp_enum_table( unsigned int index );
/* Multibyte -> wide char conversion; lengths are in characters.
 * With dstlen == 0 nothing is written and the required length is returned.
 * Returns -1 on dst buffer overflow, -2 on invalid input char (only
 * checked when MB_ERR_INVALID_CHARS is set in flags). */
extern int cp_mbstowcs( const union cptable *table, int flags,
const char *src, int srclen,
unsigned short *dst, int dstlen );
/* Wide char -> multibyte conversion.
 * With dstlen == 0 nothing is written and the required length is returned.
 * Returns -1 on dst buffer overflow. */
extern int cp_wcstombs( const union cptable *table, int flags,
const unsigned short *src, int srclen,
char *dst, int dstlen );
/* Tell whether ch is a lead byte in the given code page.
 * Always 0 for tables whose char_size is not 2; otherwise 1 when the
 * lead-byte offset table has a non-zero entry for ch. */
static inline int is_dbcs_leadbyte( const union cptable *table, unsigned char ch )
{
    if (table->info.char_size != 2) return 0;
    return table->dbcs.cp2uni_leadbytes[ch] != 0;
}
#endif /* __WINE_UNICODE_H */
DEFS = @DLLFLAGS@ -D__WINE__
TOPSRCDIR = @top_srcdir@
TOPOBJDIR = ..
SRCDIR = @srcdir@
VPATH = @srcdir@
MODULE = unicode
CODEPAGES = \
037 \
424 \
437 \
500 \
737 \
775 \
850 \
852 \
855 \
856 \
857 \
860 \
861 \
862 \
863 \
864 \
865 \
866 \
869 \
874 \
875 \
878 \
932 \
936 \
949 \
950 \
1006 \
1026 \
1250 \
1251 \
1252 \
1253 \
1254 \
1255 \
1256 \
1257 \
1258 \
10000 \
10006 \
10007 \
10029 \
10079 \
10081 \
20866 \
28591 \
28592 \
28593 \
28594 \
28595 \
28596 \
28597 \
28598 \
28599
C_SRCS = \
cptable.c \
mbtowc.c \
wctomb.c \
$(CODEPAGES:%=c_%.c)
all: $(MODULE).o
@MAKE_RULES@
### Dependencies:
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*
* Codepage tables
*
* Copyright 2000 Alexandre Julliard
*/
#include <stdlib.h>
#include "wine/unicode.h"
/* Everything below this line is generated automatically by cpmap.pl */
/* ### cpmap begin ### */
extern union cptable cptable_037;
extern union cptable cptable_424;
extern union cptable cptable_437;
extern union cptable cptable_500;
extern union cptable cptable_737;
extern union cptable cptable_775;
extern union cptable cptable_850;
extern union cptable cptable_852;
extern union cptable cptable_855;
extern union cptable cptable_856;
extern union cptable cptable_857;
extern union cptable cptable_860;
extern union cptable cptable_861;
extern union cptable cptable_862;
extern union cptable cptable_863;
extern union cptable cptable_864;
extern union cptable cptable_865;
extern union cptable cptable_866;
extern union cptable cptable_869;
extern union cptable cptable_874;
extern union cptable cptable_875;
extern union cptable cptable_878;
extern union cptable cptable_932;
extern union cptable cptable_936;
extern union cptable cptable_949;
extern union cptable cptable_950;
extern union cptable cptable_1006;
extern union cptable cptable_1026;
extern union cptable cptable_1250;
extern union cptable cptable_1251;
extern union cptable cptable_1252;
extern union cptable cptable_1253;
extern union cptable cptable_1254;
extern union cptable cptable_1255;
extern union cptable cptable_1256;
extern union cptable cptable_1257;
extern union cptable cptable_1258;
extern union cptable cptable_10000;
extern union cptable cptable_10006;
extern union cptable cptable_10007;
extern union cptable cptable_10029;
extern union cptable cptable_10079;
extern union cptable cptable_10081;
extern union cptable cptable_20866;
extern union cptable cptable_28591;
extern union cptable cptable_28592;
extern union cptable cptable_28593;
extern union cptable cptable_28594;
extern union cptable cptable_28595;
extern union cptable cptable_28596;
extern union cptable cptable_28597;
extern union cptable cptable_28598;
extern union cptable cptable_28599;
/* All built-in code page tables, in ascending codepage id order.
 * cp_get_table() runs bsearch() over this array, so the order matters.
 * NOTE(review): this region is marked as generated by cpmap.pl; confirm
 * the generator emits the entries sorted. */
static const union cptable * const cptables[53] =
{
&cptable_037,
&cptable_424,
&cptable_437,
&cptable_500,
&cptable_737,
&cptable_775,
&cptable_850,
&cptable_852,
&cptable_855,
&cptable_856,
&cptable_857,
&cptable_860,
&cptable_861,
&cptable_862,
&cptable_863,
&cptable_864,
&cptable_865,
&cptable_866,
&cptable_869,
&cptable_874,
&cptable_875,
&cptable_878,
&cptable_932,
&cptable_936,
&cptable_949,
&cptable_950,
&cptable_1006,
&cptable_1026,
&cptable_1250,
&cptable_1251,
&cptable_1252,
&cptable_1253,
&cptable_1254,
&cptable_1255,
&cptable_1256,
&cptable_1257,
&cptable_1258,
&cptable_10000,
&cptable_10006,
&cptable_10007,
&cptable_10029,
&cptable_10079,
&cptable_10081,
&cptable_20866,
&cptable_28591,
&cptable_28592,
&cptable_28593,
&cptable_28594,
&cptable_28595,
&cptable_28596,
&cptable_28597,
&cptable_28598,
&cptable_28599,
};
/* ### cpmap end ### */
/* Everything above this line is generated automatically by cpmap.pl */
#define NB_CODEPAGES (sizeof(cptables)/sizeof(cptables[0]))
static int cmp_codepage( const void *codepage, const void *entry )
{
return (unsigned int)codepage - (*(union cptable **)entry)->info.codepage;
}
/* get the table of a given code page */
const union cptable *cp_get_table( unsigned int codepage )
{
const union cptable **res;
if (!(res = bsearch( (void *)codepage, cptables, NB_CODEPAGES,
sizeof(cptables[0]), cmp_codepage ))) return NULL;
return *res;
}
/* enum valid codepages */
/* Returns the table stored at position index, or NULL when index is not
 * below NB_CODEPAGES, which lets callers iterate until NULL. */
const union cptable *cp_enum_table( unsigned int index )
{
    return (index < NB_CODEPAGES) ? cptables[index] : NULL;
}
# Default Unicode mappings
#
# Copyright 2000 Alexandre Julliard
#
# This file contains default mappings for Unicode chars that aren't
# defined directly in the code page file. It is used by the cpmap.pl
# tool to build codepage tables.
#
# The first column defines equivalence sets; if any character in the set
# is defined in the codepage file, all others in the set will map to the
# same value.
#
# The second column specifies the default char to map the whole set to,
# if no characters in the set are defined by the codepage file.
#
# Characters that have an explicit decomposition in the UnicodeData.txt
# file (for instance 00c0 LATIN CAPITAL LETTER A WITH GRAVE -> 0041 0300)
# are handled automatically and do not have to be defined here. They can
# still be defined if necessary to override the unicode decomposition.
#
# letters
00a2,20a1 'c' # CENT SIGN, COLON SIGN
00a3,20a4 'L' # POUND SIGN, LIRA SIGN
00a5 'Y' # YEN SIGN
00a9 'c' # COPYRIGHT SIGN
00ae 'r' # REGISTERED SIGN
00b5,03bc 'u' # MICRO SIGN, GREEK SMALL LETTER MU
00c6 'A' # LATIN CAPITAL LETTER AE
00d0,0110,0189 'D' # LATIN CAPITAL LETTER D WITH STROKE/AFRICAN D/ETH
00d7 'x' # MULTIPLICATION SIGN
00d8,2205 'O' # LATIN CAPITAL LETTER O WITH STROKE, EMPTY SET
00df,03b2 none # LATIN SMALL LETTER SHARP S, GREEK SMALL LETTER BETA
00e6 'a' # LATIN SMALL LETTER AE
00f0 'd' # LATIN SMALL LETTER ETH
00f8 'o' # LATIN SMALL LETTER O WITH STROKE
0111 'd' # LATIN SMALL LETTER D WITH STROKE
0126 'H' # LATIN CAPITAL LETTER H WITH STROKE
0127 'h' # LATIN SMALL LETTER H WITH STROKE
0131 'i' # LATIN SMALL LETTER DOTLESS I
0141 'L' # LATIN CAPITAL LETTER L WITH STROKE
0142 'l' # LATIN SMALL LETTER L WITH STROKE
0152 'O' # LATIN CAPITAL LIGATURE OE
0153 'o' # LATIN SMALL LIGATURE OE
0166 'T' # LATIN CAPITAL LETTER T WITH STROKE
0167 't' # LATIN SMALL LETTER T WITH STROKE
0180 'b' # LATIN SMALL LETTER B WITH STROKE
0190 'E' # LATIN CAPITAL LETTER OPEN E
0191 'F' # LATIN CAPITAL LETTER F WITH HOOK
0192 'f' # LATIN SMALL LETTER F WITH HOOK
0197 'I' # LATIN CAPITAL LETTER I WITH STROKE
019a 'l' # LATIN SMALL LETTER L WITH BAR
019f 'O' # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
01ab 't' # LATIN SMALL LETTER T WITH PALATAL HOOK
01ae 'T' # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
01b6 'z' # LATIN SMALL LETTER Z WITH STROKE
0391 'A' # GREEK CAPITAL LETTER ALPHA
0392 'B' # GREEK CAPITAL LETTER BETA
0395 'E' # GREEK CAPITAL LETTER EPSILON
0396 'Z' # GREEK CAPITAL LETTER ZETA
0397 'H' # GREEK CAPITAL LETTER ETA
0399 'I' # GREEK CAPITAL LETTER IOTA
039a 'K' # GREEK CAPITAL LETTER KAPPA
039c 'M' # GREEK CAPITAL LETTER MU
039d 'N' # GREEK CAPITAL LETTER NU
039f 'O' # GREEK CAPITAL LETTER OMICRON
03a1 'P' # GREEK CAPITAL LETTER RHO
03a4 'T' # GREEK CAPITAL LETTER TAU
03a5 'Y' # GREEK CAPITAL LETTER UPSILON
03a7 'X' # GREEK CAPITAL LETTER CHI
01e4 'G' # LATIN CAPITAL LETTER G WITH STROKE
01e5 'g' # LATIN SMALL LETTER G WITH STROKE
0261 'g' # LATIN SMALL LETTER SCRIPT G
2118 'P' # SCRIPT CAPITAL P
212e 'e' # ESTIMATED SYMBOL
# accents
00a8,0308 '"' # DIAERESIS
00af,02c9,0304 2014 # MACRON -> EM DASH
00b4,02b9,02ca,0301,2032 ''' # ACUTE ACCENT, PRIME
00b8,0327 ',' # CEDILLA
02ba,02dd,030b,2033 '"' # DOUBLE ACUTE ACCENT, DOUBLE PRIME
02c4,2303 '^' # UP ARROWHEAD
02c6,0302 '^' # CIRCUMFLEX ACCENT
02c7,030c 'v' # COMBINING CARON -> CARON
02c8 ''' # MODIFIER LETTER VERTICAL LINE
02cb,0300 '`' # GRAVE ACCENT
02cd,0331,0332 '_' # MODIFIER LETTER LOW MACRON, COMBINING MACRON BELOW, COMBINING LOW LINE
02d8,0306 none # BREVE
02d9,0307 none # DOT ABOVE
02da,030a,2070,2218 00b0 # RING ABOVE, SUPERSCRIPT ZERO, RING OPERATOR -> DEGREE SIGN
02db,0328 none # OGONEK
02dc,0303 '~' # SMALL TILDE
0305,203e 00af # OVERLINE -> MACRON
030e '"' # COMBINING DOUBLE VERTICAL LINE ABOVE
0333 '_' # COMBINING DOUBLE LOW LINE
# mathematical symbols
00b1,2213 none # PLUS-MINUS SIGN, MINUS-OR-PLUS SIGN
2044,2215 '/' # FRACTION/DIVISION SLASH
2216 '\' # SET MINUS
2217 '*' # ASTERISK OPERATOR
221a 'V' # SQUARE ROOT
221f 'L' # RIGHT ANGLE
2223 '|' # DIVIDES
2229 'n' # INTERSECTION
2236 ':' # RATIO
2248 02DC # ALMOST EQUAL TO -> SMALL TILDE
2261,2263 '=' # IDENTICAL TO, STRICTLY EQUIVALENT TO
226a 00ab # MUCH LESS-THAN -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
226b 00bb # MUCH GREATER-THAN -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
22c5 00b7 # DOT OPERATOR -> MIDDLE DOT
# misc symbols
00a1 '!' # INVERTED EXCLAMATION MARK
00ab,300a '<' # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, LEFT DOUBLE ANGLE BRACKET
00ad '-' # SOFT HYPHEN
00bb,300b '>' # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, RIGHT DOUBLE ANGLE BRACKET
01c0 2502 # LATIN LETTER DENTAL CLICK -> BOX DRAWINGS LIGHT VERTICAL
01c3 '!' # LATIN LETTER RETROFLEX CLICK
02bb 2018 # MODIFIER LETTER TURNED COMMA -> LEFT SINGLE QUOTATION MARK
02bc,2019 ''' # RIGHT SINGLE QUOTATION MARK
2010,2011,2212 '-' # HYPHEN, MINUS SIGN
2013,2014,2015 '-' # EN DASH, EM DASH, HORIZONTAL BAR
2018,201b,2035 '`' # LEFT SINGLE QUOTATION MARK, SINGLE HIGH-REVERSED-9 QUOTATION MARK, REVERSED PRIME
201a ',' # SINGLE LOW-9 QUOTATION MARK
201c,301d '"' # LEFT DOUBLE QUOTATION MARK, REVERSED DOUBLE PRIME QUOTATION MARK
201d,301e '"' # RIGHT DOUBLE QUOTATION MARK, DOUBLE PRIME QUOTATION MARK
201e,301f ',' # DOUBLE LOW-9 QUOTATION MARK, LOW DOUBLE PRIME QUOTATION MARK
2022,2219 none # BULLET, BULLET OPERATOR
2039,3008 '<' # SINGLE LEFT-POINTING ANGLE QUOTATION MARK, LEFT ANGLE BRACKET
203a,3009 '>' # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK, RIGHT ANGLE BRACKET
203c '!' # DOUBLE EXCLAMATION MARK
2190 2039 # LEFTWARDS ARROW -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
2191 02c6 # UPWARDS ARROW -> MODIFIER LETTER CIRCUMFLEX ACCENT
2192 203a # RIGHTWARDS ARROW -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
2193 02c7 # DOWNWARDS ARROW -> CARON
2194 '-' # LEFT RIGHT ARROW
2195 '|' # UP DOWN ARROW
223c '~' # TILDE
301a '[' # LEFT WHITE SQUARE BRACKET
301b ']' # RIGHT WHITE SQUARE BRACKET
fe49,fe4a,fe4b,fe4c 203e # DOUBLE WAVY OVERLINE -> OVERLINE
fe4d,fe4e,fe4f '_' # DASHED/CENTRELINE/WAVY LOW LINE
# box drawing chars
2500,2501 '-' # BOX DRAWINGS LIGHT/HEAVY HORIZONTAL
2502,2503 '|' # BOX DRAWINGS LIGHT/HEAVY VERTICAL
2504,2505 '-' # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH HORIZONTAL
2506,2507 00a6 # BOX DRAWINGS LIGHT/HEAVY TRIPLE DASH VERTICAL -> BROKEN BAR
2508,2509 '-' # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH HORIZONTAL
250a,250b 00a6 # BOX DRAWINGS LIGHT/HEAVY QUADRUPLE DASH VERTICAL -> BROKEN BAR
250c,250d,250e,250f '+' # BOX DRAWINGS LIGHT/HEAVY DOWN AND RIGHT
2510,2511,2512,2513 00ac # BOX DRAWINGS LIGHT/HEAVY DOWN AND LEFT -> NOT SIGN
2514,2515,2516,2517 'L' # BOX DRAWINGS LIGHT/HEAVY UP AND RIGHT
2518,2519,251a,251b '+' # BOX DRAWINGS LIGHT/HEAVY UP AND LEFT
251c,251d,251e,251f,2520,2521,2522,2523 '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND RIGHT
2524,2525,2526,2527,2528,2529,252a,252b '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND LEFT
252c,252d,252e,252f,2530,2531,2532,2533 'T' # BOX DRAWINGS LIGHT/HEAVY DOWN AND HORIZONTAL
2534,2535,2536,2537,2538,2539,253a,253b '+' # BOX DRAWINGS LIGHT/HEAVY UP AND HORIZONTAL
253c,253d,253e,253f,2540,2541,2542,2543,2544,2545,2546,2547,2548,2549,254a,254b '+' # BOX DRAWINGS LIGHT/HEAVY VERTICAL AND HORIZONTAL
254c,254d '-' # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH HORIZONTAL
254e,254f 00a6 # BOX DRAWINGS LIGHT/HEAVY DOUBLE DASH VERTICAL -> BROKEN BAR
2550 '=' # BOX DRAWINGS DOUBLE HORIZONTAL
2551 '|' # BOX DRAWINGS DOUBLE VERTICAL
2552,2553,2554 '+' # BOX DRAWINGS DOWN AND RIGHT
2555,2556,2557 00ac # BOX DRAWINGS DOWN AND LEFT -> NOT SIGN
2558,2559,255a 'L' # BOX DRAWINGS UP AND RIGHT
255b,255c,255d '+' # BOX DRAWINGS UP AND LEFT
255e,255f,2560 '+' # BOX DRAWINGS VERTICAL AND RIGHT
2561,2562,2563 '+' # BOX DRAWINGS VERTICAL AND LEFT
2564,2565,2566 'T' # BOX DRAWINGS DOWN AND HORIZONTAL
2567,2568,2569 '+' # BOX DRAWINGS UP AND HORIZONTAL
256a,256b,256c '+' # BOX DRAWINGS VERTICAL AND HORIZONTAL
2571 '/' # BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
2572 '\' # BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
2573 'X' # BOX DRAWINGS LIGHT DIAGONAL CROSS
2574,2576,2578,257a,257c,257e '-' # BOX DRAWINGS LIGHT/HEAVY LEFT/RIGHT
2575,2577,2579,257b,257d,257f '|' # BOX DRAWINGS LIGHT/HEAVY UP/DOWN
25cb,25ef 'O' # WHITE/LARGE CIRCLE
/*
* MultiByteToWideChar implementation
*
* Copyright 2000 Alexandre Julliard
*/
#include <string.h>
#include "winnls.h"
#include "wine/unicode.h"
/* check src string for invalid chars; return non-zero if invalid char found */
/* A byte counts as invalid when it maps to the table's default Unicode
 * char without being the code page's own default char. The value returned
 * is the number of bytes left unscanned, including the offending one, so
 * it is 0 when the whole string is valid. */
static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
                                            const unsigned char *src, unsigned int srclen )
{
    const unsigned short * const cp2uni = table->cp2uni;
    const unsigned short def_unicode = table->info.def_unicode_char;
    /* def_char is plain char, which may be signed; compare it as unsigned
     * so that a default char >= 0x80 still matches the source byte */
    const unsigned char def_char = (unsigned char)table->info.def_char[0];

    for ( ; srclen; src++, srclen--)
    {
        if (cp2uni[*src] == def_unicode && *src != def_char) break;
    }
    return srclen;
}
/* mbstowcs for single-byte code page */
/* all lengths are in characters, not bytes */
/* Converts min(srclen, dstlen) bytes through the cp2uni map.
 * Returns srclen when everything fit; otherwise fills dst completely and
 * returns -1. */
static inline int mbstowcs_sbcs( const struct sbcs_table *table,
                                 const unsigned char *src, unsigned int srclen,
                                 unsigned short *dst, unsigned int dstlen )
{
    const unsigned short * const map = table->cp2uni;
    unsigned int count = srclen;
    unsigned int i;
    int ret = srclen;

    if (dstlen < srclen)
    {
        /* buffer too small: fill it up to dstlen and return error */
        count = dstlen;
        ret = -1;
    }

    /* one table lookup per input byte */
    for (i = 0; i < count; i++) dst[i] = map[src[i]];

    return ret;
}
/* query necessary dst length for src string */
/* Counts one output char per single byte and one per lead/trail pair.
 * A lead byte that is the last byte of the input is not counted. */
static inline int get_length_dbcs( const struct dbcs_table *table,
                                   const unsigned char *src, unsigned int srclen )
{
    const unsigned char * const lead = table->cp2uni_leadbytes;
    const unsigned char * const end = src + srclen;
    int count = 0;

    while (src < end)
    {
        if (lead[*src])
        {
            if (end - src < 2) break;  /* partial char, ignore it */
            src++;  /* skip over the trail byte as well */
        }
        src++;
        count++;
    }
    return count;
}
/* check src string for invalid chars; return non-zero if invalid char found */
/* A char counts as invalid when it maps to the table's default Unicode
 * char without being the code page's own default char, or when the
 * string ends in the middle of a double-byte char. The value returned is
 * the number of bytes left unscanned, including the offending char, so it
 * is 0 when the whole string is valid. */
static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
                                            const unsigned char *src, unsigned int srclen )
{
    const unsigned short * const cp2uni = table->cp2uni;
    const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
    const unsigned short def_unicode = table->info.def_unicode_char;
    /* def_char is plain char, which may be signed; compare it as unsigned
     * so that default char bytes >= 0x80 still match the source bytes */
    const unsigned char def0 = (unsigned char)table->info.def_char[0];
    const unsigned char def1 = (unsigned char)table->info.def_char[1];

    while (srclen)
    {
        unsigned char off = cp2uni_lb[*src];
        if (off)  /* multi-byte char */
        {
            if (srclen == 1) break;  /* partial char, error */
            if (cp2uni[(off << 8) + src[1]] == def_unicode &&
                (src[0] != def0 || src[1] != def1)) break;
            src++;
            srclen--;
        }
        /* single byte: it can only be the default char if the default
         * char is itself single-byte (second byte zero) */
        else if (cp2uni[*src] == def_unicode && (*src != def0 || def1)) break;
        src++;
        srclen--;
    }
    return srclen;
}
/* mbstowcs for double-byte code page */
/* all lengths are in characters, not bytes */
/* Returns the number of wide chars written, or -1 if dst filled up while
 * input was still pending. A lead byte that ends the input is dropped. */
static inline int mbstowcs_dbcs( const struct dbcs_table *table,
                                 const unsigned char *src, unsigned int srclen,
                                 unsigned short *dst, unsigned int dstlen )
{
    const unsigned short * const map = table->cp2uni;
    const unsigned char * const lead = table->cp2uni_leadbytes;
    unsigned int room = dstlen;  /* free slots left in dst */

    while (srclen && room)
    {
        const unsigned char off = lead[*src];

        if (off)
        {
            if (srclen == 1)
            {
                srclen = 0;  /* partial char, ignore it */
                break;
            }
            src++;
            srclen--;
            *dst = map[(off << 8) + *src];
        }
        else *dst = map[*src];

        src++;
        srclen--;
        dst++;
        room--;
    }
    if (srclen) return -1;  /* overflow */
    return dstlen - room;
}
/* multi byte to wide char string conversion */
/* return -1 on dst buffer overflow, -2 on invalid input char */
/* table  - code page table to convert with
 * flags  - only MB_ERR_INVALID_CHARS is looked at here; when set, the
 *          source is validated first
 * src    - source bytes, srclen of them
 * dst    - destination buffer with room for dstlen wide chars
 * When dstlen is 0 nothing is written and the required number of wide
 * chars is returned instead. */
int cp_mbstowcs( const union cptable *table, int flags,
                 const char *src, int srclen,
                 unsigned short *dst, int dstlen )
{
    /* the helpers index lookup tables with the byte value, so they take
     * the source as unsigned char */
    const unsigned char *usrc = (const unsigned char *)src;

    if (table->info.char_size == 1)
    {
        /* the check helpers return non-zero when an invalid char is found */
        if ((flags & MB_ERR_INVALID_CHARS) &&
            check_invalid_chars_sbcs( &table->sbcs, usrc, srclen )) return -2;
        if (!dstlen) return srclen;
        return mbstowcs_sbcs( &table->sbcs, usrc, srclen, dst, dstlen );
    }
    else /* mbcs */
    {
        if ((flags & MB_ERR_INVALID_CHARS) &&
            check_invalid_chars_dbcs( &table->dbcs, usrc, srclen )) return -2;
        if (!dstlen) return get_length_dbcs( &table->dbcs, usrc, srclen );
        return mbstowcs_dbcs( &table->dbcs, usrc, srclen, dst, dstlen );
    }
}
/*
* WideCharToMultiByte implementation
*
* Copyright 2000 Alexandre Julliard
*/
#include <string.h>
#include "winnls.h"
#include "wine/unicode.h"
/* wcstombs for single-byte code page */
/* Converts min(srclen, dstlen) wide chars through the two-level
 * uni2cp_high / uni2cp_low lookup.
 * Returns srclen when everything fit; otherwise fills dst completely and
 * returns -1. */
static inline int wcstombs_sbcs( const struct sbcs_table *table,
                                 const unsigned short *src, unsigned int srclen,
                                 char *dst, unsigned int dstlen )
{
    const unsigned char * const low = table->uni2cp_low;
    const unsigned short * const high = table->uni2cp_high;
    unsigned int count = srclen;
    unsigned int i;
    int ret = srclen;

    if (dstlen < srclen)
    {
        /* buffer too small: fill it up to dstlen and return error */
        count = dstlen;
        ret = -1;
    }

    /* the high byte selects an offset, the low byte indexes from it */
    for (i = 0; i < count; i++)
        dst[i] = low[high[src[i] >> 8] + (src[i] & 0xff)];

    return ret;
}
/* query necessary dst length for src string */
/* Each wide char needs two bytes when its mapping has a non-zero high
 * byte, and one byte otherwise. */
static inline int get_length_dbcs( const struct dbcs_table *table,
                                   const unsigned short *src, unsigned int srclen )
{
    const unsigned short * const low = table->uni2cp_low;
    const unsigned short * const high = table->uni2cp_high;
    const unsigned short * const end = src + srclen;
    int total = 0;

    for ( ; src != end; src++)
    {
        const unsigned short res = low[high[*src >> 8] + (*src & 0xff)];
        total += (res & 0xff00) ? 2 : 1;
    }
    return total;
}
/* wcstombs for double-byte code page */
/* Returns the number of bytes written, or -1 if dst filled up while
 * input was still pending. A double-byte char is never split: if only
 * one byte of room is left for it, conversion stops before it. */
static inline int wcstombs_dbcs( const struct dbcs_table *table,
                                 const unsigned short *src, unsigned int srclen,
                                 char *dst, unsigned int dstlen )
{
    const unsigned short * const low = table->uni2cp_low;
    const unsigned short * const high = table->uni2cp_high;
    unsigned int room = dstlen;  /* free bytes left in dst */

    while (srclen && room)
    {
        const unsigned short res = low[high[*src >> 8] + (*src & 0xff)];

        if (res & 0xff00)
        {
            if (room == 1) break;  /* do not output a partial char */
            *dst++ = res >> 8;
            room--;
        }
        *dst++ = (char)res;
        room--;
        src++;
        srclen--;
    }
    if (srclen) return -1;  /* overflow */
    return dstlen - room;
}
/* wide char to multi byte string conversion */
/* return -1 on dst buffer overflow */
/* table  - code page table to convert with
 * flags  - not used by this implementation
 * src    - source wide chars, srclen of them
 * dst    - destination buffer with room for dstlen bytes
 * When dstlen is 0 nothing is written and the required number of bytes
 * is returned instead. */
int cp_wcstombs( const union cptable *table, int flags,
                 const unsigned short *src, int srclen,
                 char *dst, int dstlen )
{
    if (table->info.char_size == 1)
    {
        if (!dstlen) return srclen;
        return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
    }
    else /* mbcs */
    {
        if (!dstlen) return get_length_dbcs( &table->dbcs, src, srclen );
        /* must use the DBCS converter: the SBCS one would read the 16-bit
         * uni2cp_low table as bytes and never emit lead bytes */
        return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment