mbtowc.c 8.94 KB
Newer Older
1 2 3 4
/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <string.h>

#include "winnls.h"
#include "wine/unicode.h"

26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
/* expand a Unicode char into its full decomposition */
/* src:    char to decompose */
/* dst:    output buffer; dst[0] is always written, so it must hold at least one char */
/* dstlen: size of dst in chars */
/* returns the number of chars stored in dst, or 0 if the decomposition does not fit */
static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
{
    extern const WCHAR unicode_decompose_table[];
    const WCHAR *entry;
    unsigned int level1, level2;
    int count;

    /* a char without decomposition expands to itself */
    *dst = src;

    /* three-level lookup: high byte, then next nibble, then a pair of chars per low nibble */
    level1 = unicode_decompose_table[src >> 8];
    level2 = unicode_decompose_table[level1 + ((src >> 4) & 0x0f)];
    entry = unicode_decompose_table + level2 + 2 * (src & 0x0f);

    if (entry[0] == 0) return 1;  /* no decomposition for this char */
    if (dstlen <= 1) return 0;    /* a decomposition needs at least two chars */

    /* the first char of the pair may itself decompose further; expand it first */
    count = get_decomposition( entry[0], dst, dstlen - 1 );
    if (count == 0) return 0;

    /* then append the second char of the pair */
    dst[count] = entry[1];
    return count + 1;
}

43 44 45 46
/* check src string for invalid chars; return non-zero if invalid char found */
static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
                                            const unsigned char *src, unsigned int srclen )
{
47
    const WCHAR * const cp2uni = table->cp2uni;
48 49
    while (srclen)
    {
50
        if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
51 52 53 54 55 56 57 58 59 60 61
            break;
        src++;
        srclen--;
    }
    return srclen;
}

/* mbstowcs for single-byte code page */
/* all lengths are in characters, not bytes */
static inline int mbstowcs_sbcs( const struct sbcs_table *table,
                                 const unsigned char *src, unsigned int srclen,
62
                                 WCHAR *dst, unsigned int dstlen )
63
{
64
    const WCHAR * const cp2uni = table->cp2uni;
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
    int ret = srclen;

    if (dstlen < srclen)
    {
        /* buffer too small: fill it up to dstlen and return error */
        srclen = dstlen;
        ret = -1;
    }

    for (;;)
    {
        switch(srclen)
        {
        default:
        case 16: dst[15] = cp2uni[src[15]];
        case 15: dst[14] = cp2uni[src[14]];
        case 14: dst[13] = cp2uni[src[13]];
        case 13: dst[12] = cp2uni[src[12]];
        case 12: dst[11] = cp2uni[src[11]];
        case 11: dst[10] = cp2uni[src[10]];
        case 10: dst[9]  = cp2uni[src[9]];
        case 9:  dst[8]  = cp2uni[src[8]];
        case 8:  dst[7]  = cp2uni[src[7]];
        case 7:  dst[6]  = cp2uni[src[6]];
        case 6:  dst[5]  = cp2uni[src[5]];
        case 5:  dst[4]  = cp2uni[src[4]];
        case 4:  dst[3]  = cp2uni[src[3]];
        case 3:  dst[2]  = cp2uni[src[2]];
        case 2:  dst[1]  = cp2uni[src[1]];
        case 1:  dst[0]  = cp2uni[src[0]];
        case 0: break;
        }
        if (srclen < 16) return ret;
        dst += 16;
        src += 16;
        srclen -= 16;
    }
}

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
/* mbstowcs for single-byte code page, decomposing each resulting char */
/* with dstlen == 0 only the required length (in WCHARs) is computed */
/* returns the number of WCHARs written, or -1 if dst is too small */
static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
                                    const unsigned char *src, unsigned int srclen,
                                    WCHAR *dst, unsigned int dstlen )
{
    const WCHAR * const map = table->cp2uni;
    unsigned int remaining;

    if (dstlen == 0)
    {
        /* length query: decompose into a scratch buffer and add up the sizes */
        WCHAR scratch[4]; /* no decomposition is larger than 4 chars */
        unsigned int total = 0;

        while (srclen)
        {
            total += get_decomposition( map[*src], scratch, 4 );
            src++;
            srclen--;
        }
        return total;
    }

    remaining = dstlen;
    while (srclen && remaining)
    {
        int count = get_decomposition( map[*src], dst, remaining );

        if (count == 0) break;  /* decomposition does not fit in what is left of dst */
        dst += count;
        remaining -= count;
        src++;
        srclen--;
    }

    /* unconsumed input means we ran out of room */
    if (srclen != 0) return -1;
    return dstlen - remaining;
}

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
/* count the chars in a double-byte string, i.e. the dst length needed to convert it */
/* a lead byte at the very end of the string (partial char) is not counted */
static inline int get_length_dbcs( const struct dbcs_table *table,
                                   const unsigned char *src, unsigned int srclen )
{
    const unsigned char * const lead = table->cp2uni_leadbytes;
    int count = 0;

    while (srclen)
    {
        if (lead[*src])
        {
            /* lead byte: it needs a trail byte to form a char */
            if (srclen == 1) break;  /* partial char, ignore it */
            src++;
            srclen--;
        }
        src++;
        srclen--;
        count++;
    }
    return count;
}

/* check src string for invalid chars; return non-zero if invalid char found */
static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
                                            const unsigned char *src, unsigned int srclen )
{
153
    const WCHAR * const cp2uni = table->cp2uni;
154 155 156 157 158 159 160 161 162
    const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;

    while (srclen)
    {
        unsigned char off = cp2uni_lb[*src];
        if (off)  /* multi-byte char */
        {
            if (srclen == 1) break;  /* partial char, error */
            if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
163
                ((src[0] << 8) | src[1]) != table->info.def_char) break;
164 165 166 167
            src++;
            srclen--;
        }
        else if (cp2uni[*src] == table->info.def_unicode_char &&
168
                 *src != table->info.def_char) break;
169 170 171 172 173 174 175 176 177 178
        src++;
        srclen--;
    }
    return srclen;
}

/* mbstowcs for double-byte code page */
/* all lengths are in characters, not bytes */
static inline int mbstowcs_dbcs( const struct dbcs_table *table,
                                 const unsigned char *src, unsigned int srclen,
179
                                 WCHAR *dst, unsigned int dstlen )
180
{
181
    const WCHAR * const cp2uni = table->cp2uni;
182
    const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
183 184 185
    unsigned int len;

    if (!dstlen) return get_length_dbcs( table, src, srclen );
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202

    for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
    {
        unsigned char off = cp2uni_lb[*src];
        if (off)
        {
            if (!--srclen) break;  /* partial char, ignore it */
            src++;
            *dst = cp2uni[(off << 8) + *src];
        }
        else *dst = cp2uni[*src];
    }
    if (srclen) return -1;  /* overflow */
    return dstlen - len;
}


203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
/* mbstowcs for double-byte code page, decomposing each resulting char */
/* with dstlen == 0 only the required length (in WCHARs) is computed */
/* returns the number of WCHARs written, or -1 if dst is too small */
static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
                                    const unsigned char *src, unsigned int srclen,
                                    WCHAR *dst, unsigned int dstlen )
{
    const WCHAR * const map = table->cp2uni;
    const unsigned char * const lead = table->cp2uni_leadbytes;
    unsigned int remaining;
    unsigned char off;
    WCHAR wc;
    int count;

    if (dstlen == 0)
    {
        /* length query: decompose into a scratch buffer and add up the sizes */
        WCHAR scratch[4]; /* no decomposition is larger than 4 chars */
        unsigned int total = 0;

        while (srclen)
        {
            off = lead[*src];
            if (off)
            {
                if (--srclen == 0) break;  /* partial char, ignore it */
                src++;
                wc = map[(off << 8) + *src];
            }
            else
            {
                wc = map[*src];
            }
            total += get_decomposition( wc, scratch, 4 );
            src++;
            srclen--;
        }
        return total;
    }

    remaining = dstlen;
    while (srclen && remaining)
    {
        off = lead[*src];
        if (off)
        {
            if (--srclen == 0) break;  /* partial char, ignore it */
            src++;
            wc = map[(off << 8) + *src];
        }
        else
        {
            wc = map[*src];
        }

        count = get_decomposition( wc, dst, remaining );
        if (count == 0) break;  /* decomposition does not fit in what is left of dst */
        dst += count;
        remaining -= count;
        src++;
        srclen--;
    }

    /* unconsumed input means we ran out of room */
    if (srclen != 0) return -1;
    return dstlen - remaining;
}


251 252 253
/* return -1 on dst buffer overflow, -2 on invalid input char */
int cp_mbstowcs( const union cptable *table, int flags,
                 const char *src, int srclen,
254
                 WCHAR *dst, int dstlen )
255 256 257 258 259
{
    if (table->info.char_size == 1)
    {
        if (flags & MB_ERR_INVALID_CHARS)
        {
260
            if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
261
        }
262 263 264 265 266 267
        if (!(flags & MB_COMPOSITE))
        {
            if (!dstlen) return srclen;
            return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
        }
        return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
268 269 270 271 272
    }
    else /* mbcs */
    {
        if (flags & MB_ERR_INVALID_CHARS)
        {
273
            if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
274
        }
275 276 277 278
        if (!(flags & MB_COMPOSITE))
            return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
        else
            return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
279 280
    }
}