wctomb.c 15.5 KB
Newer Older
1 2 3 4
/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
5 6 7 8 9 10 11 12 13 14 15 16 17 18
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 20 21 22 23 24 25
 */

#include <string.h>

#include "winnls.h"
#include "wine/unicode.h"

26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
/* search for a character in the unicode_compose_table; helper for compose() */
static inline int binary_search( WCHAR ch, int low, int high )
{
    extern const WCHAR unicode_compose_table[];
    while (low <= high)
    {
        int pos = (low + high) / 2;
        if (unicode_compose_table[2*pos] < ch)
        {
            low = pos + 1;
            continue;
        }
        if (unicode_compose_table[2*pos] > ch)
        {
            high = pos - 1;
            continue;
        }
        return pos;
    }
    return -1;
}

/* return the result of the composition of two Unicode chars, or 0 if none */
static WCHAR compose( const WCHAR *str )
{
    extern const WCHAR unicode_compose_table[];
    extern const unsigned int unicode_compose_table_size;

    int idx = 1, low = 0, high = unicode_compose_table_size - 1;
    for (;;)
    {
        int pos = binary_search( str[idx], low, high );
        if (pos == -1) return 0;
        if (!idx--) return unicode_compose_table[2*pos+1];
        low = unicode_compose_table[2*pos+1];
        high = unicode_compose_table[2*pos+3] - 1;
    }
}


/****************************************************************/
/* sbcs support */

/* check if 'ch' is an acceptable sbcs mapping for 'wch' */
static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
                                         WCHAR wch, unsigned char ch )
{
    if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
    if (ch != (unsigned char)table->info.def_char) return 1;
    return (wch == table->info.def_unicode_char);
}

/* query necessary dst length for src string */
79 80
static int get_length_sbcs( const struct sbcs_table *table, int flags,
                            const WCHAR *src, unsigned int srclen, int *used )
81
{
82 83 84 85 86 87 88
    const unsigned char  * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
    int ret, tmp;
    WCHAR composed;

    if (!used) used = &tmp;  /* avoid checking on every char */
    *used = 0;
89

90
    for (ret = 0; srclen; ret++, src++, srclen--)
91
    {
92 93
        WCHAR wch = *src;
        unsigned char ch;
94

95
        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
96
        {
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
            /* now check if we can use the composed char */
            ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
            if (is_valid_sbcs_mapping( table, flags, composed, ch ))
            {
                /* we have a good mapping, use it */
                src++;
                srclen--;
                continue;
            }
            /* no mapping for the composed char, check the other flags */
            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
            {
                *used = 1;
                src++;  /* skip the non-spacing char */
                srclen--;
                continue;
            }
            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
115
            {
116 117
                src++;
                srclen--;
118
            }
119 120 121 122 123 124
            /* WC_SEPCHARS is the default */
        }
        if (!*used)
        {
            ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
            *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
125 126 127 128 129
        }
    }
    return ret;
}

130 131
/* wcstombs for single-byte code page */
static inline int wcstombs_sbcs( const struct sbcs_table *table,
132
                                 const WCHAR *src, unsigned int srclen,
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
                                 char *dst, unsigned int dstlen )
{
    const unsigned char  * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
    int ret = srclen;

    if (dstlen < srclen)
    {
        /* buffer too small: fill it up to dstlen and return error */
        srclen = dstlen;
        ret = -1;
    }

    for (;;)
    {
        switch(srclen)
        {
        default:
        case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
        case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
        case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
        case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
        case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
        case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
        case 10: dst[9]  = uni2cp_low[uni2cp_high[src[9]  >> 8] + (src[9]  & 0xff)];
        case 9:  dst[8]  = uni2cp_low[uni2cp_high[src[8]  >> 8] + (src[8]  & 0xff)];
        case 8:  dst[7]  = uni2cp_low[uni2cp_high[src[7]  >> 8] + (src[7]  & 0xff)];
        case 7:  dst[6]  = uni2cp_low[uni2cp_high[src[6]  >> 8] + (src[6]  & 0xff)];
        case 6:  dst[5]  = uni2cp_low[uni2cp_high[src[5]  >> 8] + (src[5]  & 0xff)];
        case 5:  dst[4]  = uni2cp_low[uni2cp_high[src[4]  >> 8] + (src[4]  & 0xff)];
        case 4:  dst[3]  = uni2cp_low[uni2cp_high[src[3]  >> 8] + (src[3]  & 0xff)];
        case 3:  dst[2]  = uni2cp_low[uni2cp_high[src[2]  >> 8] + (src[2]  & 0xff)];
        case 2:  dst[1]  = uni2cp_low[uni2cp_high[src[1]  >> 8] + (src[1]  & 0xff)];
        case 1:  dst[0]  = uni2cp_low[uni2cp_high[src[0]  >> 8] + (src[0]  & 0xff)];
        case 0: break;
        }
        if (srclen < 16) return ret;
        dst += 16;
        src += 16;
        srclen -= 16;
    }
}

176 177
/* slow version of wcstombs_sbcs that handles the various flags */
static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
178
                               const WCHAR *src, unsigned int srclen,
179 180 181 182 183 184
                               char *dst, unsigned int dstlen,
                               const char *defchar, int *used )
{
    const unsigned char  * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
    const unsigned char table_default = table->info.def_char & 0xff;
185 186 187
    unsigned int len;
    int tmp;
    WCHAR composed;
188 189 190

    if (!defchar) defchar = &table_default;
    if (!used) used = &tmp;  /* avoid checking on every char */
191
    *used = 0;
192

193
    for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
194
    {
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
        WCHAR wch = *src;

        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
        {
            /* now check if we can use the composed char */
            *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
            if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
            {
                /* we have a good mapping, use it */
                src++;
                srclen--;
                continue;
            }
            /* no mapping for the composed char, check the other flags */
            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
            {
                *dst = *defchar;
                *used = 1;
                src++;  /* skip the non-spacing char */
                srclen--;
                continue;
            }
            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
            {
                src++;
                srclen--;
            }
            /* WC_SEPCHARS is the default */
        }

        *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
        if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
227
        {
228
            *dst = *defchar;
229 230 231
            *used = 1;
        }
    }
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
    if (srclen) return -1;  /* overflow */
    return dstlen - len;
}


/****************************************************************/
/* dbcs support */

/* check if 'ch' is an acceptable dbcs mapping for 'wch' */
static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
                                         WCHAR wch, unsigned short ch )
{
    if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
    if (flags & WC_NO_BEST_FIT_CHARS)
    {
        /* check if char maps back to the same Unicode value */
        if (ch & 0xff00)
        {
            unsigned char off = table->cp2uni_leadbytes[ch >> 8];
            return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
        }
        return (table->cp2uni[ch & 0xff] == wch);
    }
    return 1;
256 257
}

258
/* query necessary dst length for src string */
259 260
static int get_length_dbcs( const struct dbcs_table *table, int flags,
                            const WCHAR *src, unsigned int srclen,
261
                            const char *defchar, int *used )
262 263 264
{
    const unsigned short * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
265 266
    WCHAR defchar_value = table->info.def_char;
    WCHAR composed;
267
    int len, tmp;
268

269
    if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
270
    {
271 272 273 274 275 276 277 278
        for (len = 0; srclen; srclen--, src++, len++)
        {
            if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
        }
        return len;
    }

    if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
279 280
    if (!used) used = &tmp;  /* avoid checking on every char */
    *used = 0;
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
    for (len = 0; srclen; len++, srclen--, src++)
    {
        unsigned short res;
        WCHAR wch = *src;

        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
        {
            /* now check if we can use the composed char */
            res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];

            if (is_valid_dbcs_mapping( table, flags, composed, res ))
            {
                /* we have a good mapping for the composed char, use it */
                if (res & 0xff00) len++;
                src++;
                srclen--;
                continue;
            }
            /* no mapping for the composed char, check the other flags */
            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
            {
                if (defchar_value & 0xff00) len++;
303
                *used = 1;
304 305 306 307 308 309 310 311 312 313 314 315 316
                src++;  /* skip the non-spacing char */
                srclen--;
                continue;
            }
            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
            {
                src++;
                srclen--;
            }
            /* WC_SEPCHARS is the default */
        }

        res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
317 318 319 320 321
        if (!is_valid_dbcs_mapping( table, flags, wch, res ))
        {
            res = defchar_value;
            *used = 1;
        }
322
        if (res & 0xff00) len++;
323 324 325 326 327 328
    }
    return len;
}

/* wcstombs for double-byte code page */
static inline int wcstombs_dbcs( const struct dbcs_table *table,
329
                                 const WCHAR *src, unsigned int srclen,
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350
                                 char *dst, unsigned int dstlen )
{
    const unsigned short * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
    int len;

    for (len = dstlen; srclen && len; len--, srclen--, src++)
    {
        unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
        if (res & 0xff00)
        {
            if (len == 1) break;  /* do not output a partial char */
            len--;
            *dst++ = res >> 8;
        }
        *dst++ = (char)res;
    }
    if (srclen) return -1;  /* overflow */
    return dstlen - len;
}

351 352
/* slow version of wcstombs_dbcs that handles the various flags */
static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
353
                               const WCHAR *src, unsigned int srclen,
354 355 356 357 358
                               char *dst, unsigned int dstlen,
                               const char *defchar, int *used )
{
    const unsigned short * const uni2cp_low = table->uni2cp_low;
    const unsigned short * const uni2cp_high = table->uni2cp_high;
359
    WCHAR defchar_value = table->info.def_char;
360
    WCHAR composed;
361 362 363 364
    int len, tmp;

    if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
    if (!used) used = &tmp;  /* avoid checking on every char */
365
    *used = 0;
366 367 368

    for (len = dstlen; srclen && len; len--, srclen--, src++)
    {
369 370
        unsigned short res;
        WCHAR wch = *src;
371

372
        if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
373
        {
374 375 376 377
            /* now check if we can use the composed char */
            res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];

            if (is_valid_dbcs_mapping( table, flags, composed, res ))
378
            {
379 380 381 382
                /* we have a good mapping for the composed char, use it */
                src++;
                srclen--;
                goto output_char;
383
            }
384 385
            /* no mapping for the composed char, check the other flags */
            if (flags & WC_DEFAULTCHAR) /* use the default char instead */
386 387 388
            {
                res = defchar_value;
                *used = 1;
389 390 391 392 393 394 395 396
                src++;  /* skip the non-spacing char */
                srclen--;
                goto output_char;
            }
            if (flags & WC_DISCARDNS) /* skip the second char of the composition */
            {
                src++;
                srclen--;
397
            }
398 399 400 401 402 403 404 405
            /* WC_SEPCHARS is the default */
        }

        res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
        if (!is_valid_dbcs_mapping( table, flags, wch, res ))
        {
            res = defchar_value;
            *used = 1;
406 407
        }

408
    output_char:
409 410 411 412 413 414 415 416 417 418 419 420
        if (res & 0xff00)
        {
            if (len == 1) break;  /* do not output a partial char */
            len--;
            *dst++ = res >> 8;
        }
        *dst++ = (char)res;
    }
    if (srclen) return -1;  /* overflow */
    return dstlen - len;
}

421 422 423
/* wide char to multi byte string conversion */
/* return -1 on dst buffer overflow */
int cp_wcstombs( const union cptable *table, int flags,
424
                 const WCHAR *src, int srclen,
425
                 char *dst, int dstlen, const char *defchar, int *used )
426 427 428
{
    if (table->info.char_size == 1)
    {
429
        if (flags || defchar || used)
430 431
        {
            if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
432 433
            return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
                                       dst, dstlen, defchar, used );
434 435
        }
        if (!dstlen) return srclen;
436 437 438 439
        return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
    }
    else /* mbcs */
    {
440
        if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
441 442 443
        if (flags || defchar || used)
            return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
                                       dst, dstlen, defchar, used );
444
        return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
445 446
    }
}