Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wine-cw
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Registry
Registry
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wine
wine-cw
Commits
aea78538
Commit
aea78538
authored
Aug 11, 2000
by
Alexandre Julliard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added UTF-8 conversion support.
parent
1a1bd1aa
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
224 additions
and
25 deletions
+224
-25
unicode.h
include/wine/unicode.h
+2
-0
codepage.c
memory/codepage.c
+34
-25
Makefile.in
unicode/Makefile.in
+1
-0
utf8.c
unicode/utf8.c
+187
-0
No files found.
include/wine/unicode.h
View file @
aea78538
...
...
@@ -53,6 +53,8 @@ extern int cp_mbstowcs( const union cptable *table, int flags,
extern
int
cp_wcstombs
(
const
union
cptable
*
table
,
int
flags
,
const
WCHAR
*
src
,
int
srclen
,
char
*
dst
,
int
dstlen
,
const
char
*
defchar
,
int
*
used
);
/* Wide char to UTF-8 conversion; when dstlen is 0 nothing is written and the */
/* required dst length is returned. Returns -1 on dst buffer overflow. */
extern int utf8_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen );
/* UTF-8 to wide char conversion; when dstlen is 0 nothing is written and the */
/* required dst length is returned. Returns -1 on dst buffer overflow, and -2 */
/* on an invalid input char if MB_ERR_INVALID_CHARS is set in flags. */
extern int utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen );
static
inline
int
is_dbcs_leadbyte
(
const
union
cptable
*
table
,
unsigned
char
ch
)
{
...
...
memory/codepage.c
View file @
aea78538
...
...
@@ -254,24 +254,28 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
if
(
srclen
==
-
1
)
srclen
=
strlen
(
src
)
+
1
;
if
(
page
>=
CP_UTF7
)
if
(
flags
&
MB_COMPOSITE
)
FIXME
(
"MB_COMPOSITE not supported
\n
"
);
if
(
flags
&
MB_USEGLYPHCHARS
)
FIXME
(
"MB_USEGLYPHCHARS not supported
\n
"
);
switch
(
page
)
{
case
CP_UTF7
:
FIXME
(
"UTF not supported
\n
"
);
SetLastError
(
ERROR_CALL_NOT_IMPLEMENTED
);
return
0
;
case
CP_UTF8
:
ret
=
utf8_mbstowcs
(
flags
,
src
,
srclen
,
dst
,
dstlen
);
break
;
default:
if
(
!
(
table
=
get_codepage_table
(
page
)))
{
SetLastError
(
ERROR_INVALID_PARAMETER
);
return
0
;
}
ret
=
cp_mbstowcs
(
table
,
flags
,
src
,
srclen
,
dst
,
dstlen
);
break
;
}
if
(
!
(
table
=
get_codepage_table
(
page
)))
{
SetLastError
(
ERROR_INVALID_PARAMETER
);
return
0
;
}
if
(
flags
&
MB_COMPOSITE
)
FIXME
(
"MB_COMPOSITE not supported
\n
"
);
if
(
flags
&
MB_USEGLYPHCHARS
)
FIXME
(
"MB_USEGLYPHCHARS not supported
\n
"
);
ret
=
cp_mbstowcs
(
table
,
flags
,
src
,
srclen
,
dst
,
dstlen
);
if
(
ret
<
0
)
{
switch
(
ret
)
...
...
@@ -326,24 +330,29 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
if
(
srclen
==
-
1
)
srclen
=
strlenW
(
src
)
+
1
;
if
(
page
>=
CP_UTF7
)
{
FIXME
(
"UTF not supported
\n
"
);
SetLastError
(
ERROR_CALL_NOT_IMPLEMENTED
);
return
0
;
}
/* if (flags & WC_COMPOSITECHECK) FIXME( "WC_COMPOSITECHECK (%lx) not supported\n", flags );*/
if
(
!
(
table
=
get_codepage_table
(
page
))
)
switch
(
page
)
{
SetLastError
(
ERROR_INVALID_PARAMETER
);
case
CP_UTF7
:
FIXME
(
"UTF-7 not supported
\n
"
);
SetLastError
(
ERROR_CALL_NOT_IMPLEMENTED
);
return
0
;
case
CP_UTF8
:
ret
=
utf8_wcstombs
(
src
,
srclen
,
dst
,
dstlen
);
break
;
default:
if
(
!
(
table
=
get_codepage_table
(
page
)))
{
SetLastError
(
ERROR_INVALID_PARAMETER
);
return
0
;
}
ret
=
cp_wcstombs
(
table
,
flags
,
src
,
srclen
,
dst
,
dstlen
,
defchar
,
used
?
&
used_tmp
:
NULL
);
if
(
used
)
*
used
=
used_tmp
;
break
;
}
/* if (flags & WC_COMPOSITECHECK) FIXME( "WC_COMPOSITECHECK (%lx) not supported\n", flags );*/
ret
=
cp_wcstombs
(
table
,
flags
,
src
,
srclen
,
dst
,
dstlen
,
defchar
,
used
?
&
used_tmp
:
NULL
);
if
(
used
)
*
used
=
used_tmp
;
if
(
ret
==
-
1
)
{
SetLastError
(
ERROR_INSUFFICIENT_BUFFER
);
...
...
unicode/Makefile.in
View file @
aea78538
...
...
@@ -70,6 +70,7 @@ C_SRCS = \
cptable.c
\
mbtowc.c
\
string.c
\
utf8.c
\
wctomb.c
\
$
(
CODEPAGES:%
=
c_%.c
)
...
...
unicode/utf8.c
0 → 100644
View file @
aea78538
/*
* UTF-8 support routines
*
* Copyright 2000 Alexandre Julliard
*/
#include <string.h>
#include "winnls.h"
#include "wine/unicode.h"
/*
 * Number of bytes that follow the first byte of a UTF-8 sequence.
 * The table only covers bytes above 0x7f and is indexed by (byte - 0x80).
 * Entries of 0 belong to bytes that do not introduce a multi-byte sequence.
 */
static const char utf8_length[128] =
{
    /* 0x80-0xbf: no following bytes */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x80-0x8f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x90-0x9f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0xa0-0xaf */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0xb0-0xbf */
    /* 0xc0-0xdf: one following byte */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0-0xcf */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0-0xdf */
    /* 0xe0-0xef: two following bytes */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xe0-0xef */
    /* 0xf0-0xf7: three, 0xf8-0xfb: four, 0xfc-0xfd: five, 0xfe-0xff: none */
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0   /* 0xf0-0xff */
};
/*
 * Mask selecting the payload bits of the first byte of a sequence,
 * indexed by the number of bytes that follow it (see utf8_length).
 */
static const unsigned char utf8_mask[6] =
{
    0x7f,  /* 0 following bytes */
    0x1f,  /* 1 following byte  */
    0x0f,  /* 2 following bytes */
    0x07,  /* 3 following bytes */
    0x03,  /* 4 following bytes */
    0x01   /* 5 following bytes */
};
/*
 * Smallest Unicode value accepted for a sequence, indexed by the number
 * of bytes that follow the first byte (see utf8_length).
 */
static const unsigned int utf8_minval[6] =
{
    0u,         /* 0 following bytes: 0x0       */
    1u << 7,    /* 1 following byte:  0x80      */
    1u << 11,   /* 2 following bytes: 0x800     */
    1u << 16,   /* 3 following bytes: 0x10000   */
    1u << 21,   /* 4 following bytes: 0x200000  */
    1u << 26    /* 5 following bytes: 0x4000000 */
};
/* compute how many UTF-8 bytes are needed to encode srclen wide chars from src */
inline static int get_length_wcs_utf8( const WCHAR *src, unsigned int srclen )
{
    const WCHAR *end = src + srclen;
    int total = 0;

    while (src < end)
    {
        WCHAR ch = *src++;

        if (ch < 0x80) total += 1;        /* 0x00-0x7f */
        else if (ch < 0x800) total += 2;  /* 0x80-0x7ff */
        else total += 3;                  /* 0x800 and above */
    }
    return total;
}
/* wide char to UTF-8 string conversion */
/* src/srclen: wide chars to convert; dst/dstlen: output buffer and its size in bytes */
/* if dstlen is 0, nothing is written and the number of bytes needed is returned */
/* otherwise return the number of bytes written to dst, or -1 on dst buffer overflow */
/* each WCHAR is encoded on its own (1 to 3 bytes); surrogate pairs are not combined */
int utf8_wcstombs( const WCHAR *src, int srclen, char *dst, int dstlen )
{
    int len;  /* space remaining in dst, in bytes */

    if (!dstlen) return get_length_wcs_utf8( src, srclen );

    for (len = dstlen; srclen; srclen--, src++)
    {
        WCHAR ch = *src;

        if (ch < 0x80)  /* 0x00-0x7f: 1 byte */
        {
            if (!len--) return -1;  /* overflow */
            *dst++ = ch;
            continue;
        }

        if (ch < 0x800)  /* 0x80-0x7ff: 2 bytes */
        {
            if ((len -= 2) < 0) return -1;  /* overflow */
            /* fill from the last byte backwards, shifting 6 bits out each time */
            dst[1] = 0x80 | (ch & 0x3f);
            ch >>= 6;
            dst[0] = 0xc0 | ch;
            dst += 2;
            continue;
        }

        /* 0x800-0xffff: 3 bytes */

        if ((len -= 3) < 0) return -1;  /* overflow */
        dst[2] = 0x80 | (ch & 0x3f);
        ch >>= 6;
        dst[1] = 0x80 | (ch & 0x3f);
        ch >>= 6;
        dst[0] = 0xe0 | ch;
        dst += 3;
    }
    /* report bytes produced, matching the unit used by the dstlen == 0 query */
    return dstlen - len;
}
/* count how many wide chars the UTF-8 string src of srclen bytes converts to */
/* a sequence cut short by the end of the input is not counted; a sequence */
/* broken by an unexpected byte counts as one char and that byte starts the next */
inline static int get_length_mbs_utf8( const unsigned char *src, int srclen )
{
    const unsigned char *end = src + srclen;
    int count = 0;

    while (src < end)
    {
        unsigned char lead = *src++;

        if (lead >= 0xc0)
        {
            int trail;

            /* step over the following bytes, which must be in 0x80-0xbf */
            for (trail = utf8_length[lead - 0x80]; trail > 0; trail--)
            {
                if (src >= end) return count;  /* ignore partial char */
                if ((*src ^ 0x80) >= 0x40) break;  /* not a following byte */
                src++;
            }
        }
        count++;
    }
    return count;
}
/* UTF-8 to wide char string conversion */
/* flags: if MB_ERR_INVALID_CHARS is set, invalid input aborts with -2; */
/*        otherwise each invalid sequence is stored as a single '?' */
/* if dstlen is 0, nothing is written and the number of WCHARs needed is returned */
/* otherwise return the number of WCHARs written; a sequence cut short by the */
/* end of the input is ignored */
/* return -1 on dst buffer overflow, -2 on invalid input char */
int utf8_mbstowcs( int flags, const char *src, int srclen, WCHAR *dst, int dstlen )
{
    /* work on unsigned bytes so that values above 0x7f are never seen as negative */
    const unsigned char *in = (const unsigned char *)src;
    const unsigned char *inend = in + srclen;
    unsigned int res;
    int len, i, count;

    if (!dstlen) return get_length_mbs_utf8( in, srclen );

    for (count = dstlen; count && (in < inend); count--, dst++)
    {
        unsigned char ch = *in++;

        if (ch < 0x80)  /* special fast case for 7-bit ASCII */
        {
            *dst = ch;
            continue;
        }
        len = utf8_length[ch - 0x80];
        res = ch & utf8_mask[len];

        if (!len) goto bad;  /* this byte cannot start a sequence */

        for (i = len; i > 0; i--)
        {
            if (in >= inend) goto done;  /* ignore partial char */
            /* following bytes must be 0x80-0xbf; keep their low 6 bits */
            if ((ch = *in ^ 0x80) >= 0x40) goto bad;
            res = (res << 6) | ch;
            in++;
        }
        if (res < utf8_minval[len]) goto bad;  /* value too small for this length */
        if (res >= 0x10000) goto bad;  /* FIXME: maybe we should do surrogates here */
        *dst = res;
        continue;

    bad:
        if (flags & MB_ERR_INVALID_CHARS) return -2;  /* bad char */
        *dst = (WCHAR)'?';
    }
    /* NOTE(review): this also reports overflow when dst is exactly full and only */
    /* a truncated trailing sequence remains, which the length query ignores */
    if (in < inend) return -1;  /* overflow */
done:
    return dstlen - count;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment